You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/01/10 20:53:24 UTC
[1/9] hive git commit: HIVE-15481 : Support multiple and nested
subqueries (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
Repository: hive
Updated Branches:
refs/heads/master 1749d7045 -> b0ed8241a
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/subquery_notexists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_notexists.q.out b/ql/src/test/results/clientpositive/subquery_notexists.q.out
index 6ec3b46..60dbf57 100644
--- a/ql/src/test/results/clientpositive/subquery_notexists.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notexists.q.out
@@ -659,3 +659,216 @@ POSTHOOK: Input: default@src
199 val_199
199 val_199
2 val_2
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- non equi predicate
+explain
+select *
+from src b
+where not exists
+ (select a.key
+ from src a
+ where b.value <> a.value and a.key > b.key and a.value > 'val_2'
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- non equi predicate
+explain
+select *
+from src b
+where not exists
+ (select a.key
+ from src a
+ where b.value <> a.value and a.key > b.key and a.value > 'val_2'
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value > 'val_2') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 41500 Data size: 923146 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 > _col2) and (_col3 <> _col1)) (type: boolean)
+ Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col3 (type: string)
+ outputColumnNames: _col2, _col3
+ Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col3 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 13833 Data size: 307707 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 6916 Data size: 153842 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: boolean)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col4
+ Statistics: Num rows: 7607 Data size: 169226 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col4 is null (type: boolean)
+ Statistics: Num rows: 3803 Data size: 84601 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3803 Data size: 84601 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3803 Data size: 84601 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: select *
+from src b
+where not exists
+ (select a.key
+ from src a
+ where b.value <> a.value and a.key > b.key and a.value > 'val_2'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src b
+where not exists
+ (select a.key
+ from src a
+ where b.value <> a.value and a.key > b.key and a.value > 'val_2'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98 val_98
+98 val_98
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/subquery_notin_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.out
index 9f72cc9..cd19309 100644
--- a/ql/src/test/results/clientpositive/subquery_notin_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.out
@@ -122,15 +122,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col5
Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
+ Statistics: Num rows: 182 Data size: 5027 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 5027 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 138 Data size: 3811 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 182 Data size: 5027 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -936,15 +936,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col5
Statistics: Num rows: 14 Data size: 2087 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
+ Statistics: Num rows: 9 Data size: 1341 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 1341 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 7 Data size: 1043 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 1341 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1236,7 +1236,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col4
Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean)
+ predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean)
Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
index af42e41..8ab20c8 100644
--- a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
@@ -131,12 +131,12 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col5
Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
+ Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -153,16 +153,16 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -195,12 +195,12 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col5
Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
+ Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 7596 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
[5/9] hive git commit: HIVE-15481 : Support multiple and nested
subqueries (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/perf/query23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query23.q.out b/ql/src/test/results/clientpositive/perf/query23.q.out
new file mode 100644
index 0000000..6d4cfca
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/query23.q.out
@@ -0,0 +1,383 @@
+PREHOOK: query: explain with frequent_ss_items as
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+ from store_sales
+ ,date_dim
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_item_sk = i_item_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by substr(i_item_desc,1,30),i_item_sk,d_date
+ having count(*) >4),
+ max_store_sales as
+ (select max(csales) tpcds_cmax
+ from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+ from store_sales
+ ,customer
+ ,date_dim
+ where ss_customer_sk = c_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by c_customer_sk) x),
+ best_ss_customer as
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+ from store_sales
+ ,customer
+ where ss_customer_sk = c_customer_sk
+ group by c_customer_sk
+ having sum(ss_quantity*ss_sales_price) > (95/100.0))
+ select sum(sales)
+ from (select cs_quantity*cs_list_price sales
+ from catalog_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and cs_sold_date_sk = d_date_sk
+ and cs_item_sk in (select item_sk from frequent_ss_items)
+ and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+ union all
+ select ws_quantity*ws_list_price sales
+ from web_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and ws_sold_date_sk = d_date_sk
+ and ws_item_sk in (select item_sk from frequent_ss_items)
+ and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) y
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain with frequent_ss_items as
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+ from store_sales
+ ,date_dim
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_item_sk = i_item_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by substr(i_item_desc,1,30),i_item_sk,d_date
+ having count(*) >4),
+ max_store_sales as
+ (select max(csales) tpcds_cmax
+ from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+ from store_sales
+ ,customer
+ ,date_dim
+ where ss_customer_sk = c_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by c_customer_sk) x),
+ best_ss_customer as
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+ from store_sales
+ ,customer
+ where ss_customer_sk = c_customer_sk
+ group by c_customer_sk
+ having sum(ss_quantity*ss_sales_price) > (95/100.0))
+ select sum(sales)
+ from (select cs_quantity*cs_list_price sales
+ from catalog_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and cs_sold_date_sk = d_date_sk
+ and cs_item_sk in (select item_sk from frequent_ss_items)
+ and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+ union all
+ select ws_quantity*ws_list_price sales
+ from web_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and ws_sold_date_sk = d_date_sk
+ and ws_item_sk in (select item_sk from frequent_ss_items)
+ and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) y
+ limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
+Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE)
+Reducer 16 <- Reducer 15 (SIMPLE_EDGE)
+Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE)
+Reducer 21 <- Reducer 20 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE)
+Reducer 25 <- Map 28 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE)
+Reducer 26 <- Reducer 25 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 30 <- Map 29 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE)
+Reducer 31 <- Reducer 30 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 6 <- Union 5 (SIMPLE_EDGE)
+Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 6
+ File Output Operator [FS_136]
+ Limit [LIM_135] (rows=1 width=112)
+ Number of rows:100
+ Group By Operator [GBY_133] (rows=1 width=112)
+ Output:["_col0"],aggregations:["sum(VALUE._col0)"]
+ <-Union 5 [SIMPLE_EDGE]
+ <-Reducer 21 [CONTAINS]
+ Reduce Output Operator [RS_132]
+ Group By Operator [GBY_131] (rows=1 width=112)
+ Output:["_col0"],aggregations:["sum(_col0)"]
+ Select Operator [SEL_127] (rows=191667562 width=135)
+ Output:["_col0"]
+ Merge Join Operator [MERGEJOIN_206] (rows=191667562 width=135)
+ Conds:RS_124._col2=RS_125._col0(Inner),Output:["_col3","_col4"]
+ <-Reducer 20 [SIMPLE_EDGE]
+ SHUFFLE [RS_124]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_204] (rows=174243235 width=135)
+ Conds:RS_121._col1=RS_122._col0(Inner),Output:["_col2","_col3","_col4"]
+ <-Reducer 19 [SIMPLE_EDGE]
+ SHUFFLE [RS_121]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_199] (rows=158402938 width=135)
+ Conds:RS_118._col0=RS_119._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
+ <-Map 18 [SIMPLE_EDGE]
+ SHUFFLE [RS_118]
+ PartitionCols:_col0
+ Select Operator [SEL_66] (rows=144002668 width=135)
+ Output:["_col0","_col1","_col2","_col3","_col4"]
+ Filter Operator [FIL_186] (rows=144002668 width=135)
+ predicate:ws_sold_date_sk is not null
+ TableScan [TS_64] (rows=144002668 width=135)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"]
+ <-Map 22 [SIMPLE_EDGE]
+ SHUFFLE [RS_119]
+ PartitionCols:_col0
+ Select Operator [SEL_69] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_187] (rows=18262 width=1119)
+ predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null)
+ TableScan [TS_67] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 26 [SIMPLE_EDGE]
+ SHUFFLE [RS_122]
+ PartitionCols:_col0
+ Group By Operator [GBY_95] (rows=58079562 width=88)
+ Output:["_col0"],keys:_col1
+ Select Operator [SEL_91] (rows=116159124 width=88)
+ Output:["_col1"]
+ Filter Operator [FIL_90] (rows=116159124 width=88)
+ predicate:(_col3 > 4)
+ Select Operator [SEL_193] (rows=348477374 width=88)
+ Output:["_col0","_col3"]
+ Group By Operator [GBY_89] (rows=348477374 width=88)
+ Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
+ <-Reducer 25 [SIMPLE_EDGE]
+ SHUFFLE [RS_88]
+ PartitionCols:_col0
+ Group By Operator [GBY_87] (rows=696954748 width=88)
+ Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2
+ Select Operator [SEL_85] (rows=696954748 width=88)
+ Output:["_col0","_col1","_col2"]
+ Merge Join Operator [MERGEJOIN_201] (rows=696954748 width=88)
+ Conds:RS_82._col1=RS_83._col0(Inner),Output:["_col3","_col5","_col6"]
+ <-Map 28 [SIMPLE_EDGE]
+ SHUFFLE [RS_83]
+ PartitionCols:_col0
+ Select Operator [SEL_78] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_190] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_76] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"]
+ <-Reducer 24 [SIMPLE_EDGE]
+ SHUFFLE [RS_82]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_200] (rows=633595212 width=88)
+ Conds:RS_79._col0=RS_80._col0(Inner),Output:["_col1","_col3"]
+ <-Map 23 [SIMPLE_EDGE]
+ SHUFFLE [RS_79]
+ PartitionCols:_col0
+ Select Operator [SEL_72] (rows=575995635 width=88)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_188] (rows=575995635 width=88)
+ predicate:(ss_sold_date_sk is not null and ss_item_sk is not null)
+ TableScan [TS_70] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"]
+ <-Map 27 [SIMPLE_EDGE]
+ SHUFFLE [RS_80]
+ PartitionCols:_col0
+ Select Operator [SEL_75] (rows=36525 width=1119)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_189] (rows=36525 width=1119)
+ predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
+ TableScan [TS_73] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"]
+ <-Reducer 31 [SIMPLE_EDGE]
+ SHUFFLE [RS_125]
+ PartitionCols:_col0
+ Group By Operator [GBY_116] (rows=52799601 width=88)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_112] (rows=105599202 width=88)
+ Output:["_col0"]
+ Filter Operator [FIL_111] (rows=105599202 width=88)
+ predicate:(_col1 > 0.95)
+ Group By Operator [GBY_110] (rows=316797606 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 30 [SIMPLE_EDGE]
+ SHUFFLE [RS_109]
+ PartitionCols:_col0
+ Group By Operator [GBY_108] (rows=633595212 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Select Operator [SEL_106] (rows=633595212 width=88)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_202] (rows=633595212 width=88)
+ Conds:RS_103._col0=RS_104._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 29 [SIMPLE_EDGE]
+ SHUFFLE [RS_103]
+ PartitionCols:_col0
+ Select Operator [SEL_99] (rows=575995635 width=88)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_191] (rows=575995635 width=88)
+ predicate:ss_customer_sk is not null
+ TableScan [TS_97] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"]
+ <-Map 32 [SIMPLE_EDGE]
+ SHUFFLE [RS_104]
+ PartitionCols:_col0
+ Select Operator [SEL_102] (rows=80000000 width=860)
+ Output:["_col0"]
+ Filter Operator [FIL_192] (rows=80000000 width=860)
+ predicate:c_customer_sk is not null
+ TableScan [TS_100] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
+ <-Reducer 4 [CONTAINS]
+ Reduce Output Operator [RS_132]
+ Group By Operator [GBY_131] (rows=1 width=112)
+ Output:["_col0"],aggregations:["sum(_col0)"]
+ Select Operator [SEL_63] (rows=383314495 width=135)
+ Output:["_col0"]
+ Merge Join Operator [MERGEJOIN_205] (rows=383314495 width=135)
+ Conds:RS_60._col1=RS_61._col0(Inner),Output:["_col3","_col4"]
+ <-Reducer 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_61]
+ PartitionCols:_col0
+ Group By Operator [GBY_52] (rows=52799601 width=88)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_48] (rows=105599202 width=88)
+ Output:["_col0"]
+ Filter Operator [FIL_47] (rows=105599202 width=88)
+ predicate:(_col1 > 0.95)
+ Group By Operator [GBY_46] (rows=316797606 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_45]
+ PartitionCols:_col0
+ Group By Operator [GBY_44] (rows=633595212 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Select Operator [SEL_42] (rows=633595212 width=88)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_198] (rows=633595212 width=88)
+ Conds:RS_39._col0=RS_40._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_39]
+ PartitionCols:_col0
+ Select Operator [SEL_35] (rows=575995635 width=88)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_184] (rows=575995635 width=88)
+ predicate:ss_customer_sk is not null
+ TableScan [TS_33] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"]
+ <-Map 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_40]
+ PartitionCols:_col0
+ Select Operator [SEL_38] (rows=80000000 width=860)
+ Output:["_col0"]
+ Filter Operator [FIL_185] (rows=80000000 width=860)
+ predicate:c_customer_sk is not null
+ TableScan [TS_36] (rows=80000000 width=860)
+ default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_60]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_203] (rows=348467716 width=135)
+ Conds:RS_57._col2=RS_58._col0(Inner),Output:["_col1","_col3","_col4"]
+ <-Reducer 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_58]
+ PartitionCols:_col0
+ Group By Operator [GBY_31] (rows=58079562 width=88)
+ Output:["_col0"],keys:_col1
+ Select Operator [SEL_27] (rows=116159124 width=88)
+ Output:["_col1"]
+ Filter Operator [FIL_26] (rows=116159124 width=88)
+ predicate:(_col3 > 4)
+ Select Operator [SEL_194] (rows=348477374 width=88)
+ Output:["_col0","_col3"]
+ Group By Operator [GBY_25] (rows=348477374 width=88)
+ Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
+ <-Reducer 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_24]
+ PartitionCols:_col0
+ Group By Operator [GBY_23] (rows=696954748 width=88)
+ Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2
+ Select Operator [SEL_21] (rows=696954748 width=88)
+ Output:["_col0","_col1","_col2"]
+ Merge Join Operator [MERGEJOIN_197] (rows=696954748 width=88)
+ Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col3","_col5","_col6"]
+ <-Map 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_19]
+ PartitionCols:_col0
+ Select Operator [SEL_14] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_183] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_12] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"]
+ <-Reducer 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_196] (rows=633595212 width=88)
+ Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col3"]
+ <-Map 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_16]
+ PartitionCols:_col0
+ Select Operator [SEL_11] (rows=36525 width=1119)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_182] (rows=36525 width=1119)
+ predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null)
+ TableScan [TS_9] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"]
+ <-Map 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_15]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=575995635 width=88)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_181] (rows=575995635 width=88)
+ predicate:(ss_sold_date_sk is not null and ss_item_sk is not null)
+ TableScan [TS_6] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_57]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_195] (rows=316788826 width=135)
+ Conds:RS_54._col0=RS_55._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_54]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=287989836 width=135)
+ Output:["_col0","_col1","_col2","_col3","_col4"]
+ Filter Operator [FIL_179] (rows=287989836 width=135)
+ predicate:cs_sold_date_sk is not null
+ TableScan [TS_0] (rows=287989836 width=135)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"]
+ <-Map 7 [SIMPLE_EDGE]
+ SHUFFLE [RS_55]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_180] (rows=18262 width=1119)
+ predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/perf/query33.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query33.q.out b/ql/src/test/results/clientpositive/perf/query33.q.out
new file mode 100644
index 0000000..e42c685
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/query33.q.out
@@ -0,0 +1,437 @@
+PREHOOK: query: -- start query 1 in stream 0 using template query33.tpl and seed 1930872976
+explain with ss as (
+ select
+ i_manufact_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ cs as (
+ select
+ i_manufact_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ ws as (
+ select
+ i_manufact_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id)
+ select i_manufact_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_manufact_id
+ order by total_sales
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: -- start query 1 in stream 0 using template query33.tpl and seed 1930872976
+explain with ss as (
+ select
+ i_manufact_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ cs as (
+ select
+ i_manufact_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ ws as (
+ select
+ i_manufact_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id)
+ select i_manufact_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_manufact_id
+ order by total_sales
+limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE)
+Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE)
+Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 20 <- Map 19 (SIMPLE_EDGE)
+Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE)
+Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE)
+Reducer 27 <- Map 26 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE)
+Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE)
+Reducer 29 <- Reducer 28 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 31 <- Map 30 (SIMPLE_EDGE)
+Reducer 33 <- Map 32 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE)
+Reducer 34 <- Map 36 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 6 <- Union 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 9 <- Map 8 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 7
+ File Output Operator [FS_122]
+ Limit [LIM_121] (rows=100 width=108)
+ Number of rows:100
+ Select Operator [SEL_120] (rows=335408073 width=108)
+ Output:["_col0","_col1"]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_119]
+ Group By Operator [GBY_117] (rows=335408073 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Union 5 [SIMPLE_EDGE]
+ <-Reducer 18 [CONTAINS]
+ Reduce Output Operator [RS_116]
+ PartitionCols:_col0
+ Group By Operator [GBY_115] (rows=670816147 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Group By Operator [GBY_72] (rows=191657247 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_71]
+ PartitionCols:_col0
+ Group By Operator [GBY_70] (rows=383314495 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+ Merge Join Operator [MERGEJOIN_184] (rows=383314495 width=135)
+ Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"]
+ <-Reducer 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_66]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_177] (rows=508200 width=1436)
+ Conds:RS_63._col1=RS_64._col0(Inner),Output:["_col0","_col1"]
+ <-Map 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_63]
+ PartitionCols:_col1
+ Select Operator [SEL_39] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_164] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_37] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"]
+ <-Reducer 20 [SIMPLE_EDGE]
+ SHUFFLE [RS_64]
+ PartitionCols:_col0
+ Group By Operator [GBY_45] (rows=115500 width=1436)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 19 [SIMPLE_EDGE]
+ SHUFFLE [RS_44]
+ PartitionCols:_col0
+ Group By Operator [GBY_43] (rows=231000 width=1436)
+ Output:["_col0"],keys:i_manufact_id
+ Select Operator [SEL_42] (rows=231000 width=1436)
+ Output:["i_manufact_id"]
+ Filter Operator [FIL_165] (rows=231000 width=1436)
+ predicate:(i_category) IN ('Books')
+ TableScan [TS_40] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_category","i_manufact_id"]
+ <-Reducer 23 [SIMPLE_EDGE]
+ SHUFFLE [RS_67]
+ PartitionCols:_col4
+ Select Operator [SEL_62] (rows=348467716 width=135)
+ Output:["_col4","_col5"]
+ Merge Join Operator [MERGEJOIN_179] (rows=348467716 width=135)
+ Conds:RS_59._col1=RS_60._col0(Inner),Output:["_col2","_col3"]
+ <-Map 25 [SIMPLE_EDGE]
+ SHUFFLE [RS_60]
+ PartitionCols:_col0
+ Select Operator [SEL_55] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_168] (rows=20000000 width=1014)
+ predicate:((ca_gmt_offset = -6) and ca_address_sk is not null)
+ TableScan [TS_53] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
+ <-Reducer 22 [SIMPLE_EDGE]
+ SHUFFLE [RS_59]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_178] (rows=316788826 width=135)
+ Conds:RS_56._col0=RS_57._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 21 [SIMPLE_EDGE]
+ SHUFFLE [RS_56]
+ PartitionCols:_col0
+ Select Operator [SEL_49] (rows=287989836 width=135)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_166] (rows=287989836 width=135)
+ predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null)
+ TableScan [TS_47] (rows=287989836 width=135)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"]
+ <-Map 24 [SIMPLE_EDGE]
+ SHUFFLE [RS_57]
+ PartitionCols:_col0
+ Select Operator [SEL_52] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_167] (rows=18262 width=1119)
+ predicate:((d_year = 1999) and (d_moy = 3) and d_date_sk is not null)
+ TableScan [TS_50] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 29 [CONTAINS]
+ Reduce Output Operator [RS_116]
+ PartitionCols:_col0
+ Group By Operator [GBY_115] (rows=670816147 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Group By Operator [GBY_111] (rows=95833781 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 28 [SIMPLE_EDGE]
+ SHUFFLE [RS_110]
+ PartitionCols:_col0
+ Group By Operator [GBY_109] (rows=191667562 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+ Merge Join Operator [MERGEJOIN_185] (rows=191667562 width=135)
+ Conds:RS_105._col0=RS_106._col3(Inner),Output:["_col1","_col8"]
+ <-Reducer 27 [SIMPLE_EDGE]
+ SHUFFLE [RS_105]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_180] (rows=508200 width=1436)
+ Conds:RS_102._col1=RS_103._col0(Inner),Output:["_col0","_col1"]
+ <-Map 26 [SIMPLE_EDGE]
+ SHUFFLE [RS_102]
+ PartitionCols:_col1
+ Select Operator [SEL_78] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_169] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_76] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"]
+ <-Reducer 31 [SIMPLE_EDGE]
+ SHUFFLE [RS_103]
+ PartitionCols:_col0
+ Group By Operator [GBY_84] (rows=115500 width=1436)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 30 [SIMPLE_EDGE]
+ SHUFFLE [RS_83]
+ PartitionCols:_col0
+ Group By Operator [GBY_82] (rows=231000 width=1436)
+ Output:["_col0"],keys:i_manufact_id
+ Select Operator [SEL_81] (rows=231000 width=1436)
+ Output:["i_manufact_id"]
+ Filter Operator [FIL_170] (rows=231000 width=1436)
+ predicate:(i_category) IN ('Books')
+ TableScan [TS_79] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_category","i_manufact_id"]
+ <-Reducer 34 [SIMPLE_EDGE]
+ SHUFFLE [RS_106]
+ PartitionCols:_col3
+ Select Operator [SEL_101] (rows=174243235 width=135)
+ Output:["_col3","_col5"]
+ Merge Join Operator [MERGEJOIN_182] (rows=174243235 width=135)
+ Conds:RS_98._col2=RS_99._col0(Inner),Output:["_col1","_col3"]
+ <-Map 36 [SIMPLE_EDGE]
+ SHUFFLE [RS_99]
+ PartitionCols:_col0
+ Select Operator [SEL_94] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_173] (rows=20000000 width=1014)
+ predicate:((ca_gmt_offset = -6) and ca_address_sk is not null)
+ TableScan [TS_92] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
+ <-Reducer 33 [SIMPLE_EDGE]
+ SHUFFLE [RS_98]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_181] (rows=158402938 width=135)
+ Conds:RS_95._col0=RS_96._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 32 [SIMPLE_EDGE]
+ SHUFFLE [RS_95]
+ PartitionCols:_col0
+ Select Operator [SEL_88] (rows=144002668 width=135)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_171] (rows=144002668 width=135)
+ predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null)
+ TableScan [TS_86] (rows=144002668 width=135)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"]
+ <-Map 35 [SIMPLE_EDGE]
+ SHUFFLE [RS_96]
+ PartitionCols:_col0
+ Select Operator [SEL_91] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_172] (rows=18262 width=1119)
+ predicate:((d_year = 1999) and (d_moy = 3) and d_date_sk is not null)
+ TableScan [TS_89] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 4 [CONTAINS]
+ Reduce Output Operator [RS_116]
+ PartitionCols:_col0
+ Group By Operator [GBY_115] (rows=670816147 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Group By Operator [GBY_35] (rows=383325119 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col0
+ Group By Operator [GBY_33] (rows=766650239 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+ Merge Join Operator [MERGEJOIN_183] (rows=766650239 width=88)
+ Conds:RS_29._col0=RS_30._col3(Inner),Output:["_col1","_col8"]
+ <-Reducer 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col3
+ Select Operator [SEL_25] (rows=696954748 width=88)
+ Output:["_col3","_col5"]
+ Merge Join Operator [MERGEJOIN_176] (rows=696954748 width=88)
+ Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col1","_col3"]
+ <-Map 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_23]
+ PartitionCols:_col0
+ Select Operator [SEL_18] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_163] (rows=20000000 width=1014)
+ predicate:((ca_gmt_offset = -6) and ca_address_sk is not null)
+ TableScan [TS_16] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
+ <-Reducer 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_22]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_175] (rows=633595212 width=88)
+ Conds:RS_19._col0=RS_20._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_19]
+ PartitionCols:_col0
+ Select Operator [SEL_12] (rows=575995635 width=88)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_161] (rows=575995635 width=88)
+ predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null)
+ TableScan [TS_10] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"]
+ <-Map 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_20]
+ PartitionCols:_col0
+ Select Operator [SEL_15] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_162] (rows=18262 width=1119)
+ predicate:((d_year = 1999) and (d_moy = 3) and d_date_sk is not null)
+ TableScan [TS_13] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_29]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_174] (rows=508200 width=1436)
+ Conds:RS_26._col1=RS_27._col0(Inner),Output:["_col0","_col1"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_26]
+ PartitionCols:_col1
+ Select Operator [SEL_2] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_159] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_0] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"]
+ <-Reducer 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_27]
+ PartitionCols:_col0
+ Group By Operator [GBY_8] (rows=115500 width=1436)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_7]
+ PartitionCols:_col0
+ Group By Operator [GBY_6] (rows=231000 width=1436)
+ Output:["_col0"],keys:i_manufact_id
+ Select Operator [SEL_5] (rows=231000 width=1436)
+ Output:["i_manufact_id"]
+ Filter Operator [FIL_160] (rows=231000 width=1436)
+ predicate:(i_category) IN ('Books')
+ TableScan [TS_3] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_category","i_manufact_id"]
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/perf/query56.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query56.q.out b/ql/src/test/results/clientpositive/perf/query56.q.out
new file mode 100644
index 0000000..2db6bfb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/query56.q.out
@@ -0,0 +1,421 @@
+PREHOOK: query: explain with ss as (
+ select i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ cs as (
+ select i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ ws as (
+ select i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id)
+ select i_item_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by total_sales
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain with ss as (
+ select i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ cs as (
+ select i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ ws as (
+ select i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id)
+ select i_item_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by total_sales
+ limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE)
+Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE)
+Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 20 <- Map 19 (SIMPLE_EDGE)
+Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE)
+Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE)
+Reducer 27 <- Map 26 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE)
+Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE)
+Reducer 29 <- Reducer 28 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 31 <- Map 30 (SIMPLE_EDGE)
+Reducer 33 <- Map 32 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE)
+Reducer 34 <- Map 36 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 6 <- Union 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 9 <- Map 8 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 7
+ File Output Operator [FS_122]
+ Limit [LIM_121] (rows=100 width=108)
+ Number of rows:100
+ Select Operator [SEL_120] (rows=335408073 width=108)
+ Output:["_col0","_col1"]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_119]
+ Group By Operator [GBY_117] (rows=335408073 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Union 5 [SIMPLE_EDGE]
+ <-Reducer 18 [CONTAINS]
+ Reduce Output Operator [RS_116]
+ PartitionCols:_col0
+ Group By Operator [GBY_115] (rows=670816147 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Group By Operator [GBY_72] (rows=191657247 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_71]
+ PartitionCols:_col0
+ Group By Operator [GBY_70] (rows=383314495 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+ Merge Join Operator [MERGEJOIN_184] (rows=383314495 width=135)
+ Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"]
+ <-Reducer 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_66]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_177] (rows=508200 width=1436)
+ Conds:RS_63._col1=RS_64._col0(Inner),Output:["_col0","_col1"]
+ <-Map 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_63]
+ PartitionCols:_col1
+ Select Operator [SEL_39] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_164] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_37] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 20 [SIMPLE_EDGE]
+ SHUFFLE [RS_64]
+ PartitionCols:_col0
+ Group By Operator [GBY_45] (rows=115500 width=1436)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 19 [SIMPLE_EDGE]
+ SHUFFLE [RS_44]
+ PartitionCols:_col0
+ Group By Operator [GBY_43] (rows=231000 width=1436)
+ Output:["_col0"],keys:i_item_id
+ Select Operator [SEL_42] (rows=231000 width=1436)
+ Output:["i_item_id"]
+ Filter Operator [FIL_165] (rows=231000 width=1436)
+ predicate:(i_color) IN ('orchid', 'chiffon', 'lace')
+ TableScan [TS_40] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_color"]
+ <-Reducer 23 [SIMPLE_EDGE]
+ SHUFFLE [RS_67]
+ PartitionCols:_col4
+ Select Operator [SEL_62] (rows=348467716 width=135)
+ Output:["_col4","_col5"]
+ Merge Join Operator [MERGEJOIN_179] (rows=348467716 width=135)
+ Conds:RS_59._col1=RS_60._col0(Inner),Output:["_col2","_col3"]
+ <-Map 25 [SIMPLE_EDGE]
+ SHUFFLE [RS_60]
+ PartitionCols:_col0
+ Select Operator [SEL_55] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_168] (rows=20000000 width=1014)
+ predicate:((ca_gmt_offset = -8) and ca_address_sk is not null)
+ TableScan [TS_53] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
+ <-Reducer 22 [SIMPLE_EDGE]
+ SHUFFLE [RS_59]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_178] (rows=316788826 width=135)
+ Conds:RS_56._col0=RS_57._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 21 [SIMPLE_EDGE]
+ SHUFFLE [RS_56]
+ PartitionCols:_col0
+ Select Operator [SEL_49] (rows=287989836 width=135)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_166] (rows=287989836 width=135)
+ predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null)
+ TableScan [TS_47] (rows=287989836 width=135)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"]
+ <-Map 24 [SIMPLE_EDGE]
+ SHUFFLE [RS_57]
+ PartitionCols:_col0
+ Select Operator [SEL_52] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_167] (rows=18262 width=1119)
+ predicate:((d_year = 2000) and (d_moy = 1) and d_date_sk is not null)
+ TableScan [TS_50] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 29 [CONTAINS]
+ Reduce Output Operator [RS_116]
+ PartitionCols:_col0
+ Group By Operator [GBY_115] (rows=670816147 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Group By Operator [GBY_111] (rows=95833781 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 28 [SIMPLE_EDGE]
+ SHUFFLE [RS_110]
+ PartitionCols:_col0
+ Group By Operator [GBY_109] (rows=191667562 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+ Merge Join Operator [MERGEJOIN_185] (rows=191667562 width=135)
+ Conds:RS_105._col0=RS_106._col3(Inner),Output:["_col1","_col8"]
+ <-Reducer 27 [SIMPLE_EDGE]
+ SHUFFLE [RS_105]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_180] (rows=508200 width=1436)
+ Conds:RS_102._col1=RS_103._col0(Inner),Output:["_col0","_col1"]
+ <-Map 26 [SIMPLE_EDGE]
+ SHUFFLE [RS_102]
+ PartitionCols:_col1
+ Select Operator [SEL_78] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_169] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_76] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 31 [SIMPLE_EDGE]
+ SHUFFLE [RS_103]
+ PartitionCols:_col0
+ Group By Operator [GBY_84] (rows=115500 width=1436)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 30 [SIMPLE_EDGE]
+ SHUFFLE [RS_83]
+ PartitionCols:_col0
+ Group By Operator [GBY_82] (rows=231000 width=1436)
+ Output:["_col0"],keys:i_item_id
+ Select Operator [SEL_81] (rows=231000 width=1436)
+ Output:["i_item_id"]
+ Filter Operator [FIL_170] (rows=231000 width=1436)
+ predicate:(i_color) IN ('orchid', 'chiffon', 'lace')
+ TableScan [TS_79] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_color"]
+ <-Reducer 34 [SIMPLE_EDGE]
+ SHUFFLE [RS_106]
+ PartitionCols:_col3
+ Select Operator [SEL_101] (rows=174243235 width=135)
+ Output:["_col3","_col5"]
+ Merge Join Operator [MERGEJOIN_182] (rows=174243235 width=135)
+ Conds:RS_98._col2=RS_99._col0(Inner),Output:["_col1","_col3"]
+ <-Map 36 [SIMPLE_EDGE]
+ SHUFFLE [RS_99]
+ PartitionCols:_col0
+ Select Operator [SEL_94] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_173] (rows=20000000 width=1014)
+ predicate:((ca_gmt_offset = -8) and ca_address_sk is not null)
+ TableScan [TS_92] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
+ <-Reducer 33 [SIMPLE_EDGE]
+ SHUFFLE [RS_98]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_181] (rows=158402938 width=135)
+ Conds:RS_95._col0=RS_96._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 32 [SIMPLE_EDGE]
+ SHUFFLE [RS_95]
+ PartitionCols:_col0
+ Select Operator [SEL_88] (rows=144002668 width=135)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_171] (rows=144002668 width=135)
+ predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null)
+ TableScan [TS_86] (rows=144002668 width=135)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"]
+ <-Map 35 [SIMPLE_EDGE]
+ SHUFFLE [RS_96]
+ PartitionCols:_col0
+ Select Operator [SEL_91] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_172] (rows=18262 width=1119)
+ predicate:((d_year = 2000) and (d_moy = 1) and d_date_sk is not null)
+ TableScan [TS_89] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 4 [CONTAINS]
+ Reduce Output Operator [RS_116]
+ PartitionCols:_col0
+ Group By Operator [GBY_115] (rows=670816147 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Group By Operator [GBY_35] (rows=383325119 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col0
+ Group By Operator [GBY_33] (rows=766650239 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+ Merge Join Operator [MERGEJOIN_183] (rows=766650239 width=88)
+ Conds:RS_29._col0=RS_30._col3(Inner),Output:["_col1","_col8"]
+ <-Reducer 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col3
+ Select Operator [SEL_25] (rows=696954748 width=88)
+ Output:["_col3","_col5"]
+ Merge Join Operator [MERGEJOIN_176] (rows=696954748 width=88)
+ Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col1","_col3"]
+ <-Map 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_23]
+ PartitionCols:_col0
+ Select Operator [SEL_18] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_163] (rows=20000000 width=1014)
+ predicate:((ca_gmt_offset = -8) and ca_address_sk is not null)
+ TableScan [TS_16] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
+ <-Reducer 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_22]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_175] (rows=633595212 width=88)
+ Conds:RS_19._col0=RS_20._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_19]
+ PartitionCols:_col0
+ Select Operator [SEL_12] (rows=575995635 width=88)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_161] (rows=575995635 width=88)
+ predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null)
+ TableScan [TS_10] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"]
+ <-Map 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_20]
+ PartitionCols:_col0
+ Select Operator [SEL_15] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_162] (rows=18262 width=1119)
+ predicate:((d_year = 2000) and (d_moy = 1) and d_date_sk is not null)
+ TableScan [TS_13] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_29]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_174] (rows=508200 width=1436)
+ Conds:RS_26._col1=RS_27._col0(Inner),Output:["_col0","_col1"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_26]
+ PartitionCols:_col1
+ Select Operator [SEL_2] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_159] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_0] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_27]
+ PartitionCols:_col0
+ Group By Operator [GBY_8] (rows=115500 width=1436)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_7]
+ PartitionCols:_col0
+ Group By Operator [GBY_6] (rows=231000 width=1436)
+ Output:["_col0"],keys:i_item_id
+ Select Operator [SEL_5] (rows=231000 width=1436)
+ Output:["i_item_id"]
+ Filter Operator [FIL_160] (rows=231000 width=1436)
+ predicate:(i_color) IN ('orchid', 'chiffon', 'lace')
+ TableScan [TS_3] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_color"]
+
[3/9] hive git commit: HIVE-15481 : Support multiple and nested
subqueries (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/perf/query83.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query83.q.out b/ql/src/test/results/clientpositive/perf/query83.q.out
new file mode 100644
index 0000000..2789643
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/query83.q.out
@@ -0,0 +1,440 @@
+PREHOOK: query: -- start query 1 in stream 0 using template query83.tpl and seed 1930872976
+explain with sr_items as
+ (select i_item_id item_id,
+ sum(sr_return_quantity) sr_item_qty
+ from store_returns,
+ item,
+ date_dim
+ where sr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and sr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ cr_items as
+ (select i_item_id item_id,
+ sum(cr_return_quantity) cr_item_qty
+ from catalog_returns,
+ item,
+ date_dim
+ where cr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and cr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ wr_items as
+ (select i_item_id item_id,
+ sum(wr_return_quantity) wr_item_qty
+ from web_returns,
+ item,
+ date_dim
+ where wr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and wr_returned_date_sk = d_date_sk
+ group by i_item_id)
+ select sr_items.item_id
+ ,sr_item_qty
+ ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev
+ ,cr_item_qty
+ ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev
+ ,wr_item_qty
+ ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev
+ ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average
+ from sr_items
+ ,cr_items
+ ,wr_items
+ where sr_items.item_id=cr_items.item_id
+ and sr_items.item_id=wr_items.item_id
+ order by sr_items.item_id
+ ,sr_item_qty
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: -- start query 1 in stream 0 using template query83.tpl and seed 1930872976
+explain with sr_items as
+ (select i_item_id item_id,
+ sum(sr_return_quantity) sr_item_qty
+ from store_returns,
+ item,
+ date_dim
+ where sr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and sr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ cr_items as
+ (select i_item_id item_id,
+ sum(cr_return_quantity) cr_item_qty
+ from catalog_returns,
+ item,
+ date_dim
+ where cr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and cr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ wr_items as
+ (select i_item_id item_id,
+ sum(wr_return_quantity) wr_item_qty
+ from web_returns,
+ item,
+ date_dim
+ where wr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and wr_returned_date_sk = d_date_sk
+ group by i_item_id)
+ select sr_items.item_id
+ ,sr_item_qty
+ ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev
+ ,cr_item_qty
+ ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev
+ ,wr_item_qty
+ ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev
+ ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average
+ from sr_items
+ ,cr_items
+ ,wr_items
+ where sr_items.item_id=cr_items.item_id
+ and sr_items.item_id=wr_items.item_id
+ order by sr_items.item_id
+ ,sr_item_qty
+ limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
+Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
+Reducer 14 <- Map 13 (SIMPLE_EDGE)
+Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE)
+Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+Reducer 21 <- Map 20 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE)
+Reducer 23 <- Map 22 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE)
+Reducer 24 <- Reducer 23 (SIMPLE_EDGE)
+Reducer 26 <- Map 25 (SIMPLE_EDGE)
+Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE)
+Reducer 29 <- Reducer 28 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 30 <- Reducer 29 (SIMPLE_EDGE)
+Reducer 33 <- Map 32 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE)
+Reducer 35 <- Map 34 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE)
+Reducer 36 <- Reducer 35 (SIMPLE_EDGE)
+Reducer 38 <- Map 37 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 18 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 6
+ File Output Operator [FS_137]
+ Limit [LIM_136] (rows=100 width=77)
+ Number of rows:100
+ Select Operator [SEL_135] (rows=76653825 width=77)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_134]
+ Select Operator [SEL_133] (rows=76653825 width=77)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+ Merge Join Operator [MERGEJOIN_231] (rows=76653825 width=77)
+ Conds:RS_129._col0=RS_130._col0(Inner),RS_129._col0=RS_131._col0(Inner),Output:["_col0","_col1","_col3","_col5"]
+ <-Reducer 18 [SIMPLE_EDGE]
+ SHUFFLE [RS_130]
+ PartitionCols:_col0
+ Group By Operator [GBY_84] (rows=34842647 width=77)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_83]
+ PartitionCols:_col0
+ Group By Operator [GBY_82] (rows=69685294 width=77)
+ Output:["_col0","_col1"],aggregations:["sum(_col0)"],keys:_col1
+ Filter Operator [FIL_80] (rows=69685294 width=77)
+ predicate:_col1 is not null
+ Select Operator [SEL_79] (rows=69685294 width=77)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_229] (rows=69685294 width=77)
+ Conds:RS_76._col0=RS_77._col0(Inner),Output:["_col2","_col4"]
+ <-Reducer 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_76]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_224] (rows=63350266 width=77)
+ Conds:RS_73._col1=RS_74._col0(Inner),Output:["_col0","_col2","_col4"]
+ <-Map 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_73]
+ PartitionCols:_col1
+ Select Operator [SEL_45] (rows=57591150 width=77)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_209] (rows=57591150 width=77)
+ predicate:(sr_item_sk is not null and sr_returned_date_sk is not null)
+ TableScan [TS_43] (rows=57591150 width=77)
+ default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_return_quantity"]
+ <-Map 19 [SIMPLE_EDGE]
+ SHUFFLE [RS_74]
+ PartitionCols:_col0
+ Select Operator [SEL_48] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_210] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_46] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 21 [SIMPLE_EDGE]
+ SHUFFLE [RS_77]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_225] (rows=80353 width=1119)
+ Conds:RS_69._col1=RS_70._col0(Inner),Output:["_col0"]
+ <-Map 20 [SIMPLE_EDGE]
+ SHUFFLE [RS_69]
+ PartitionCols:_col1
+ Select Operator [SEL_51] (rows=73049 width=1119)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_211] (rows=73049 width=1119)
+ predicate:d_date_sk is not null
+ TableScan [TS_49] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+ <-Reducer 24 [SIMPLE_EDGE]
+ SHUFFLE [RS_70]
+ PartitionCols:_col0
+ Group By Operator [GBY_67] (rows=40176 width=1119)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 23 [SIMPLE_EDGE]
+ SHUFFLE [RS_66]
+ PartitionCols:_col0
+ Group By Operator [GBY_65] (rows=80353 width=1119)
+ Output:["_col0"],keys:_col0
+ Merge Join Operator [MERGEJOIN_220] (rows=80353 width=1119)
+ Conds:RS_61._col1=RS_62._col0(Inner),Output:["_col0"]
+ <-Map 22 [SIMPLE_EDGE]
+ SHUFFLE [RS_61]
+ PartitionCols:_col1
+ Select Operator [SEL_53] (rows=73049 width=1119)
+ Output:["_col0","_col1"]
+ TableScan [TS_52] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"]
+ <-Reducer 26 [SIMPLE_EDGE]
+ SHUFFLE [RS_62]
+ PartitionCols:_col0
+ Group By Operator [GBY_59] (rows=18262 width=1119)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 25 [SIMPLE_EDGE]
+ SHUFFLE [RS_58]
+ PartitionCols:_col0
+ Group By Operator [GBY_57] (rows=36525 width=1119)
+ Output:["_col0"],keys:d_week_seq
+ Select Operator [SEL_56] (rows=36525 width=1119)
+ Output:["d_week_seq"]
+ Filter Operator [FIL_213] (rows=36525 width=1119)
+ predicate:(d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10')
+ TableScan [TS_54] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"]
+ <-Reducer 30 [SIMPLE_EDGE]
+ SHUFFLE [RS_131]
+ PartitionCols:_col0
+ Group By Operator [GBY_127] (rows=8711072 width=92)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 29 [SIMPLE_EDGE]
+ SHUFFLE [RS_126]
+ PartitionCols:_col0
+ Group By Operator [GBY_125] (rows=17422145 width=92)
+ Output:["_col0","_col1"],aggregations:["sum(_col0)"],keys:_col1
+ Filter Operator [FIL_123] (rows=17422145 width=92)
+ predicate:_col1 is not null
+ Select Operator [SEL_122] (rows=17422145 width=92)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_230] (rows=17422145 width=92)
+ Conds:RS_119._col0=RS_120._col0(Inner),Output:["_col2","_col4"]
+ <-Reducer 28 [SIMPLE_EDGE]
+ SHUFFLE [RS_119]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_226] (rows=15838314 width=92)
+ Conds:RS_116._col1=RS_117._col0(Inner),Output:["_col0","_col2","_col4"]
+ <-Map 27 [SIMPLE_EDGE]
+ SHUFFLE [RS_116]
+ PartitionCols:_col1
+ Select Operator [SEL_88] (rows=14398467 width=92)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_214] (rows=14398467 width=92)
+ predicate:(wr_item_sk is not null and wr_returned_date_sk is not null)
+ TableScan [TS_86] (rows=14398467 width=92)
+ default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_item_sk","wr_return_quantity"]
+ <-Map 31 [SIMPLE_EDGE]
+ SHUFFLE [RS_117]
+ PartitionCols:_col0
+ Select Operator [SEL_91] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_215] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_89] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 33 [SIMPLE_EDGE]
+ SHUFFLE [RS_120]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_227] (rows=80353 width=1119)
+ Conds:RS_112._col1=RS_113._col0(Inner),Output:["_col0"]
+ <-Map 32 [SIMPLE_EDGE]
+ SHUFFLE [RS_112]
+ PartitionCols:_col1
+ Select Operator [SEL_94] (rows=73049 width=1119)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_216] (rows=73049 width=1119)
+ predicate:d_date_sk is not null
+ TableScan [TS_92] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+ <-Reducer 36 [SIMPLE_EDGE]
+ SHUFFLE [RS_113]
+ PartitionCols:_col0
+ Group By Operator [GBY_110] (rows=40176 width=1119)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 35 [SIMPLE_EDGE]
+ SHUFFLE [RS_109]
+ PartitionCols:_col0
+ Group By Operator [GBY_108] (rows=80353 width=1119)
+ Output:["_col0"],keys:_col0
+ Merge Join Operator [MERGEJOIN_221] (rows=80353 width=1119)
+ Conds:RS_104._col1=RS_105._col0(Inner),Output:["_col0"]
+ <-Map 34 [SIMPLE_EDGE]
+ SHUFFLE [RS_104]
+ PartitionCols:_col1
+ Select Operator [SEL_96] (rows=73049 width=1119)
+ Output:["_col0","_col1"]
+ TableScan [TS_95] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"]
+ <-Reducer 38 [SIMPLE_EDGE]
+ SHUFFLE [RS_105]
+ PartitionCols:_col0
+ Group By Operator [GBY_102] (rows=18262 width=1119)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 37 [SIMPLE_EDGE]
+ SHUFFLE [RS_101]
+ PartitionCols:_col0
+ Group By Operator [GBY_100] (rows=36525 width=1119)
+ Output:["_col0"],keys:d_week_seq
+ Select Operator [SEL_99] (rows=36525 width=1119)
+ Output:["d_week_seq"]
+ Filter Operator [FIL_218] (rows=36525 width=1119)
+ predicate:(d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10')
+ TableScan [TS_97] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_129]
+ PartitionCols:_col0
+ Group By Operator [GBY_41] (rows=17423323 width=106)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_40]
+ PartitionCols:_col0
+ Group By Operator [GBY_39] (rows=34846646 width=106)
+ Output:["_col0","_col1"],aggregations:["sum(_col0)"],keys:_col1
+ Filter Operator [FIL_37] (rows=34846646 width=106)
+ predicate:_col1 is not null
+ Select Operator [SEL_36] (rows=34846646 width=106)
+ Output:["_col0","_col1"]
+ Merge Join Operator [MERGEJOIN_228] (rows=34846646 width=106)
+ Conds:RS_33._col0=RS_34._col0(Inner),Output:["_col2","_col4"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_33]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_222] (rows=31678769 width=106)
+ Conds:RS_30._col1=RS_31._col0(Inner),Output:["_col0","_col2","_col4"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col1
+ Select Operator [SEL_2] (rows=28798881 width=106)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_204] (rows=28798881 width=106)
+ predicate:(cr_item_sk is not null and cr_returned_date_sk is not null)
+ TableScan [TS_0] (rows=28798881 width=106)
+ default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_item_sk","cr_return_quantity"]
+ <-Map 7 [SIMPLE_EDGE]
+ SHUFFLE [RS_31]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_205] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_3] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_223] (rows=80353 width=1119)
+ Conds:RS_26._col1=RS_27._col0(Inner),Output:["_col0"]
+ <-Map 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_26]
+ PartitionCols:_col1
+ Select Operator [SEL_8] (rows=73049 width=1119)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_206] (rows=73049 width=1119)
+ predicate:d_date_sk is not null
+ TableScan [TS_6] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+ <-Reducer 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_27]
+ PartitionCols:_col0
+ Group By Operator [GBY_24] (rows=40176 width=1119)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_23]
+ PartitionCols:_col0
+ Group By Operator [GBY_22] (rows=80353 width=1119)
+ Output:["_col0"],keys:_col0
+ Merge Join Operator [MERGEJOIN_219] (rows=80353 width=1119)
+ Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col0"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col1
+ Select Operator [SEL_10] (rows=73049 width=1119)
+ Output:["_col0","_col1"]
+ TableScan [TS_9] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"]
+ <-Reducer 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_19]
+ PartitionCols:_col0
+ Group By Operator [GBY_16] (rows=18262 width=1119)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_15]
+ PartitionCols:_col0
+ Group By Operator [GBY_14] (rows=36525 width=1119)
+ Output:["_col0"],keys:d_week_seq
+ Select Operator [SEL_13] (rows=36525 width=1119)
+ Output:["d_week_seq"]
+ Filter Operator [FIL_208] (rows=36525 width=1119)
+ predicate:(d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10')
+ TableScan [TS_11] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"]
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
index c28a218..f75e497 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
@@ -417,3 +417,626 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+PREHOOK: query: -- uncorr exists
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- uncorr exists
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1)
+ Reducer 4 <- Map 3 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value > 'val_9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 41500 Data size: 922896 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 41500 Data size: 922896 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+PREHOOK: query: select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+100 val_100
+103 val_103
+103 val_103
+104 val_104
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+119 val_119
+119 val_119
+119 val_119
+12 val_12
+12 val_12
+120 val_120
+120 val_120
+125 val_125
+125 val_125
+126 val_126
+128 val_128
+128 val_128
+128 val_128
+129 val_129
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+134 val_134
+136 val_136
+137 val_137
+137 val_137
+138 val_138
+138 val_138
+138 val_138
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+146 val_146
+149 val_149
+149 val_149
+15 val_15
+15 val_15
+150 val_150
+152 val_152
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+164 val_164
+165 val_165
+165 val_165
+166 val_166
+167 val_167
+167 val_167
+167 val_167
+168 val_168
+169 val_169
+169 val_169
+169 val_169
+169 val_169
+17 val_17
+170 val_170
+172 val_172
+172 val_172
+174 val_174
+174 val_174
+175 val_175
+175 val_175
+176 val_176
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+179 val_179
+18 val_18
+18 val_18
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+187 val_187
+187 val_187
+189 val_189
+19 val_19
+190 val_190
+191 val_191
+191 val_191
+192 val_192
+193 val_193
+193 val_193
+193 val_193
+194 val_194
+195 val_195
+195 val_195
+196 val_196
+197 val_197
+197 val_197
+199 val_199
+199 val_199
+199 val_199
+2 val_2
+20 val_20
+200 val_200
+200 val_200
+201 val_201
+202 val_202
+203 val_203
+203 val_203
+205 val_205
+205 val_205
+207 val_207
+207 val_207
+208 val_208
+208 val_208
+208 val_208
+209 val_209
+209 val_209
+213 val_213
+213 val_213
+214 val_214
+216 val_216
+216 val_216
+217 val_217
+217 val_217
+218 val_218
+219 val_219
+219 val_219
+221 val_221
+221 val_221
+222 val_222
+223 val_223
+223 val_223
+224 val_224
+224 val_224
+226 val_226
+228 val_228
+229 val_229
+229 val_229
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+233 val_233
+233 val_233
+235 val_235
+237 val_237
+237 val_237
+238 val_238
+238 val_238
+239 val_239
+239 val_239
+24 val_24
+24 val_24
+241 val_241
+242 val_242
+242 val_242
+244 val_244
+247 val_247
+248 val_248
+249 val_249
+252 val_252
+255 val_255
+255 val_255
+256 val_256
+256 val_256
+257 val_257
+258 val_258
+26 val_26
+26 val_26
+260 val_260
+262 val_262
+263 val_263
+265 val_265
+265 val_265
+266 val_266
+27 val_27
+272 val_272
+272 val_272
+273 val_273
+273 val_273
+273 val_273
+274 val_274
+275 val_275
+277 val_277
+277 val_277
+277 val_277
+277 val_277
+278 val_278
+278 val_278
+28 val_28
+280 val_280
+280 val_280
+281 val_281
+281 val_281
+282 val_282
+282 val_282
+283 val_283
+284 val_284
+285 val_285
+286 val_286
+287 val_287
+288 val_288
+288 val_288
+289 val_289
+291 val_291
+292 val_292
+296 val_296
+298 val_298
+298 val_298
+298 val_298
+30 val_30
+302 val_302
+305 val_305
+306 val_306
+307 val_307
+307 val_307
+308 val_308
+309 val_309
+309 val_309
+310 val_310
+311 val_311
+311 val_311
+311 val_311
+315 val_315
+316 val_316
+316 val_316
+316 val_316
+317 val_317
+317 val_317
+318 val_318
+318 val_318
+318 val_318
+321 val_321
+321 val_321
+322 val_322
+322 val_322
+323 val_323
+325 val_325
+325 val_325
+327 val_327
+327 val_327
+327 val_327
+33 val_33
+331 val_331
+331 val_331
+332 val_332
+333 val_333
+333 val_333
+335 val_335
+336 val_336
+338 val_338
+339 val_339
+34 val_34
+341 val_341
+342 val_342
+342 val_342
+344 val_344
+344 val_344
+345 val_345
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+35 val_35
+35 val_35
+35 val_35
+351 val_351
+353 val_353
+353 val_353
+356 val_356
+360 val_360
+362 val_362
+364 val_364
+365 val_365
+366 val_366
+367 val_367
+367 val_367
+368 val_368
+369 val_369
+369 val_369
+369 val_369
+37 val_37
+37 val_37
+373 val_373
+374 val_374
+375 val_375
+377 val_377
+378 val_378
+379 val_379
+382 val_382
+382 val_382
+384 val_384
+384 val_384
+384 val_384
+386 val_386
+389 val_389
+392 val_392
+393 val_393
+394 val_394
+395 val_395
+395 val_395
+396 val_396
+396 val_396
+396 val_396
+397 val_397
+397 val_397
+399 val_399
+399 val_399
+4 val_4
+400 val_400
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+402 val_402
+403 val_403
+403 val_403
+403 val_403
+404 val_404
+404 val_404
+406 val_406
+406 val_406
+406 val_406
+406 val_406
+407 val_407
+409 val_409
+409 val_409
+409 val_409
+41 val_41
+411 val_411
+413 val_413
+413 val_413
+414 val_414
+414 val_414
+417 val_417
+417 val_417
+417 val_417
+418 val_418
+419 val_419
+42 val_42
+42 val_42
+421 val_421
+424 val_424
+424 val_424
+427 val_427
+429 val_429
+429 val_429
+43 val_43
+430 val_430
+430 val_430
+430 val_430
+431 val_431
+431 val_431
+431 val_431
+432 val_432
+435 val_435
+436 val_436
+437 val_437
+438 val_438
+438 val_438
+438 val_438
+439 val_439
+439 val_439
+44 val_44
+443 val_443
+444 val_444
+446 val_446
+448 val_448
+449 val_449
+452 val_452
+453 val_453
+454 val_454
+454 val_454
+454 val_454
+455 val_455
+457 val_457
+458 val_458
+458 val_458
+459 val_459
+459 val_459
+460 val_460
+462 val_462
+462 val_462
+463 val_463
+463 val_463
+466 val_466
+466 val_466
+466 val_466
+467 val_467
+468 val_468
+468 val_468
+468 val_468
+468 val_468
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+47 val_47
+470 val_470
+472 val_472
+475 val_475
+477 val_477
+478 val_478
+478 val_478
+479 val_479
+480 val_480
+480 val_480
+480 val_480
+481 val_481
+482 val_482
+483 val_483
+484 val_484
+485 val_485
+487 val_487
+489 val_489
+489 val_489
+489 val_489
+489 val_489
+490 val_490
+491 val_491
+492 val_492
+492 val_492
+493 val_493
+494 val_494
+495 val_495
+496 val_496
+497 val_497
+498 val_498
+498 val_498
+498 val_498
+5 val_5
+5 val_5
+5 val_5
+51 val_51
+51 val_51
+53 val_53
+54 val_54
+57 val_57
+58 val_58
+58 val_58
+64 val_64
+65 val_65
+66 val_66
+67 val_67
+67 val_67
+69 val_69
+70 val_70
+70 val_70
+70 val_70
+72 val_72
+72 val_72
+74 val_74
+76 val_76
+76 val_76
+77 val_77
+78 val_78
+8 val_8
+80 val_80
+82 val_82
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+85 val_85
+86 val_86
+87 val_87
+9 val_9
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/spark/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
index 36e3e6e..8400d8e 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
@@ -1130,6 +1130,260 @@ POSTHOOK: Input: default@lineitem
#### A masked pattern was here ####
108570 8571
4297 1798
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_3, $hdt$_4]] in Work 'Reducer 11' is a cross product
+PREHOOK: query: -- corr, agg in outer and inner
+explain select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- corr, agg in outer and inner
+explain select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 13 (PARTITION-LEVEL SORT, 1)
+ Reducer 12 <- Reducer 11 (GROUP, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 12 (PARTITION-LEVEL SORT, 2)
+ Reducer 8 <- Reducer 7 (GROUP, 2)
+ Reducer 9 <- Reducer 8 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_partkey (type: int), l_quantity (type: double), l_extendedprice (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: double)
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_partkey is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: l_partkey (type: int), l_quantity (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reducer 11
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1
+ Statistics: Num rows: 2600 Data size: 327700 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 2600 Data size: 327700 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2600 Data size: 327700 Basic stats: COMPLETE Column stats: NONE
+ Reducer 12
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1300 Data size: 163850 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1300 Data size: 163850 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: double), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: double), _col3 (type: int)
+ Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: double)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: double), _col3 (type: int)
+ 1 _col0 (type: double), _col1 (type: int)
+ outputColumnNames: _col2
+ Statistics: Num rows: 392 Data size: 49494 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 1430 Data size: 180235 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col1 (type: double)
+ outputColumnNames: _col2, _col1
+ Statistics: Num rows: 1430 Data size: 180235 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(_col1)
+ keys: _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1430 Data size: 180235 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1430 Data size: 180235 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 8
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 715 Data size: 90117 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: double), _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 715 Data size: 90117 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: double), _col1 (type: int)
+ Statistics: Num rows: 715 Data size: 90117 Basic stats: COMPLETE Column stats: NONE
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 357 Data size: 44995 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: double), _col1 (type: int)
+ Statistics: Num rows: 357 Data size: 44995 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[14][tables = [$hdt$_3, $hdt$_4]] in Work 'Reducer 11' is a cross product
+PREHOOK: query: select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+NULL
PREHOOK: query: --where has multiple conjuction
explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out
index 1019e7a..a77efc9 100644
--- a/ql/src/test/results/clientpositive/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/subquery_exists.q.out
@@ -457,3 +457,628 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- uncorr exists
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- uncorr exists
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-1 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value > 'val_9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 41500 Data size: 922896 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 41500 Data size: 922896 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+100 val_100
+103 val_103
+103 val_103
+104 val_104
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+119 val_119
+119 val_119
+119 val_119
+12 val_12
+12 val_12
+120 val_120
+120 val_120
+125 val_125
+125 val_125
+126 val_126
+128 val_128
+128 val_128
+128 val_128
+129 val_129
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+134 val_134
+136 val_136
+137 val_137
+137 val_137
+138 val_138
+138 val_138
+138 val_138
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+146 val_146
+149 val_149
+149 val_149
+15 val_15
+15 val_15
+150 val_150
+152 val_152
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+164 val_164
+165 val_165
+165 val_165
+166 val_166
+167 val_167
+167 val_167
+167 val_167
+168 val_168
+169 val_169
+169 val_169
+169 val_169
+169 val_169
+17 val_17
+170 val_170
+172 val_172
+172 val_172
+174 val_174
+174 val_174
+175 val_175
+175 val_175
+176 val_176
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+179 val_179
+18 val_18
+18 val_18
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+187 val_187
+187 val_187
+189 val_189
+19 val_19
+190 val_190
+191 val_191
+191 val_191
+192 val_192
+193 val_193
+193 val_193
+193 val_193
+194 val_194
+195 val_195
+195 val_195
+196 val_196
+197 val_197
+197 val_197
+199 val_199
+199 val_199
+199 val_199
+2 val_2
+20 val_20
+200 val_200
+200 val_200
+201 val_201
+202 val_202
+203 val_203
+203 val_203
+205 val_205
+205 val_205
+207 val_207
+207 val_207
+208 val_208
+208 val_208
+208 val_208
+209 val_209
+209 val_209
+213 val_213
+213 val_213
+214 val_214
+216 val_216
+216 val_216
+217 val_217
+217 val_217
+218 val_218
+219 val_219
+219 val_219
+221 val_221
+221 val_221
+222 val_222
+223 val_223
+223 val_223
+224 val_224
+224 val_224
+226 val_226
+228 val_228
+229 val_229
+229 val_229
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+233 val_233
+233 val_233
+235 val_235
+237 val_237
+237 val_237
+238 val_238
+238 val_238
+239 val_239
+239 val_239
+24 val_24
+24 val_24
+241 val_241
+242 val_242
+242 val_242
+244 val_244
+247 val_247
+248 val_248
+249 val_249
+252 val_252
+255 val_255
+255 val_255
+256 val_256
+256 val_256
+257 val_257
+258 val_258
+26 val_26
+26 val_26
+260 val_260
+262 val_262
+263 val_263
+265 val_265
+265 val_265
+266 val_266
+27 val_27
+272 val_272
+272 val_272
+273 val_273
+273 val_273
+273 val_273
+274 val_274
+275 val_275
+277 val_277
+277 val_277
+277 val_277
+277 val_277
+278 val_278
+278 val_278
+28 val_28
+280 val_280
+280 val_280
+281 val_281
+281 val_281
+282 val_282
+282 val_282
+283 val_283
+284 val_284
+285 val_285
+286 val_286
+287 val_287
+288 val_288
+288 val_288
+289 val_289
+291 val_291
+292 val_292
+296 val_296
+298 val_298
+298 val_298
+298 val_298
+30 val_30
+302 val_302
+305 val_305
+306 val_306
+307 val_307
+307 val_307
+308 val_308
+309 val_309
+309 val_309
+310 val_310
+311 val_311
+311 val_311
+311 val_311
+315 val_315
+316 val_316
+316 val_316
+316 val_316
+317 val_317
+317 val_317
+318 val_318
+318 val_318
+318 val_318
+321 val_321
+321 val_321
+322 val_322
+322 val_322
+323 val_323
+325 val_325
+325 val_325
+327 val_327
+327 val_327
+327 val_327
+33 val_33
+331 val_331
+331 val_331
+332 val_332
+333 val_333
+333 val_333
+335 val_335
+336 val_336
+338 val_338
+339 val_339
+34 val_34
+341 val_341
+342 val_342
+342 val_342
+344 val_344
+344 val_344
+345 val_345
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+35 val_35
+35 val_35
+35 val_35
+351 val_351
+353 val_353
+353 val_353
+356 val_356
+360 val_360
+362 val_362
+364 val_364
+365 val_365
+366 val_366
+367 val_367
+367 val_367
+368 val_368
+369 val_369
+369 val_369
+369 val_369
+37 val_37
+37 val_37
+373 val_373
+374 val_374
+375 val_375
+377 val_377
+378 val_378
+379 val_379
+382 val_382
+382 val_382
+384 val_384
+384 val_384
+384 val_384
+386 val_386
+389 val_389
+392 val_392
+393 val_393
+394 val_394
+395 val_395
+395 val_395
+396 val_396
+396 val_396
+396 val_396
+397 val_397
+397 val_397
+399 val_399
+399 val_399
+4 val_4
+400 val_400
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+402 val_402
+403 val_403
+403 val_403
+403 val_403
+404 val_404
+404 val_404
+406 val_406
+406 val_406
+406 val_406
+406 val_406
+407 val_407
+409 val_409
+409 val_409
+409 val_409
+41 val_41
+411 val_411
+413 val_413
+413 val_413
+414 val_414
+414 val_414
+417 val_417
+417 val_417
+417 val_417
+418 val_418
+419 val_419
+42 val_42
+42 val_42
+421 val_421
+424 val_424
+424 val_424
+427 val_427
+429 val_429
+429 val_429
+43 val_43
+430 val_430
+430 val_430
+430 val_430
+431 val_431
+431 val_431
+431 val_431
+432 val_432
+435 val_435
+436 val_436
+437 val_437
+438 val_438
+438 val_438
+438 val_438
+439 val_439
+439 val_439
+44 val_44
+443 val_443
+444 val_444
+446 val_446
+448 val_448
+449 val_449
+452 val_452
+453 val_453
+454 val_454
+454 val_454
+454 val_454
+455 val_455
+457 val_457
+458 val_458
+458 val_458
+459 val_459
+459 val_459
+460 val_460
+462 val_462
+462 val_462
+463 val_463
+463 val_463
+466 val_466
+466 val_466
+466 val_466
+467 val_467
+468 val_468
+468 val_468
+468 val_468
+468 val_468
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+47 val_47
+470 val_470
+472 val_472
+475 val_475
+477 val_477
+478 val_478
+478 val_478
+479 val_479
+480 val_480
+480 val_480
+480 val_480
+481 val_481
+482 val_482
+483 val_483
+484 val_484
+485 val_485
+487 val_487
+489 val_489
+489 val_489
+489 val_489
+489 val_489
+490 val_490
+491 val_491
+492 val_492
+492 val_492
+493 val_493
+494 val_494
+495 val_495
+496 val_496
+497 val_497
+498 val_498
+498 val_498
+498 val_498
+5 val_5
+5 val_5
+5 val_5
+51 val_51
+51 val_51
+53 val_53
+54 val_54
+57 val_57
+58 val_58
+58 val_58
+64 val_64
+65 val_65
+66 val_66
+67 val_67
+67 val_67
+69 val_69
+70 val_70
+70 val_70
+70 val_70
+72 val_72
+72 val_72
+74 val_74
+76 val_76
+76 val_76
+77 val_77
+78 val_78
+8 val_8
+80 val_80
+82 val_82
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+85 val_85
+86 val_86
+87 val_87
+9 val_9
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
[9/9] hive git commit: HIVE-15481 : Support multiple and nested
subqueries (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
Posted by ha...@apache.org.
HIVE-15481 : Support multiple and nested subqueries (Vineet Garg via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b0ed8241
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b0ed8241
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b0ed8241
Branch: refs/heads/master
Commit: b0ed8241acecb11da8f53be906d47861636fad24
Parents: 1749d70
Author: Vineet Garg <vg...@hortonworks.com>
Authored: Tue Dec 20 21:44:00 2016 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Jan 10 12:52:44 2017 -0800
----------------------------------------------------------------------
data/files/part_tiny_nulls.txt | 26 +
.../test/resources/testconfiguration.properties | 4 +-
.../calcite/HiveSubQRemoveRelBuilder.java | 2 +-
.../calcite/reloperators/HiveFilter.java | 6 +
.../calcite/rules/HiveSubQueryRemoveRule.java | 8 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 58 +-
.../apache/hadoop/hive/ql/parse/QBSubQuery.java | 111 +-
.../clientnegative/subquery_restrictions.q | 92 -
.../subquery_subquery_chain_exists.q | 4 +
.../test/queries/clientpositive/perf/query16.q | 30 +
.../test/queries/clientpositive/perf/query23.q | 48 +
.../test/queries/clientpositive/perf/query33.q | 73 +
.../test/queries/clientpositive/perf/query45.q | 1 -
.../test/queries/clientpositive/perf/query56.q | 65 +
.../test/queries/clientpositive/perf/query60.q | 77 +
.../test/queries/clientpositive/perf/query69.q | 46 +
.../test/queries/clientpositive/perf/query83.q | 65 +
.../queries/clientpositive/subquery_exists.q | 18 +
.../test/queries/clientpositive/subquery_in.q | 6 +-
.../queries/clientpositive/subquery_in_having.q | 39 +
.../queries/clientpositive/subquery_multi.q | 111 +
.../queries/clientpositive/subquery_notexists.q | 19 +
.../queries/clientpositive/subquery_notin.q | 17 +
.../subquery_corr_grandparent.q.out | 2 +-
.../clientnegative/subquery_restrictions.q.out | 1 -
.../subquery_subquery_chain_exists.q.out | 1 +
.../clientpositive/llap/explainuser_1.q.out | 16 +-
.../results/clientpositive/llap/lineage3.q.out | 2 +-
.../clientpositive/llap/subquery_exists.q.out | 630 ++
.../clientpositive/llap/subquery_in.q.out | 273 +
.../clientpositive/llap/subquery_multi.q.out | 5610 ++++++++++++++++++
.../clientpositive/llap/subquery_notin.q.out | 971 ++-
.../llap/vector_groupby_mapjoin.q.out | 8 +-
.../results/clientpositive/perf/query16.q.out | 330 ++
.../results/clientpositive/perf/query23.q.out | 383 ++
.../results/clientpositive/perf/query33.q.out | 437 ++
.../results/clientpositive/perf/query56.q.out | 421 ++
.../results/clientpositive/perf/query60.q.out | 443 ++
.../results/clientpositive/perf/query69.q.out | 591 ++
.../results/clientpositive/perf/query83.q.out | 440 ++
.../clientpositive/spark/subquery_exists.q.out | 623 ++
.../clientpositive/spark/subquery_in.q.out | 254 +
.../clientpositive/subquery_exists.q.out | 625 ++
.../clientpositive/subquery_in_having.q.out | 2140 ++++++-
.../clientpositive/subquery_notexists.q.out | 213 +
.../clientpositive/subquery_notin_having.q.out | 18 +-
.../clientpositive/vector_groupby_mapjoin.q.out | 18 +-
47 files changed, 15036 insertions(+), 340 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/data/files/part_tiny_nulls.txt
----------------------------------------------------------------------
diff --git a/data/files/part_tiny_nulls.txt b/data/files/part_tiny_nulls.txt
new file mode 100644
index 0000000..c2b80ff
--- /dev/null
+++ b/data/files/part_tiny_nulls.txt
@@ -0,0 +1,26 @@
+121152,almond antique burnished rose metallic,Manufacturer#1,Brand#14,PROMO PLATED TIN,2,JUMBO BOX,1173.15,e pinto beans h
+121152,almond antique burnished rose metallic,Manufacturer#1,Brand#14,PROMO PLATED TIN,2,JUMBO BOX,1173.15,e pinto beans h
+85768,almond antique chartreuse lavender yellow,Manufacturer#1,Brand#12,LARGE BRUSHED STEEL,34,SM BAG,1753.76,refull
+110592,almond antique salmon chartreuse burlywood,Manufacturer#1,Brand#15,PROMO BURNISHED NICKEL,6,JUMBO PKG,1602.59, to the furiously
+86428,almond aquamarine burnished black steel,Manufacturer#1,Brand#12,STANDARD ANODIZED STEEL,28,WRAP BAG,1414.42,arefully
+65667,almond aquamarine pink moccasin thistle,Manufacturer#1,Brand#12,LARGE BURNISHED STEEL,42,JUMBO CASE,1632.66,e across the expr
+105685,almond antique violet chocolate turquoise,Manufacturer#2,Brand#22,MEDIUM ANODIZED COPPER,14,MED CAN,1690.68,ly pending requ
+191709,almond antique violet turquoise frosted,Manufacturer#2,Brand#22,ECONOMY POLISHED STEEL,40,MED BOX,1800.7, haggle
+146985,almond aquamarine midnight light salmon,Manufacturer#2,Brand#23,MEDIUM BURNISHED COPPER,2,SM CASE,2031.98,s cajole caref
+132666,almond aquamarine rose maroon antique,Manufacturer#2,Brand#24,SMALL POLISHED NICKEL,25,MED BOX,1698.66,even
+195606,almond aquamarine sandy cyan gainsboro,Manufacturer#2,Brand#25,STANDARD PLATED TIN,18,SM PKG,1701.6,ic de
+90681,almond antique chartreuse khaki white,Manufacturer#3,Brand#31,MEDIUM BURNISHED TIN,17,SM CASE,1671.68,are slyly after the sl
+17273,almond antique forest lavender goldenrod,Manufacturer#3,Brand#35,PROMO ANODIZED TIN,14,JUMBO CASE,1190.27,along the
+112398,almond antique metallic orange dim,Manufacturer#3,Brand#32,MEDIUM BURNISHED BRASS,19,JUMBO JAR,1410.39,ole car
+40982,almond antique misty red olive,Manufacturer#3,Brand#32,ECONOMY PLATED COPPER,1,LG PKG,1922.98,c foxes can s
+144293,almond antique olive coral navajo,Manufacturer#3,Brand#34,STANDARD POLISHED STEEL,45,JUMBO CAN,1337.29,ag furiously about
+49671,almond antique gainsboro frosted violet,Manufacturer#4,Brand#41,SMALL BRUSHED BRASS,10,SM BOX,1620.67,ccounts run quick
+48427,almond antique violet mint lemon,Manufacturer#4,Brand#42,PROMO POLISHED STEEL,39,SM CASE,1375.42,hely ironic i
+45261,almond aquamarine floral ivory bisque,Manufacturer#4,Brand#42,SMALL PLATED STEEL,27,WRAP CASE,1206.26,careful
+17927,almond aquamarine yellow dodger mint,Manufacturer#4,Brand#41,ECONOMY BRUSHED COPPER,7,SM PKG,1844.92,ites. eve
+33357,almond azure aquamarine papaya violet,Manufacturer#4,Brand#41,STANDARD ANODIZED TIN,12,WRAP CASE,1290.35,reful
+192697,almond antique blue firebrick mint,Manufacturer#5,Brand#52,MEDIUM BURNISHED TIN,31,LG DRUM,1789.69,ickly ir
+42669,almond antique medium spring khaki,Manufacturer#5,Brand#51,STANDARD BURNISHED TIN,6,MED CAN,1611.66,sits haggl
+155733,almond antique sky peru orange,Manufacturer#5,Brand#53,SMALL PLATED BRASS,2,WRAP DRUM,1788.73,furiously. bra
+15103,almond aquamarine dodger light gainsboro,Manufacturer#5,Brand#53,ECONOMY BURNISHED STEEL,46,LG PACK,1018.1,packages hinder carefu
+78486,almond azure blanched chiffon midnight,Manufacturer#5,Brand#52,LARGE BRUSHED BRASS,23,MED BAG,1464.48,hely blith
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 70e7197..be5a747 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -224,7 +224,6 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
stats_only_null.q,\
subquery_exists.q,\
subquery_in.q,\
- subquery_restrictions,\
temp_table.q,\
tez_bmj_schema_evolution.q,\
tez_dml.q,\
@@ -647,7 +646,7 @@ minillaplocal.query.files=acid_globallimit.q,\
offset_limit_ppd_optimizer.q,\
cluster.q,\
subquery_in.q,\
- subquery_restrictions,\
+ subquery_multi.q,\
stats11.q,\
orc_create.q,\
orc_split_elimination.q,\
@@ -1327,7 +1326,6 @@ spark.query.files=add_part_multiple.q, \
statsfs.q, \
subquery_exists.q, \
subquery_in.q, \
- subquery_restrictions, \
subquery_multiinsert.q, \
table_access_keys_stats.q, \
temp_table.q, \
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
index e028a99..e4f3057 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
@@ -148,7 +148,7 @@ public class HiveSubQRemoveRelBuilder {
RelFactories.DEFAULT_AGGREGATE_FACTORY);
this.filterFactory =
Util.first(context.unwrap(RelFactories.FilterFactory.class),
- RelFactories.DEFAULT_FILTER_FACTORY);
+ HiveRelFactories.HIVE_FILTER_FACTORY);
this.projectFactory =
Util.first(context.unwrap(RelFactories.ProjectFactory.class),
RelFactories.DEFAULT_PROJECT_FACTORY);
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
index ce207da..b7b16b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
@@ -103,6 +103,12 @@ public class HiveFilter extends Filter implements HiveRelNode {
return allCorrVars;
}
+ public Set<CorrelationId> getVariablesSet(RexSubQuery e) {
+ Set<CorrelationId> allCorrVars = new HashSet<>();
+ traverseFilter(e, allCorrVars);
+ return allCorrVars;
+ }
+
public RelNode accept(RelShuttle shuttle) {
if (shuttle instanceof HiveRelShuttle) {
return ((HiveRelShuttle)shuttle).visit(this);
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
index 5f58aae..f1e8ebd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
@@ -48,6 +48,7 @@ import java.util.Set;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveSubQRemoveRelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
/**
* NOTE: this rule is replicated from Calcite's SubqueryRemoveRule
@@ -74,16 +75,21 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{
public void onMatch(RelOptRuleCall call) {
final Filter filter = call.rel(0);
//final RelBuilder builder = call.builder();
+ //TODO: replace HiveSubQRemoveRelBuilder with calcite's once calcite 1.11.0 is released
final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null);
final RexSubQuery e =
RexUtil.SubQueryFinder.find(filter.getCondition());
assert e != null;
+
final RelOptUtil.Logic logic =
LogicVisitor.find(RelOptUtil.Logic.TRUE,
ImmutableList.of(filter.getCondition()), e);
builder.push(filter.getInput());
final int fieldCount = builder.peek().getRowType().getFieldCount();
- final RexNode target = apply(e, filter.getVariablesSet(), logic,
+
+ assert(filter instanceof HiveFilter);
+
+ final RexNode target = apply(e, ((HiveFilter)filter).getVariablesSet(e), logic,
builder, 1, fieldCount);
final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
builder.filter(shuttle.apply(filter.getCondition()));
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index fdb468d..cc357c5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -111,7 +111,6 @@ import org.apache.calcite.util.CompositeList;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Pair;
-import org.apache.commons.lang.mutable.MutableBoolean;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -246,7 +245,6 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
-import com.google.common.math.IntMath;
public class CalcitePlanner extends SemanticAnalyzer {
@@ -1030,7 +1028,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
RelNode calciteGenPlan = null;
RelNode calcitePreCboPlan = null;
RelNode calciteOptimizedPlan = null;
- subqueryId = -1;
+ subqueryId = 0;
/*
* recreate cluster, so that it picks up the additional traitDef
@@ -2076,40 +2074,23 @@ public class CalcitePlanner extends SemanticAnalyzer {
return filterRel;
}
- private void subqueryRestritionCheck(QB qb, ASTNode searchCond, RelNode srcRel,
+ private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel,
boolean forHavingClause, Map<String, RelNode> aliasToRel ) throws SemanticException {
List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
- if (subQueriesInOriginalTree.size() > 0) {
-
- /*
- * Restriction.9.m :: disallow nested SubQuery expressions.
- */
- if (qb.getSubQueryPredicateDef() != null) {
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- subQueriesInOriginalTree.get(0),
- "Nested SubQuery expressions are not supported."));
- }
-
- /*
- * Restriction.8.m :: We allow only 1 SubQuery expression per Query.
- */
- if (subQueriesInOriginalTree.size() > 1) {
-
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
- }
+ ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
+ List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
+ for(int i=0; i<subQueriesInOriginalTree.size(); i++){
//we do not care about the transformation or rewriting of AST
// which following statement does
// we only care about the restriction checks they perform.
// We plan to get rid of these restrictions later
int sqIdx = qb.incrNumSubQueryPredicates();
- ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(0);
+ ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
- ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
- List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
- ASTNode subQueryAST = subQueries.get(0);
- clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
+ ASTNode subQueryAST = subQueries.get(i);
+
+ SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST,
originalSubQueryAST, ctx);
@@ -2123,19 +2104,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
aliasToRel.put(havingInputAlias, srcRel);
}
- subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias,
- aliasToRel.keySet());
-
- // Missing Check: Check.5.h :: For In and Not In the SubQuery must implicitly or
- // explicitly only contain one select item.
- }
+ subQuery.subqueryRestrictionsCheck(inputRR, forHavingClause, havingInputAlias);
+ }
}
private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause,
Map<ASTNode, RelNode> subQueryToRelNode,
Map<String, RelNode> aliasToRel) throws SemanticException {
//disallow subqueries which HIVE doesn't currently support
- subqueryRestritionCheck(qb, node, srcRel, forHavingClause, aliasToRel);
+ subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, aliasToRel);
Deque<ASTNode> stack = new ArrayDeque<ASTNode>();
stack.push(node);
@@ -2146,12 +2123,20 @@ public class CalcitePlanner extends SemanticAnalyzer {
switch(next.getType()) {
case HiveParser.TOK_SUBQUERY_EXPR:
+ /*
+ * Restriction 2.h Subquery is not allowed in LHS
+ */
+ if(next.getChildren().size() == 3
+ && next.getChild(2).getType() == HiveParser.TOK_SUBQUERY_EXPR){
+ throw new CalciteSemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ next.getChild(2),
+ "SubQuery in LHS expressions are not supported."));
+ }
String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates();
QB qbSQ = new QB(qb.getId(), sbQueryAlias, true);
Phase1Ctx ctx1 = initPhase1Ctx();
doPhase1((ASTNode)next.getChild(1), qbSQ, ctx1, null);
getMetaData(qbSQ);
- subqueryId++;
RelNode subQueryRelNode = genLogicalPlan(qbSQ, false, relToHiveColNameCalcitePosMap.get(srcRel),
relToHiveRR.get(srcRel));
subQueryToRelNode.put(next, subQueryRelNode);
@@ -2189,6 +2174,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
this.relToHiveColNameCalcitePosMap.put(filterRel, this.relToHiveColNameCalcitePosMap
.get(srcRel));
relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
+ this.subqueryId++;
// semi-join opt doesn't work with subqueries
conf.setBoolVar(ConfVars.SEMIJOIN_CONVERSION, false);
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
index cfac6c0..24381b9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
@@ -503,6 +503,106 @@ public class QBSubQuery implements ISubQueryJoinInfo {
originalSQASTOrigin.getUsageNode());
}
+ void subqueryRestrictionsCheck(RowResolver parentQueryRR,
+ boolean forHavingClause,
+ String outerQueryAlias)
+ throws SemanticException {
+ ASTNode insertClause = getChildFromSubqueryAST("Insert", HiveParser.TOK_INSERT);
+
+ ASTNode selectClause = (ASTNode) insertClause.getChild(1);
+
+
+ int selectExprStart = 0;
+ if ( selectClause.getChild(0).getType() == HiveParser.TOK_HINTLIST ) {
+ selectExprStart = 1;
+ }
+
+ /*
+ * Check.5.h :: For In and Not In the SubQuery must implicitly or
+ * explicitly only contain one select item.
+ */
+ if ( operator.getType() != SubQueryType.EXISTS &&
+ operator.getType() != SubQueryType.NOT_EXISTS &&
+ selectClause.getChildCount() - selectExprStart > 1 ) {
+ subQueryAST.setOrigin(originalSQASTOrigin);
+ throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+ subQueryAST, "SubQuery can contain only 1 item in Select List."));
+ }
+
+ boolean hasAggreateExprs = false;
+ boolean hasWindowing = false;
+ for(int i= selectExprStart; i < selectClause.getChildCount(); i++ ) {
+
+ ASTNode selectItem = (ASTNode) selectClause.getChild(i);
+ int r = SubQueryUtils.checkAggOrWindowing(selectItem);
+
+ hasWindowing = hasWindowing | ( r == 2);
+ hasAggreateExprs = hasAggreateExprs | ( r == 1 );
+ }
+
+ /*
+ * Restriction.13.m :: In the case of an implied Group By on a
+ * correlated SubQuery, the SubQuery always returns 1 row.
+ * An exists on a SubQuery with an implied GBy will always return true.
+ * Whereas Algebraically transforming to a Join may not return true. See
+ * Specification doc for details.
+ * Similarly a not exists on a SubQuery with an implied GBy will always return false.
+ */
+ boolean noImplicityGby = true;
+ if ( insertClause.getChild(1).getChildCount() > 3 &&
+ insertClause.getChild(1).getChild(3).getType() == HiveParser.TOK_GROUPBY ) {
+ if((ASTNode) insertClause.getChild(1).getChild(3) != null){
+ noImplicityGby = false;
+ }
+ }
+ if ( operator.getType() == SubQueryType.EXISTS &&
+ hasAggreateExprs &&
+ noImplicityGby) {
+ throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+ subQueryAST,
+ "An Exists predicate on SubQuery with implicit Aggregation(no Group By clause) " +
+ "cannot be rewritten. (predicate will always return true)."));
+ }
+ if ( operator.getType() == SubQueryType.NOT_EXISTS &&
+ hasAggreateExprs &&
+ noImplicityGby) {
+ throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+ subQueryAST,
+ "A Not Exists predicate on SubQuery with implicit Aggregation(no Group By clause) " +
+ "cannot be rewritten. (predicate will always return false)."));
+ }
+
+ ASTNode whereClause = SubQueryUtils.subQueryWhere(insertClause);
+
+ if ( whereClause == null ) {
+ return;
+ }
+ ASTNode searchCond = (ASTNode) whereClause.getChild(0);
+ List<ASTNode> conjuncts = new ArrayList<ASTNode>();
+ SubQueryUtils.extractConjuncts(searchCond, conjuncts);
+
+ ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR,
+ forHavingClause, outerQueryAlias);
+ ASTNode sqNewSearchCond = null;
+
+ boolean hasCorrelation = false;
+ for(ASTNode conjunctAST : conjuncts) {
+ Conjunct conjunct = conjunctAnalyzer.analyzeConjunct(conjunctAST);
+ if(conjunct.isCorrelated()){
+ hasCorrelation = true;
+ break;
+ }
+ }
+
+ /*
+ * Restriction.14.h :: Correlated Sub Queries cannot contain Windowing clauses.
+ */
+ if ( hasWindowing && hasCorrelation) {
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueryAST, "Correlated Sub Queries cannot contain Windowing clauses."));
+ }
+ }
+
void validateAndRewriteAST(RowResolver outerQueryRR,
boolean forHavingClause,
String outerQueryAlias,
@@ -776,17 +876,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
Conjunct conjunct = conjunctAnalyzer.analyzeConjunct(conjunctAST);
/*
- * Restriction.11.m :: A SubQuery predicate that refers to an Outer
- * Query column must be a valid Join predicate.
- */
- if ( conjunct.eitherSideRefersBoth() ) {
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- conjunctAST,
- "SubQuery expression refers to both Parent and SubQuery expressions and " +
- "is not a valid join condition."));
- }
-
- /*
* Check.12.h :: SubQuery predicates cannot only refer to Outer Query columns.
*/
if ( conjunct.refersOuterOnly() ) {
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientnegative/subquery_restrictions.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_restrictions.q b/ql/src/test/queries/clientnegative/subquery_restrictions.q
deleted file mode 100644
index 80870d7..0000000
--- a/ql/src/test/queries/clientnegative/subquery_restrictions.q
+++ /dev/null
@@ -1,92 +0,0 @@
---Restriction.1.h SubQueries only supported in the SQL Where Clause.
-select src.key in (select key from src s1 where s1.key > '9')
-from src;
-
-select count(*)
-from src
-group by src.key in (select key from src s1 where s1.key > '9') ;
-
---Restriction.2.h The subquery can only be the RHS of an expression
-----curently paser doesn't allow such queries
---select * from part where (select p_size from part) IN (1,2);
-
---Restriction.3.m The predicate operators supported are In, Not In, exists and Not exists.
-----select * from part where p_brand > (select key from src);
-
---Check.4.h For Exists and Not Exists, the Sub Query must have 1 or more correlated predicates.
-select * from src where exists (select * from part);
-
---Check.5.h multiple columns in subquery select
-select * from src where src.key in (select * from src s1 where s1.key > '9');
-
---Restriction.6.m The LHS in a SubQuery must have all its Column References be qualified
---This is not restriction anymore
-
---Restriction 7.h subquery with or condition
-select count(*)
-from src
-where src.key in (select key from src s1 where s1.key > '9') or src.value is not null
-;
-
---Restriction.8.m We allow only 1 SubQuery expression per Query
-select * from part where p_size IN (select p_size from part) AND p_brand IN (select p_brand from part);
-
---Restriction 9.m nested subquery
-select *
-from part x
-where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name))
-;
-
---Restriction.10.h In a SubQuery references to Parent Query columns is only supported in the where clause.
-select * from part where p_size in (select p.p_size + part.p_size from part p);
-select * from part where part.p_size IN (select min(p_size) from part p group by part.p_type);
-
-
---Restriction.11.m A SubQuery predicate that refers to a Parent Query column must be a valid Join predicate
-select * from part where p_size in (select p_size from part p where p.p_type > part.p_type);
-select * from part where part.p_size IN (select min(p_size) from part p where NOT(part.p_type = p.p_type));
-
-
---Check.12.h SubQuery predicates cannot only refer to Parent Query columns
-select * from part where p_name IN (select p_name from part p where part.p_type <> 1);
-
---Restriction.13.m In the case of an implied Group By on a correlated Sub- Query, the SubQuery always returns 1 row. For e.g. a count on an empty set is 0, while all other UDAFs return null. Converting such a SubQuery into a Join by computing all Groups in one shot, changes the semantics: the Group By SubQuery output will not contain rows for Groups that don\u2019t exist.
-select *
-from src b
-where exists
- (select count(*)
- from src a
- where b.value = a.value and a.key = b.key and a.value > 'val_9'
- )
-;
-
---Restriction.14.h Correlated Sub Queries cannot contain Windowing clauses.
-select p_mfgr, p_name, p_size
-from part a
-where a.p_size in
- (select first_value(p_size) over(partition by p_mfgr order by p_size)
- from part b
- where a.p_brand = b.p_brand)
-;
-
---Restriction 15.h all unqualified column references in a SubQuery will resolve to table sources within the SubQuery.
-select *
-from src
-where src.key in (select key from src where key > '9')
-;
-
-----------------------------------------------------------------
--- Following tests does not fall under any restrictions per-se, they just currently don't work with HIVE
-----------------------------------------------------------------
-
--- correlated var which refers to outer query join table
-explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_partkey = p.l_partkey) ;
-
--- union, not in, corr
-explain select * from part where p_name NOT IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part);
-
--- union, not in, corr, cor var in both queries
-explain select * from part where p_name NOT IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part pp where pp.p_mfgr = part.p_comment);
-
--- IN, union, corr
-explain select * from part where p_name IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part);
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientnegative/subquery_subquery_chain_exists.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_subquery_chain_exists.q b/ql/src/test/queries/clientnegative/subquery_subquery_chain_exists.q
new file mode 100644
index 0000000..0a771e2
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_subquery_chain_exists.q
@@ -0,0 +1,4 @@
+explain
+select *
+from src
+where (exists(select key from src)) in (select key from src);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query16.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query16.q b/ql/src/test/queries/clientpositive/perf/query16.q
new file mode 100644
index 0000000..0243bf3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query16.q
@@ -0,0 +1,30 @@
+set hive.mapred.mode=nonstrict;
+
+explain select
+ count(distinct cs_order_number) as `order count`
+ ,sum(cs_ext_ship_cost) as `total shipping cost`
+ ,sum(cs_net_profit) as `total net profit`
+from
+ catalog_sales cs1
+ ,date_dim
+ ,customer_address
+ ,call_center
+where
+ d_date between '2001-4-01' and
+ (cast('2001-4-01' as date) + 60 days)
+and cs1.cs_ship_date_sk = d_date_sk
+and cs1.cs_ship_addr_sk = ca_address_sk
+and ca_state = 'NY'
+and cs1.cs_call_center_sk = cc_call_center_sk
+and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish',
+ 'Daviess County'
+)
+and exists (select *
+ from catalog_sales cs2
+ where cs1.cs_order_number = cs2.cs_order_number
+ and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
+and not exists(select *
+ from catalog_returns cr1
+ where cs1.cs_order_number = cr1.cr_order_number)
+order by `order count`
+limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query23.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query23.q b/ql/src/test/queries/clientpositive/perf/query23.q
new file mode 100644
index 0000000..e8ebd86
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query23.q
@@ -0,0 +1,48 @@
+set hive.mapred.mode=nonstrict;
+
+explain with frequent_ss_items as
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+ from store_sales
+ ,date_dim
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_item_sk = i_item_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by substr(i_item_desc,1,30),i_item_sk,d_date
+ having count(*) >4),
+ max_store_sales as
+ (select max(csales) tpcds_cmax
+ from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+ from store_sales
+ ,customer
+ ,date_dim
+ where ss_customer_sk = c_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by c_customer_sk) x),
+ best_ss_customer as
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+ from store_sales
+ ,customer
+ where ss_customer_sk = c_customer_sk
+ group by c_customer_sk
+ having sum(ss_quantity*ss_sales_price) > (95/100.0))
+ select sum(sales)
+ from (select cs_quantity*cs_list_price sales
+ from catalog_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and cs_sold_date_sk = d_date_sk
+ and cs_item_sk in (select item_sk from frequent_ss_items)
+ and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+ union all
+ select ws_quantity*ws_list_price sales
+ from web_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and ws_sold_date_sk = d_date_sk
+ and ws_item_sk in (select item_sk from frequent_ss_items)
+ and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) y
+ limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query33.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query33.q b/ql/src/test/queries/clientpositive/perf/query33.q
new file mode 100644
index 0000000..06628bd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query33.q
@@ -0,0 +1,73 @@
+-- start query 1 in stream 0 using template query33.tpl and seed 1930872976
+explain with ss as (
+ select
+ i_manufact_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ cs as (
+ select
+ i_manufact_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ ws as (
+ select
+ i_manufact_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id)
+ select i_manufact_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_manufact_id
+ order by total_sales
+limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query45.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query45.q b/ql/src/test/queries/clientpositive/perf/query45.q
deleted file mode 100644
index 0b34bfd..0000000
--- a/ql/src/test/queries/clientpositive/perf/query45.q
+++ /dev/null
@@ -1 +0,0 @@
-explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales JOIN customer ON web_sales.ws_bill_customer_sk = customer.c_customer_sk JOIN customer_address ON customer.c_current_addr_sk = customer_address.ca_address_sk JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk JOIN item ON web_sales.ws_item_sk = item.i_item_sk where ( item.i_item_id in (select i_item_id from item i2 where i2.i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) ) ) and d_qoy = 2 and d_year = 2000 group by ca_zip, ca_county order by ca_zip, ca_county limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query56.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query56.q b/ql/src/test/queries/clientpositive/perf/query56.q
new file mode 100644
index 0000000..63e53ea
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query56.q
@@ -0,0 +1,65 @@
+explain with ss as (
+ select i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ cs as (
+ select i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ ws as (
+ select i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id)
+ select i_item_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by total_sales
+ limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query60.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query60.q b/ql/src/test/queries/clientpositive/perf/query60.q
new file mode 100644
index 0000000..efa86d1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query60.q
@@ -0,0 +1,77 @@
+-- start query 1 in stream 0 using template query60.tpl and seed 1930872976
+explain with ss as (
+ select
+ i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ cs as (
+ select
+ i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ ws as (
+ select
+ i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id)
+ select
+ i_item_id
+,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by i_item_id
+ ,total_sales
+ limit 100;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query69.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query69.q b/ql/src/test/queries/clientpositive/perf/query69.q
new file mode 100644
index 0000000..d9528ae
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query69.q
@@ -0,0 +1,46 @@
+set hive.mapred.mode=nonstrict;
+
+explain select
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_state in ('CO','IL','MN') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ (not exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ not exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query83.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query83.q b/ql/src/test/queries/clientpositive/perf/query83.q
new file mode 100644
index 0000000..0186e03
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query83.q
@@ -0,0 +1,65 @@
+-- start query 1 in stream 0 using template query83.tpl and seed 1930872976
+explain with sr_items as
+ (select i_item_id item_id,
+ sum(sr_return_quantity) sr_item_qty
+ from store_returns,
+ item,
+ date_dim
+ where sr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and sr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ cr_items as
+ (select i_item_id item_id,
+ sum(cr_return_quantity) cr_item_qty
+ from catalog_returns,
+ item,
+ date_dim
+ where cr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and cr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ wr_items as
+ (select i_item_id item_id,
+ sum(wr_return_quantity) wr_item_qty
+ from web_returns,
+ item,
+ date_dim
+ where wr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and wr_returned_date_sk = d_date_sk
+ group by i_item_id)
+ select sr_items.item_id
+ ,sr_item_qty
+ ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev
+ ,cr_item_qty
+ ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev
+ ,wr_item_qty
+ ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev
+ ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average
+ from sr_items
+ ,cr_items
+ ,wr_items
+ where sr_items.item_id=cr_items.item_id
+ and sr_items.item_id=wr_items.item_id
+ order by sr_items.item_id
+ ,sr_item_qty
+ limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_exists.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_exists.q b/ql/src/test/queries/clientpositive/subquery_exists.q
index be80721..cac8e1b 100644
--- a/ql/src/test/queries/clientpositive/subquery_exists.q
+++ b/ql/src/test/queries/clientpositive/subquery_exists.q
@@ -58,3 +58,21 @@ where exists
)
;
+-- uncorr exists
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ );
+
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ );
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_in.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q
index 5b22dce..fe0c9c8 100644
--- a/ql/src/test/queries/clientpositive/subquery_in.q
+++ b/ql/src/test/queries/clientpositive/subquery_in.q
@@ -35,6 +35,7 @@ where b.key in
)
;
+
-- agg, non corr
explain
select p_name, p_size
@@ -120,6 +121,10 @@ where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
;
+-- corr, agg in outer and inner
+explain select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey);
+select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey);
+
--where has multiple conjuction
explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340;
@@ -219,4 +224,3 @@ select * from part where p_size IN (select i from tnull);
select * from tnull where i IN (select i from tnull);
drop table tempty;
-
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_in_having.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in_having.q b/ql/src/test/queries/clientpositive/subquery_in_having.q
index 2433556..40b7e32 100644
--- a/ql/src/test/queries/clientpositive/subquery_in_having.q
+++ b/ql/src/test/queries/clientpositive/subquery_in_having.q
@@ -96,6 +96,12 @@ where b.key in (select key from src where src.key > '8')
group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.key > '8')
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
+;
set hive.auto.convert.join=true;
-- Plan is:
@@ -113,6 +119,21 @@ group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
+-- both having and where corr
+explain
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key )
+;
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key )
+;
+
-- non agg, non corr, windowing
explain
select p_mfgr, p_name, avg(p_size)
@@ -122,4 +143,22 @@ having p_name in
(select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq)
;
+CREATE TABLE src_null (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE src_null;
+INSERT INTO src_null values('5444', null);
+
+explain
+select key, value, count(*)
+from src_null b
+where NOT EXISTS (select key from src_null where src_null.value <> b.value)
+group by key, value
+having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key );
+
+select key, value, count(*)
+from src_null b
+where NOT EXISTS (select key from src_null where src_null.value <> b.value)
+group by key, value
+having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key );
+
+DROP TABLE src_null;
DROP TABLE part_subq;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_multi.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_multi.q b/ql/src/test/queries/clientpositive/subquery_multi.q
new file mode 100644
index 0000000..aff7f20
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/subquery_multi.q
@@ -0,0 +1,111 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+
+create table tnull(i int, c char(2));
+insert into tnull values(NULL, NULL), (NULL, NULL);
+
+create table tempty(c char(2));
+
+CREATE TABLE part_null(
+ p_partkey INT,
+ p_name STRING,
+ p_mfgr STRING,
+ p_brand STRING,
+ p_type STRING,
+ p_size INT,
+ p_container STRING,
+ p_retailprice DOUBLE,
+ p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+;
+
+LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null;
+
+insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith');
+
+-- multiple subquery
+
+-- Both IN are always true so should return all rows
+explain select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null);
+select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null);
+
+-- NOT IN has null value so should return 0 rows
+explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null);
+select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null);
+
+-- NOT IN is always true and IN is false for where p_name is NULL, hence should return all but one row
+explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null);
+select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null);
+
+-- NOT IN has one NULL value so this whole query should not return any row
+explain select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null);
+select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null);
+
+-- NOT IN is always true irrespective of p_name being null/non-null since inner query is empty
+-- second query is always true so this should return all rows
+explain select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null);
+select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null);
+
+-- IN, EXISTS
+explain select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull);
+select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull);
+
+explain select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty);
+select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty);
+
+explain select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty);
+select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty);
+
+-- corr, mix of IN/NOT IN
+explain select * from part_null where p_name IN ( select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_container from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+select * from part_null where p_name IN ( select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_container from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+
+-- mix of corr and uncorr
+explain select * from part_null where p_name IN ( select p_name from part) AND p_brand IN (select p_brand from part where part.p_type = part_null.p_type);
+select * from part_null where p_name IN ( select p_name from part) AND p_brand IN (select p_brand from part where part.p_type = part_null.p_type);
+
+-- one query has multiple corr
+explain select * from part_null where p_name IN ( select p_name from part where part.p_type = part_null.p_type AND part.p_container=part_null.p_container) AND p_brand NOT IN (select p_container from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+select * from part_null where p_name IN ( select p_name from part where part.p_type = part_null.p_type AND part.p_container=part_null.p_container) AND p_brand NOT IN (select p_container from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+
+--diff corr var (all reffering to diff outer var)
+explain select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_type from part where part.p_size = part_null.p_size);
+select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_type from part where part.p_size = part_null.p_size);
+
+-- NESTED QUERIES
+-- both queries are correlated
+explain select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+
+-- in, not in corr
+explain select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber AND l_quantity NOT IN (select avg(l_quantity) from lineitem));
+select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber AND l_quantity NOT IN (select avg(l_quantity) from lineitem));
+
+explain
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and exists (select * from src s2 where s1.value = s2.value) group by s1.key )
+ ;
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and exists (select * from src s2 where s1.value = s2.value) group by s1.key ) ;
+
+-- subquery pred only refer to parent query column
+explain select * from part where p_name IN (select p_name from part p where part.p_type <> '1');
+select * from part where p_name IN (select p_name from part p where part.p_type <> '1');
+
+drop table tnull;
+drop table tempty;
+drop table part_null;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_notexists.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_notexists.q b/ql/src/test/queries/clientpositive/subquery_notexists.q
index 2d7e9ce..dc48c3e 100644
--- a/ql/src/test/queries/clientpositive/subquery_notexists.q
+++ b/ql/src/test/queries/clientpositive/subquery_notexists.q
@@ -39,4 +39,23 @@ where not exists
from src a
where b.value = a.value and a.value > 'val_2'
)
+;
+
+-- non equi predicate
+explain
+select *
+from src b
+where not exists
+ (select a.key
+ from src a
+ where b.value <> a.value and a.key > b.key and a.value > 'val_2'
+ )
+;
+select *
+from src b
+where not exists
+ (select a.key
+ from src a
+ where b.value <> a.value and a.key > b.key and a.value > 'val_2'
+ )
;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_notin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q
index c29e63f..f9b5405 100644
--- a/ql/src/test/queries/clientpositive/subquery_notin.q
+++ b/ql/src/test/queries/clientpositive/subquery_notin.q
@@ -228,3 +228,20 @@ select t.i from t where t.i NOT IN (select t1.i from t t1 );
drop table t1;
+-- corr predicate is not equi
+explain select *
+from src b
+where b.key not in
+ (select a.key
+ from src a
+ where b.value > a.value and a.key > '9'
+ )
+;
+select *
+from src b
+where b.key not in
+ (select a.key
+ from src a
+ where b.value > a.value and a.key > '9'
+ );
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out b/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
index fb72270..4475502 100644
--- a/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
+++ b/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10249]: Line 4:53 Unsupported SubQuery Expression 'p_name': SubQuery expression refers to both Parent and SubQuery expressions and is not a valid join condition.
+FAILED: SemanticException [Error 10249]: Line 4:53 Unsupported SubQuery Expression 'p_name': Nested SubQuery expressions are not supported.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientnegative/subquery_restrictions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_restrictions.q.out b/ql/src/test/results/clientnegative/subquery_restrictions.q.out
deleted file mode 100644
index a546d49..0000000
--- a/ql/src/test/results/clientnegative/subquery_restrictions.q.out
+++ /dev/null
@@ -1 +0,0 @@
-FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out b/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out
new file mode 100644
index 0000000..0a79559
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10249]: Line 4:56 Unsupported SubQuery Expression 'key': Only 1 SubQuery expression is supported.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index fa54bb7..bea58fb 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -2459,14 +2459,14 @@ Stage-0
Stage-1
Reducer 4 llap
File Output Operator [FS_27]
- Select Operator [SEL_26] (rows=250 width=178)
+ Select Operator [SEL_26] (rows=500 width=178)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_25]
- Select Operator [SEL_24] (rows=250 width=178)
+ Select Operator [SEL_24] (rows=500 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_23] (rows=250 width=198)
- predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END)
+ Filter Operator [FIL_23] (rows=500 width=198)
+ predicate:((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2)))
Merge Join Operator [MERGEJOIN_32] (rows=500 width=198)
Conds:RS_20._col0=RS_21._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"]
<-Reducer 2 [SIMPLE_EDGE] llap
@@ -2683,14 +2683,14 @@ Stage-0
Stage-1
Reducer 4 llap
File Output Operator [FS_36]
- Select Operator [SEL_35] (rows=13 width=125)
+ Select Operator [SEL_35] (rows=26 width=125)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_34]
- Select Operator [SEL_33] (rows=13 width=125)
+ Select Operator [SEL_33] (rows=26 width=125)
Output:["_col0","_col1"]
- Filter Operator [FIL_32] (rows=13 width=145)
- predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END)
+ Filter Operator [FIL_32] (rows=26 width=145)
+ predicate:((_col2 = 0) or (_col5 is null and _col1 is not null and (_col3 >= _col2)))
Merge Join Operator [MERGEJOIN_42] (rows=26 width=145)
Conds:RS_29.UDFToDouble(_col1)=RS_30._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"]
<-Reducer 2 [SIMPLE_EDGE] llap
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/llap/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out
index 72a9344..43e6b4b 100644
--- a/ql/src/test/results/clientpositive/llap/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out
@@ -186,7 +186,7 @@ where key not in (select key+18 from src1) order by key
PREHOOK: type: QUERY
PREHOOK: Input: default@src1
#### A masked pattern was here ####
-{"version":"1.0","engine":"tez","database":"default","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(not CASE WHEN ((count(*) = 0)) THEN (false) WHEN (i is not null) THEN (true) WHEN (src1.key is null) THEN (null) WHEN ((count((UDFToDouble(src1.key) + 18.0)) < count(*))) THEN (true) ELSE (false) END)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"((count(*) = 0) or (i is null and src1.key is not null and (count((UDFToDouble(src1.key) + 18.0)) >= count(*))))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]}
PREHOOK: query: select * from src1 a
where not exists
(select cint from alltypesorc b
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
index b132cb6..3d8251f 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
@@ -431,3 +431,633 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: -- uncorr exists
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- uncorr exists
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (value > 'val_9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+100 val_100
+103 val_103
+103 val_103
+104 val_104
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+119 val_119
+119 val_119
+119 val_119
+12 val_12
+12 val_12
+120 val_120
+120 val_120
+125 val_125
+125 val_125
+126 val_126
+128 val_128
+128 val_128
+128 val_128
+129 val_129
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+134 val_134
+136 val_136
+137 val_137
+137 val_137
+138 val_138
+138 val_138
+138 val_138
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+146 val_146
+149 val_149
+149 val_149
+15 val_15
+15 val_15
+150 val_150
+152 val_152
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+164 val_164
+165 val_165
+165 val_165
+166 val_166
+167 val_167
+167 val_167
+167 val_167
+168 val_168
+169 val_169
+169 val_169
+169 val_169
+169 val_169
+17 val_17
+170 val_170
+172 val_172
+172 val_172
+174 val_174
+174 val_174
+175 val_175
+175 val_175
+176 val_176
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+179 val_179
+18 val_18
+18 val_18
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+187 val_187
+187 val_187
+189 val_189
+19 val_19
+190 val_190
+191 val_191
+191 val_191
+192 val_192
+193 val_193
+193 val_193
+193 val_193
+194 val_194
+195 val_195
+195 val_195
+196 val_196
+197 val_197
+197 val_197
+199 val_199
+199 val_199
+199 val_199
+2 val_2
+20 val_20
+200 val_200
+200 val_200
+201 val_201
+202 val_202
+203 val_203
+203 val_203
+205 val_205
+205 val_205
+207 val_207
+207 val_207
+208 val_208
+208 val_208
+208 val_208
+209 val_209
+209 val_209
+213 val_213
+213 val_213
+214 val_214
+216 val_216
+216 val_216
+217 val_217
+217 val_217
+218 val_218
+219 val_219
+219 val_219
+221 val_221
+221 val_221
+222 val_222
+223 val_223
+223 val_223
+224 val_224
+224 val_224
+226 val_226
+228 val_228
+229 val_229
+229 val_229
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+233 val_233
+233 val_233
+235 val_235
+237 val_237
+237 val_237
+238 val_238
+238 val_238
+239 val_239
+239 val_239
+24 val_24
+24 val_24
+241 val_241
+242 val_242
+242 val_242
+244 val_244
+247 val_247
+248 val_248
+249 val_249
+252 val_252
+255 val_255
+255 val_255
+256 val_256
+256 val_256
+257 val_257
+258 val_258
+26 val_26
+26 val_26
+260 val_260
+262 val_262
+263 val_263
+265 val_265
+265 val_265
+266 val_266
+27 val_27
+272 val_272
+272 val_272
+273 val_273
+273 val_273
+273 val_273
+274 val_274
+275 val_275
+277 val_277
+277 val_277
+277 val_277
+277 val_277
+278 val_278
+278 val_278
+28 val_28
+280 val_280
+280 val_280
+281 val_281
+281 val_281
+282 val_282
+282 val_282
+283 val_283
+284 val_284
+285 val_285
+286 val_286
+287 val_287
+288 val_288
+288 val_288
+289 val_289
+291 val_291
+292 val_292
+296 val_296
+298 val_298
+298 val_298
+298 val_298
+30 val_30
+302 val_302
+305 val_305
+306 val_306
+307 val_307
+307 val_307
+308 val_308
+309 val_309
+309 val_309
+310 val_310
+311 val_311
+311 val_311
+311 val_311
+315 val_315
+316 val_316
+316 val_316
+316 val_316
+317 val_317
+317 val_317
+318 val_318
+318 val_318
+318 val_318
+321 val_321
+321 val_321
+322 val_322
+322 val_322
+323 val_323
+325 val_325
+325 val_325
+327 val_327
+327 val_327
+327 val_327
+33 val_33
+331 val_331
+331 val_331
+332 val_332
+333 val_333
+333 val_333
+335 val_335
+336 val_336
+338 val_338
+339 val_339
+34 val_34
+341 val_341
+342 val_342
+342 val_342
+344 val_344
+344 val_344
+345 val_345
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+35 val_35
+35 val_35
+35 val_35
+351 val_351
+353 val_353
+353 val_353
+356 val_356
+360 val_360
+362 val_362
+364 val_364
+365 val_365
+366 val_366
+367 val_367
+367 val_367
+368 val_368
+369 val_369
+369 val_369
+369 val_369
+37 val_37
+37 val_37
+373 val_373
+374 val_374
+375 val_375
+377 val_377
+378 val_378
+379 val_379
+382 val_382
+382 val_382
+384 val_384
+384 val_384
+384 val_384
+386 val_386
+389 val_389
+392 val_392
+393 val_393
+394 val_394
+395 val_395
+395 val_395
+396 val_396
+396 val_396
+396 val_396
+397 val_397
+397 val_397
+399 val_399
+399 val_399
+4 val_4
+400 val_400
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+402 val_402
+403 val_403
+403 val_403
+403 val_403
+404 val_404
+404 val_404
+406 val_406
+406 val_406
+406 val_406
+406 val_406
+407 val_407
+409 val_409
+409 val_409
+409 val_409
+41 val_41
+411 val_411
+413 val_413
+413 val_413
+414 val_414
+414 val_414
+417 val_417
+417 val_417
+417 val_417
+418 val_418
+419 val_419
+42 val_42
+42 val_42
+421 val_421
+424 val_424
+424 val_424
+427 val_427
+429 val_429
+429 val_429
+43 val_43
+430 val_430
+430 val_430
+430 val_430
+431 val_431
+431 val_431
+431 val_431
+432 val_432
+435 val_435
+436 val_436
+437 val_437
+438 val_438
+438 val_438
+438 val_438
+439 val_439
+439 val_439
+44 val_44
+443 val_443
+444 val_444
+446 val_446
+448 val_448
+449 val_449
+452 val_452
+453 val_453
+454 val_454
+454 val_454
+454 val_454
+455 val_455
+457 val_457
+458 val_458
+458 val_458
+459 val_459
+459 val_459
+460 val_460
+462 val_462
+462 val_462
+463 val_463
+463 val_463
+466 val_466
+466 val_466
+466 val_466
+467 val_467
+468 val_468
+468 val_468
+468 val_468
+468 val_468
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+47 val_47
+470 val_470
+472 val_472
+475 val_475
+477 val_477
+478 val_478
+478 val_478
+479 val_479
+480 val_480
+480 val_480
+480 val_480
+481 val_481
+482 val_482
+483 val_483
+484 val_484
+485 val_485
+487 val_487
+489 val_489
+489 val_489
+489 val_489
+489 val_489
+490 val_490
+491 val_491
+492 val_492
+492 val_492
+493 val_493
+494 val_494
+495 val_495
+496 val_496
+497 val_497
+498 val_498
+498 val_498
+498 val_498
+5 val_5
+5 val_5
+5 val_5
+51 val_51
+51 val_51
+53 val_53
+54 val_54
+57 val_57
+58 val_58
+58 val_58
+64 val_64
+65 val_65
+66 val_66
+67 val_67
+67 val_67
+69 val_69
+70 val_70
+70 val_70
+70 val_70
+72 val_72
+72 val_72
+74 val_74
+76 val_76
+76 val_76
+77 val_77
+78 val_78
+8 val_8
+80 val_80
+82 val_82
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+85 val_85
+86 val_86
+87 val_87
+9 val_9
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
[8/9] hive git commit: HIVE-15481 : Support multiple and nested
subqueries (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/llap/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
index e71add5..887a27e 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -1180,6 +1180,279 @@ POSTHOOK: Input: default@lineitem
#### A masked pattern was here ####
108570 8571
4297 1798
+Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product
+PREHOOK: query: -- corr, agg in outer and inner
+explain select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- corr, agg in outer and inner
+explain select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE)
+ Reducer 12 <- Reducer 11 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE)
+ Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: l_partkey (type: int), l_quantity (type: double), l_extendedprice (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 100 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double), _col2 (type: double)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_partkey is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: l_partkey (type: int), l_quantity (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 11
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col1
+ Statistics: Num rows: 2600 Data size: 10400 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 12
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: double), _col3 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: double), _col3 (type: int)
+ Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: double)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: double), _col3 (type: int)
+ 1 _col0 (type: double), _col1 (type: int)
+ outputColumnNames: _col2
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col2)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: double)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 23 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: int), _col1 (type: double)
+ outputColumnNames: _col2, _col1
+ Statistics: Num rows: 23 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: avg(_col1)
+ keys: _col2 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 924 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 11 Data size: 924 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: struct<count:bigint,sum:double,input:double>)
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: double), _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: double), _col1 (type: int)
+ Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 9
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: double), _col1 (type: int)
+ Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product
+PREHOOK: query: select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+NULL
PREHOOK: query: --where has multiple conjuction
explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340
PREHOOK: type: QUERY
[7/9] hive git commit: HIVE-15481 : Support multiple and nested
subqueries (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
new file mode 100644
index 0000000..7765221
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
@@ -0,0 +1,5610 @@
+PREHOOK: query: create table tnull(i int, c char(2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tnull
+POSTHOOK: query: create table tnull(i int, c char(2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tnull
+PREHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tnull
+POSTHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tnull
+POSTHOOK: Lineage: tnull.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: tnull.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: create table tempty(c char(2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tempty
+POSTHOOK: query: create table tempty(c char(2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tempty
+PREHOOK: query: CREATE TABLE part_null(
+ p_partkey INT,
+ p_name STRING,
+ p_mfgr STRING,
+ p_brand STRING,
+ p_type STRING,
+ p_size INT,
+ p_container STRING,
+ p_retailprice DOUBLE,
+ p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_null
+POSTHOOK: query: CREATE TABLE part_null(
+ p_partkey INT,
+ p_name STRING,
+ p_mfgr STRING,
+ p_brand STRING,
+ p_type STRING,
+ p_size INT,
+ p_container STRING,
+ p_retailprice DOUBLE,
+ p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_null
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@part_null
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@part_null
+PREHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@part_null
+POSTHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@part_null
+POSTHOOK: Lineage: part_null.p_brand SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_comment SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_container SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_mfgr SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_partkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_retailprice EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_size EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_type SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ]
+PREHOOK: query: -- multiple subquery
+
+-- Both IN are always true so should return all rows
+explain select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- multiple subquery
+
+-- Both IN are always true so should return all rows
+explain select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col5 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col5 (type: int)
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_size (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_brand (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col5 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 447 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 447 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 491 Data size: 1969 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 491 Data size: 1969 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_null
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_null
+#### A masked pattern was here ####
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+78487 NULL Manufacturer#6 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: -- NOT IN has null value so should return 0 rows
+explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NOT IN has null value so should return 0 rows
+explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 10 <- Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_name (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: p_name
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(p_name)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean)
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 10
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11
+ Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col3 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13
+ Statistics: Num rows: 18 Data size: 2286 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col10 = 0) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_null
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_null
+#### A masked pattern was here ####
+Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: -- NOT IN is always true and IN is false for where p_name is NULL, hence should return all but one row
+explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NOT IN is always true and IN is false for where p_name is NULL, hence should return all but one row
+explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 10 <- Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_name (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_type (type: string)
+ outputColumnNames: p_type
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(p_type)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_type (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean)
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 10
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11
+ Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col3 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13
+ Statistics: Num rows: 18 Data size: 2286 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col10 = 0) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_null
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_null
+#### A masked pattern was here ####
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: -- NOT IN has one NULL value so this whole query should not return any row
+explain select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NOT IN has one NULL value so this whole query should not return any row
+explain select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 10 <- Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_brand (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: p_name
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(p_name)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_name (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean)
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 10
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11
+ Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col3 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13
+ Statistics: Num rows: 18 Data size: 2286 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col10 = 0) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_null
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_null
+#### A masked pattern was here ####
+Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: -- NOT IN is always true irrespective of p_name being null/non-null since inner query is empty
+-- second query is always true so this should return all rows
+explain select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NOT IN is always true irrespective of p_name being null/non-null since inner query is empty
+-- second query is always true so this should return all rows
+explain select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 10 <- Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: tempty
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: c (type: char(2))
+ outputColumnNames: c
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(c)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: tempty
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: c (type: char(2))
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: char(2)), true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: char(2)), _col1 (type: boolean)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: char(2)), _col1 (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_brand (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 10
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 5 Data size: 3341 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 5 Data size: 3341 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 UDFToString(_col0) (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12
+ Statistics: Num rows: 5 Data size: 3675 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col9 = 0) or (_col12 is null and _col1 is not null and (_col10 >= _col9))) (type: boolean)
+ Statistics: Num rows: 2 Data size: 1470 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 2 Data size: 1470 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 2 Data size: 1470 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: char(2)), KEY._col1 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToString(_col0) (type: string)
+ sort order: +
+ Map-reduce partition columns: UDFToString(_col0) (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_null
+PREHOOK: Input: default@tempty
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_null
+POSTHOOK: Input: default@tempty
+#### A masked pattern was here ####
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+78487 NULL Manufacturer#6 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: -- IN, EXISTS
+explain select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- IN, EXISTS
+explain select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_name (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: tnull
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 17 Data size: 1875 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 17 Data size: 1875 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_null
+PREHOOK: Input: default@tnull
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_null
+POSTHOOK: Input: default@tnull
+#### A masked pattern was here ####
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
+85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col5 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col5 (type: int)
+ Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: part_null
+ Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_size (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: tempty
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ keys: true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col5 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 447 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 447 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 447 Data size: 2237 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 447 Data size: 2237 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_null
+PREHOOK: Input: default@tempty
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_null
+POSTHOOK: Input: default@tempty
+#### A masked pattern was here ####
+Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: explain select * from part_null whe
<TRUNCATED>
[2/9] hive git commit: HIVE-15481 : Support multiple and nested
subqueries (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/subquery_in_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out
index 1ab7e7f..86a9d9a 100644
--- a/ql/src/test/results/clientpositive/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out
@@ -1093,6 +1093,36 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: select key, value, count(*)
+from src b
+where b.key in (select key from src where src.key > '8')
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, count(*)
+from src b
+where b.key in (select key from src where src.key > '8')
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+80 val_80 1
+82 val_82 1
+83 val_83 2
+84 val_84 2
+85 val_85 1
+86 val_86 1
+87 val_87 1
+9 val_9 1
+90 val_90 3
+92 val_92 1
+95 val_95 2
+96 val_96 1
+97 val_97 2
+98 val_98 2
PREHOOK: query: -- Plan is:
-- Stage 5: group by on sq2:src (subquery in having)
-- Stage 10: hashtable for sq1:src (subquery in where)
@@ -1347,96 +1377,251 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: -- non agg, non corr, windowing
+PREHOOK: query: -- both having and where corr
explain
-select p_mfgr, p_name, avg(p_size)
-from part_subq
-group by p_mfgr, p_name
-having p_name in
- (select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq)
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key )
PREHOOK: type: QUERY
-POSTHOOK: query: -- non agg, non corr, windowing
+POSTHOOK: query: -- both having and where corr
explain
-select p_mfgr, p_name, avg(p_size)
-from part_subq
-group by p_mfgr, p_name
-having p_name in
- (select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq)
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key )
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-3
Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
+ Stage-2 depends on stages: Stage-1, Stage-6
+ Stage-9 is a root stage
+ Stage-7 depends on stages: Stage-9
+ Stage-8 depends on stages: Stage-7
+ Stage-12 depends on stages: Stage-8
+ Stage-6 depends on stages: Stage-12
Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: part_subq
- Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
- outputColumnNames: p_name, p_mfgr, p_size
- Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(p_size)
- keys: p_name (type: string), p_mfgr (type: string)
+ keys: value (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: struct<count:bigint,sum:double,input:int>)
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
- Group By Operator
- aggregations: avg(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 1480 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col0 (type: string), _col2 (type: double)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 1480 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Demux Operator
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Demux Operator
+ Statistics: Num rows: 501 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 751 Data size: 7962 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string), _col0 (type: string)
+ 1 _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col1, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2650 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2650 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 751 Data size: 7962 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string), _col0 (type: string)
+ 1 _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col1, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 7 Data size: 1480 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col2 (type: double)
+ key expressions: _col1 (type: string), _col2 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: string)
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Demux Operator
- Statistics: Num rows: 22 Data size: 4653 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 92 Data size: 969 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 138 Data size: 1453 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
+ 0 _col1 (type: string), _col2 (type: bigint)
+ 1 _col1 (type: string), _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
@@ -1447,74 +1632,1585 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 46 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 46 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 138 Data size: 1453 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string), _col2 (type: bigint)
+ 1 _col1 (type: string), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-9
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Demux Operator
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 11 Data size: 2326 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Mux Operator
- Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col1 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col2
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Group By Operator
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-3
+ Stage: Stage-7
Map Reduce
Map Operator Tree:
TableScan
- alias: part_subq
- Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Demux Operator
+ Statistics: Num rows: 501 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 751 Data size: 7962 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string), _col0 (type: string)
+ 1 _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col1, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2650 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2650 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 751 Data size: 7962 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string), _col0 (type: string)
+ 1 _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col1, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-12
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_1:$hdt$_1:$hdt$_1:s1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_1:$hdt$_1:$hdt$_1:s1
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key > '9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col2 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+90 val_90 3
+92 val_92 1
+95 val_95 2
+96 val_96 1
+97 val_97 2
+98 val_98 2
+PREHOOK: query: -- non agg, non corr, windowing
+explain
+select p_mfgr, p_name, avg(p_size)
+from part_subq
+group by p_mfgr, p_name
+having p_name in
+ (select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- non agg, non corr, windowing
+explain
+select p_mfgr, p_name, avg(p_size)
+from part_subq
+group by p_mfgr, p_name
+having p_name in
+ (select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part_subq
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: p_name, p_mfgr, p_size
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: avg(p_size)
+ keys: p_name (type: string), p_mfgr (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: struct<count:bigint,sum:double,input:int>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 7 Data size: 1480 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 7 Data size: 1480 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 7 Data size: 1480 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col2 (type: double)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Demux Operator
+ Statistics: Num rows: 22 Data size: 4653 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 2326 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part_subq
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
key expressions: p_mfgr (type: string), p_size (type: int)
sort order: ++
- Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- value expressions: p_name (type: string)
+ Map-reduce partition columns: p_mfgr (type: string)
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ value expressions: p_name (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col1, _col2, _col5
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col1: string, _col2: string, _col5: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col5 ASC NULLS FIRST
+ partition by: _col2
+ raw input shape:
+ window functions:
+ window function definition
+ alias: first_value_window_0
+ arguments: _col1
+ name: first_value
+ window function: GenericUDAFFirstValueEvaluator
+ window frame: PRECEDING(MAX)~CURRENT
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: first_value_window_0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: CREATE TABLE src_null (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_null
+POSTHOOK: query: CREATE TABLE src_null (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_null
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE src_null
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@src_null
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE src_null
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@src_null
+PREHOOK: query: INSERT INTO src_null values('5444', null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@src_null
+POSTHOOK: query: INSERT INTO src_null values('5444', null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@src_null
+POSTHOOK: Lineage: src_null.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: src_null.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+Warning: Map Join MAPJOIN[235][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[236][bigTable=?] in task 'Stage-8:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[237][bigTable=?] in task 'Stage-9:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[238][bigTable=?] in task 'Stage-14:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[239][bigTable=?] in task 'Stage-17:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[240][bigTable=?] in task 'Stage-21:MAPRED' is a cross product
+PREHOOK: query: explain
+select key, value, count(*)
+from src_null b
+where NOT EXISTS (select key from src_null where src_null.value <> b.value)
+group by key, value
+having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key )
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, count(*)
+from src_null b
+where NOT EXISTS (select key from src_null where src_null.value <> b.value)
+group by key, value
+having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-6 is a root stage
+ Stage-37 depends on stages: Stage-6
+ Stage-1 depends on stages: Stage-37
+ Stage-2 depends on stages: Stage-1
+ Stage-28 depends on stages: Stage-2, Stage-8 , consists of Stage-36, Stage-3
+ Stage-36 has a backup stage: Stage-3
+ Stage-27 depends on stages: Stage-36
+ Stage-26 depends on stages: Stage-3, Stage-16, Stage-27 , consists of Stage-35, Stage-4
+ Stage-35 has a backup stage: Stage-4
+ Stage-25 depends on stages: Stage-35
+ Stage-4
+ Stage-3
+ Stage-12 is a root stage
+ Stage-39 depends on stages: Stage-12
+ Stage-9 depends on stages: Stage-39
+ Stage-10 depends on stages: Stage-9
+ Stage-38 depends on stages: Stage-10
+ Stage-8 depends on stages: Stage-38
+ Stage-20 is a root stage
+ Stage-41 depends on stages: Stage-20
+ Stage-17 depends on stages: Stage-41
+ Stage-18 depends on stages: Stage-17
+ Stage-40 depends on stages: Stage-18
+ Stage-14 depends on stages: Stage-40
+ Stage-15 depends on stages: Stage-14
+ Stage-16 depends on stages: Stage-15, Stage-22
+ Stage-24 is a root stage
+ Stage-42 depends on stages: Stage-24
+ Stage-21 depends on stages: Stage-42
+ Stage-22 depends on stages: Stage-21
+ Stage-0 depends on stages: Stage-25, Stage-4
+
+STAGE PLANS:
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-37
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:$hdt$_1:$hdt$_1:src_null
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:$hdt$_1:$hdt$_1:src_null
+ TableScan
+ alias: src_null
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 <> _col1) (type: boolean)
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Demux Operator
+ Statistics: Num rows: 1711 Data size: 345062 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Filter Operator
+ predicate: _col3 is null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col1, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Filter Operator
+ predicate: _col3 is null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col1, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-28
+ Conditional Operator
+
+ Stage: Stage-36
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+
+ Stage: Stage-27
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 2 Data size: 443 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-26
+ Conditional Operator
+
+ Stage: Stage-35
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $INTNAME1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $INTNAME1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 _col1 (type: string), _col2 (type: bigint)
+ 1 _col1 (type: string), _col3 (type: bigint)
+
+ Stage: Stage-25
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string), _col2 (type: bigint)
+ 1 _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8
+ Statistics: Num rows: 2 Data size: 487 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 1 Data size: 243 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 243 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 243 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 2 Data size: 443 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col4 (type: bigint), _col5 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col3 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col3 (type: bigint)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: _col2 (type: boolean)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string), _col2 (type: bigint)
+ 1 _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8
+ Statistics: Num rows: 2 Data size: 487 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 1 Data size: 243 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 243 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 243 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: string), _col2 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 403 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ Statistics: Num rows: 2 Data size: 443 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-12
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-39
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_1:$hdt$_1:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:src_null
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_1:$hdt$_1:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:src_null
+ TableScan
+ alias: src_null
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-9
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 <> _col1) (type: boolean)
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Demux Operator
+ Statistics: Num rows: 1711 Data size: 345062 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Filter Operator
+ predicate: _col3 is null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col1, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Filter Operator
+ predicate: _col3 is null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col1, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-38
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_1:$hdt$_1:$hdt$_1:s1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_1:$hdt$_1:$hdt$_1:s1
+ TableScan
+ alias: s1
+ Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key > '9') (type: boolean)
+ Statistics: Num rows: 9 Data size: 1806 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 9 Data size: 1806 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 <> _col2) (type: boolean)
+ Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col2 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 9 Data size: 1815 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 806 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 4 Data size: 806 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(), count(_col2)
+ keys: _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 403 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-20
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: value
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 29 Data size: 2910 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-41
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:$hdt$_4:$hdt$_4:src_null
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:$hdt$_4:$hdt$_4:src_null
+ TableScan
+ alias: src_null
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: value (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 58 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
+
+ Stage: Stage-17
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 29 Data size: 5820 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 <> _col1) (type: boolean)
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1682 Data size: 339242 Basic stats: COMPLETE Column stats: NONE
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Demux Operator
+ Statistics: Num rows: 1711 Data size: 345062 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Filter Operator
+ predicate: _col3 is null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col1, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 855 Data size: 172430 Basic stats: COMPLETE Column stats: NONE
+ Mux Operator
+ Statistics: Num rows: 2566 Data size: 517492 Basic stats: COMPLETE Column stats: NONE
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Filter Operator
+ predicate: _col3 is null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col1, _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ keys: _col1 (type: string), _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-18
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
- outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- PTF Operator
- Function definitions:
- Input definition
- input alias: ptf_0
- output shape: _col1: string, _col2: string, _col5: int
- type: WINDOWING
- Windowing table definition
- input alias: ptf_1
- name: windowingtablefunction
- order by: _col5 ASC NULLS FIRST
- partition by: _col2
- raw input shape:
- window functions:
- window function definition
- alias: first_value_window_0
- arguments: _col1
- name: first_val
<TRUNCATED>
[6/9] hive git commit: HIVE-15481 : Support multiple and nested
subqueries (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index 252b058..eb99650 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -119,15 +119,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col5
Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -867,15 +867,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col5
Statistics: Num rows: 26 Data size: 3770 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 13 Data size: 1885 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((_col2 = 0) or (_col5 is null and _col1 is not null and (_col3 >= _col2))) (type: boolean)
+ Statistics: Num rows: 26 Data size: 3770 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1855,15 +1855,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col4
Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 83 Data size: 8881 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean)
+ Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2414,15 +2414,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12
Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN ((_col5 - 1) is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((_col9 = 0) or (_col12 is null and (_col5 - 1) is not null and (_col10 >= _col9))) (type: boolean)
+ Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2646,15 +2646,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12
Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN ((_col0 * _col5) is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((_col9 = 0) or (_col12 is null and (_col0 * _col5) is not null and (_col10 >= _col9))) (type: boolean)
+ Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3230,15 +3230,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12
Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN (floor(_col7) is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((_col9 = 0) or (_col12 is null and floor(_col7) is not null and (_col10 >= _col9))) (type: boolean)
+ Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -4828,23 +4828,23 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col5
Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((_col2 = 0) or (_col5 is null and _col1 is not null and (_col3 >= _col2))) (type: boolean)
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reducer 4
Execution mode: llap
@@ -4854,12 +4854,12 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: bigint)
sort order: +
Map-reduce partition columns: _col1 (type: bigint)
- Statistics: Num rows: 103 Data size: 9785 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string)
Reducer 5
Execution mode: llap
@@ -5481,16 +5481,16 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12
Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN ((_col5 - 1) is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((_col9 = 0) or (_col12 is null and (_col5 - 1) is not null and (_col10 >= _col9))) (type: boolean)
+ Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
Reducer 4
Execution mode: llap
@@ -5498,10 +5498,10 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -5728,16 +5728,16 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12
Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN ((_col5 - 1) is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 13 Data size: 8307 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((_col9 = 0) or (_col12 is null and (_col5 - 1) is not null and (_col10 >= _col9))) (type: boolean)
+ Statistics: Num rows: 26 Data size: 16614 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
Reducer 4
@@ -5746,7 +5746,7 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: int), VALUE._col5 (type: string), VALUE._col6 (type: double), VALUE._col7 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 4
Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE
@@ -6004,15 +6004,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col5
Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean)
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -7030,7 +7030,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col4
Statistics: Num rows: 4 Data size: 419 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean)
+ predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean)
Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
@@ -8936,15 +8936,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 3 Data size: 67 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((_col2 = 0) or (_col4 is null and _col1 is not null and (_col3 >= _col2))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -9103,15 +9103,15 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 3 Data size: 67 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (not CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col3 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (true) ELSE (false) END) (type: boolean)
- Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((_col1 = 0) or (_col3 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 44 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -9165,3 +9165,864 @@ PREHOOK: query: drop table t1
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table t1
POSTHOOK: type: DROPTABLE
+Warning: Shuffle Join MERGEJOIN[70][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product
+Warning: Shuffle Join MERGEJOIN[72][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 10' is a cross product
+PREHOOK: query: -- corr predicate is not equi
+explain select *
+from src b
+where b.key not in
+ (select a.key
+ from src a
+ where b.value > a.value and a.key > '9'
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- corr predicate is not equi
+explain select *
+from src b
+where b.key not in
+ (select a.key
+ from src a
+ where b.value > a.value and a.key > '9'
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
+ Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
+ Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE)
+ Reducer 14 <- Map 13 (SIMPLE_EDGE)
+ Reducer 16 <- Map 15 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 8 <- Map 7 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 13
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > '9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: value (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > '9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 10
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 35524 Data size: 9555956 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col2 > _col1) (type: boolean)
+ Statistics: Num rows: 11841 Data size: 3185229 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 11841 Data size: 3185229 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1656 Data size: 294768 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1656 Data size: 294768 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 11
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1656 Data size: 294768 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1656 Data size: 301392 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1656 Data size: 301392 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), _col2 (type: boolean)
+ Reducer 12
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 1656 Data size: 301392 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col3 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col3 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1656 Data size: 301392 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: boolean)
+ Reducer 14
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Reducer 16
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4
+ Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col3 (type: bigint), _col4 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col3 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col7
+ Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: CASE WHEN ((_col3 = 0)) THEN (true) WHEN (_col3 is null) THEN (true) WHEN (_col7 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (false) ELSE (true) END (type: boolean)
+ Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 35524 Data size: 9555956 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col2 > _col1) (type: boolean)
+ Statistics: Num rows: 11841 Data size: 3185229 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: string), _col0 (type: string)
+ outputColumnNames: _col2, _col0
+ Statistics: Num rows: 11841 Data size: 3185229 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), count(_col0)
+ keys: _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 72 Data size: 7704 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 72 Data size: 7704 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 72 Data size: 7704 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 72 Data size: 7704 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[70][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product
+Warning: Shuffle Join MERGEJOIN[72][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 10' is a cross product
+PREHOOK: query: select *
+from src b
+where b.key not in
+ (select a.key
+ from src a
+ where b.value > a.value and a.key > '9'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src b
+where b.key not in
+ (select a.key
+ from src a
+ where b.value > a.value and a.key > '9'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+100 val_100
+103 val_103
+103 val_103
+104 val_104
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+119 val_119
+119 val_119
+119 val_119
+12 val_12
+12 val_12
+120 val_120
+120 val_120
+125 val_125
+125 val_125
+126 val_126
+128 val_128
+128 val_128
+128 val_128
+129 val_129
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+134 val_134
+136 val_136
+137 val_137
+137 val_137
+138 val_138
+138 val_138
+138 val_138
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+146 val_146
+149 val_149
+149 val_149
+15 val_15
+15 val_15
+150 val_150
+152 val_152
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+164 val_164
+165 val_165
+165 val_165
+166 val_166
+167 val_167
+167 val_167
+167 val_167
+168 val_168
+169 val_169
+169 val_169
+169 val_169
+169 val_169
+17 val_17
+170 val_170
+172 val_172
+172 val_172
+174 val_174
+174 val_174
+175 val_175
+175 val_175
+176 val_176
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+179 val_179
+18 val_18
+18 val_18
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+187 val_187
+187 val_187
+189 val_189
+19 val_19
+190 val_190
+191 val_191
+191 val_191
+192 val_192
+193 val_193
+193 val_193
+193 val_193
+194 val_194
+195 val_195
+195 val_195
+196 val_196
+197 val_197
+197 val_197
+199 val_199
+199 val_199
+199 val_199
+2 val_2
+20 val_20
+200 val_200
+200 val_200
+201 val_201
+202 val_202
+203 val_203
+203 val_203
+205 val_205
+205 val_205
+207 val_207
+207 val_207
+208 val_208
+208 val_208
+208 val_208
+209 val_209
+209 val_209
+213 val_213
+213 val_213
+214 val_214
+216 val_216
+216 val_216
+217 val_217
+217 val_217
+218 val_218
+219 val_219
+219 val_219
+221 val_221
+221 val_221
+222 val_222
+223 val_223
+223 val_223
+224 val_224
+224 val_224
+226 val_226
+228 val_228
+229 val_229
+229 val_229
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+233 val_233
+233 val_233
+235 val_235
+237 val_237
+237 val_237
+238 val_238
+238 val_238
+239 val_239
+239 val_239
+24 val_24
+24 val_24
+241 val_241
+242 val_242
+242 val_242
+244 val_244
+247 val_247
+248 val_248
+249 val_249
+252 val_252
+255 val_255
+255 val_255
+256 val_256
+256 val_256
+257 val_257
+258 val_258
+26 val_26
+26 val_26
+260 val_260
+262 val_262
+263 val_263
+265 val_265
+265 val_265
+266 val_266
+27 val_27
+272 val_272
+272 val_272
+273 val_273
+273 val_273
+273 val_273
+274 val_274
+275 val_275
+277 val_277
+277 val_277
+277 val_277
+277 val_277
+278 val_278
+278 val_278
+28 val_28
+280 val_280
+280 val_280
+281 val_281
+281 val_281
+282 val_282
+282 val_282
+283 val_283
+284 val_284
+285 val_285
+286 val_286
+287 val_287
+288 val_288
+288 val_288
+289 val_289
+291 val_291
+292 val_292
+296 val_296
+298 val_298
+298 val_298
+298 val_298
+30 val_30
+302 val_302
+305 val_305
+306 val_306
+307 val_307
+307 val_307
+308 val_308
+309 val_309
+309 val_309
+310 val_310
+311 val_311
+311 val_311
+311 val_311
+315 val_315
+316 val_316
+316 val_316
+316 val_316
+317 val_317
+317 val_317
+318 val_318
+318 val_318
+318 val_318
+321 val_321
+321 val_321
+322 val_322
+322 val_322
+323 val_323
+325 val_325
+325 val_325
+327 val_327
+327 val_327
+327 val_327
+33 val_33
+331 val_331
+331 val_331
+332 val_332
+333 val_333
+333 val_333
+335 val_335
+336 val_336
+338 val_338
+339 val_339
+34 val_34
+341 val_341
+342 val_342
+342 val_342
+344 val_344
+344 val_344
+345 val_345
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+35 val_35
+35 val_35
+35 val_35
+351 val_351
+353 val_353
+353 val_353
+356 val_356
+360 val_360
+362 val_362
+364 val_364
+365 val_365
+366 val_366
+367 val_367
+367 val_367
+368 val_368
+369 val_369
+369 val_369
+369 val_369
+37 val_37
+37 val_37
+373 val_373
+374 val_374
+375 val_375
+377 val_377
+378 val_378
+379 val_379
+382 val_382
+382 val_382
+384 val_384
+384 val_384
+384 val_384
+386 val_386
+389 val_389
+392 val_392
+393 val_393
+394 val_394
+395 val_395
+395 val_395
+396 val_396
+396 val_396
+396 val_396
+397 val_397
+397 val_397
+399 val_399
+399 val_399
+4 val_4
+400 val_400
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+402 val_402
+403 val_403
+403 val_403
+403 val_403
+404 val_404
+404 val_404
+406 val_406
+406 val_406
+406 val_406
+406 val_406
+407 val_407
+409 val_409
+409 val_409
+409 val_409
+41 val_41
+411 val_411
+413 val_413
+413 val_413
+414 val_414
+414 val_414
+417 val_417
+417 val_417
+417 val_417
+418 val_418
+419 val_419
+42 val_42
+42 val_42
+421 val_421
+424 val_424
+424 val_424
+427 val_427
+429 val_429
+429 val_429
+43 val_43
+430 val_430
+430 val_430
+430 val_430
+431 val_431
+431 val_431
+431 val_431
+432 val_432
+435 val_435
+436 val_436
+437 val_437
+438 val_438
+438 val_438
+438 val_438
+439 val_439
+439 val_439
+44 val_44
+443 val_443
+444 val_444
+446 val_446
+448 val_448
+449 val_449
+452 val_452
+453 val_453
+454 val_454
+454 val_454
+454 val_454
+455 val_455
+457 val_457
+458 val_458
+458 val_458
+459 val_459
+459 val_459
+460 val_460
+462 val_462
+462 val_462
+463 val_463
+463 val_463
+466 val_466
+466 val_466
+466 val_466
+467 val_467
+468 val_468
+468 val_468
+468 val_468
+468 val_468
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+47 val_47
+470 val_470
+472 val_472
+475 val_475
+477 val_477
+478 val_478
+478 val_478
+479 val_479
+480 val_480
+480 val_480
+480 val_480
+481 val_481
+482 val_482
+483 val_483
+484 val_484
+485 val_485
+487 val_487
+489 val_489
+489 val_489
+489 val_489
+489 val_489
+490 val_490
+491 val_491
+492 val_492
+492 val_492
+493 val_493
+494 val_494
+495 val_495
+496 val_496
+497 val_497
+498 val_498
+498 val_498
+498 val_498
+5 val_5
+5 val_5
+5 val_5
+51 val_51
+51 val_51
+53 val_53
+54 val_54
+57 val_57
+58 val_58
+58 val_58
+64 val_64
+65 val_65
+66 val_66
+67 val_67
+67 val_67
+69 val_69
+70 val_70
+70 val_70
+70 val_70
+72 val_72
+72 val_72
+74 val_74
+76 val_76
+76 val_76
+77 val_77
+78 val_78
+8 val_8
+80 val_80
+82 val_82
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+85 val_85
+86 val_86
+87 val_87
+9 val_9
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index 27f32db..3ab6eb9 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -29,14 +29,14 @@ Stage-0
Stage-1
Reducer 2 vectorized, llap
File Output Operator [FS_37]
- Select Operator [SEL_36] (rows=250 width=178)
+ Select Operator [SEL_36] (rows=500 width=178)
Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_23]
- Select Operator [SEL_22] (rows=250 width=178)
+ Select Operator [SEL_22] (rows=500 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_21] (rows=250 width=198)
- predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END)
+ Filter Operator [FIL_21] (rows=500 width=198)
+ predicate:((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2)))
Map Join Operator [MAPJOIN_29] (rows=500 width=198)
Conds:MAPJOIN_28._col0=RS_35._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"]
<-Reducer 6 [BROADCAST_EDGE] vectorized, llap
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/perf/query16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query16.q.out b/ql/src/test/results/clientpositive/perf/query16.q.out
new file mode 100644
index 0000000..1f33e43
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/query16.q.out
@@ -0,0 +1,330 @@
+PREHOOK: query: explain select
+ count(distinct cs_order_number) as `order count`
+ ,sum(cs_ext_ship_cost) as `total shipping cost`
+ ,sum(cs_net_profit) as `total net profit`
+from
+ catalog_sales cs1
+ ,date_dim
+ ,customer_address
+ ,call_center
+where
+ d_date between '2001-4-01' and
+ (cast('2001-4-01' as date) + 60 days)
+and cs1.cs_ship_date_sk = d_date_sk
+and cs1.cs_ship_addr_sk = ca_address_sk
+and ca_state = 'NY'
+and cs1.cs_call_center_sk = cc_call_center_sk
+and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish',
+ 'Daviess County'
+)
+and exists (select *
+ from catalog_sales cs2
+ where cs1.cs_order_number = cs2.cs_order_number
+ and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
+and not exists(select *
+ from catalog_returns cr1
+ where cs1.cs_order_number = cr1.cr_order_number)
+order by `order count`
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select
+ count(distinct cs_order_number) as `order count`
+ ,sum(cs_ext_ship_cost) as `total shipping cost`
+ ,sum(cs_net_profit) as `total net profit`
+from
+ catalog_sales cs1
+ ,date_dim
+ ,customer_address
+ ,call_center
+where
+ d_date between '2001-4-01' and
+ (cast('2001-4-01' as date) + 60 days)
+and cs1.cs_ship_date_sk = d_date_sk
+and cs1.cs_ship_addr_sk = ca_address_sk
+and ca_state = 'NY'
+and cs1.cs_call_center_sk = cc_call_center_sk
+and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish',
+ 'Daviess County'
+)
+and exists (select *
+ from catalog_sales cs2
+ where cs1.cs_order_number = cs2.cs_order_number
+ and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
+and not exists(select *
+ from catalog_returns cr1
+ where cs1.cs_order_number = cr1.cr_order_number)
+order by `order count`
+limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 13 <- Map 12 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
+Reducer 14 <- Reducer 13 (SIMPLE_EDGE)
+Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE)
+Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE)
+Reducer 19 <- Reducer 18 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+Reducer 24 <- Map 23 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE)
+Reducer 25 <- Reducer 24 (SIMPLE_EDGE)
+Reducer 27 <- Map 26 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE)
+Reducer 28 <- Map 32 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE)
+Reducer 29 <- Map 33 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE)
+Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 30 <- Reducer 29 (SIMPLE_EDGE)
+Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 25 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 8
+ File Output Operator [FS_110]
+ Limit [LIM_109] (rows=1 width=344)
+ Number of rows:100
+ Select Operator [SEL_108] (rows=1 width=344)
+ Output:["_col0","_col1","_col2"]
+ <-Reducer 7 [SIMPLE_EDGE]
+ SHUFFLE [RS_107]
+ Group By Operator [GBY_105] (rows=1 width=344)
+ Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col0:0._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_104]
+ Group By Operator [GBY_103] (rows=231905279 width=135)
+ Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT _col4)","sum(_col5)","sum(_col6)"],keys:_col4
+ Select Operator [SEL_102] (rows=231905279 width=135)
+ Output:["_col4","_col5","_col6"]
+ Filter Operator [FIL_101] (rows=231905279 width=135)
+ predicate:_col16 is null
+ Select Operator [SEL_100] (rows=463810558 width=135)
+ Output:["_col4","_col5","_col6","_col16"]
+ Merge Join Operator [MERGEJOIN_193] (rows=463810558 width=135)
+ Conds:RS_97._col4, _col3=RS_98._col1, _col0(Inner),Output:["_col4","_col5","_col6","_col14"]
+ <-Reducer 25 [SIMPLE_EDGE]
+ SHUFFLE [RS_98]
+ PartitionCols:_col1, _col0
+ Select Operator [SEL_84] (rows=158394413 width=135)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_83] (rows=158394413 width=135)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 24 [SIMPLE_EDGE]
+ SHUFFLE [RS_82]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_81] (rows=316788826 width=135)
+ Output:["_col0","_col1"],keys:_col3, _col2
+ Select Operator [SEL_80] (rows=316788826 width=135)
+ Output:["_col3","_col2"]
+ Filter Operator [FIL_79] (rows=316788826 width=135)
+ predicate:(_col2 <> _col0)
+ Merge Join Operator [MERGEJOIN_188] (rows=316788826 width=135)
+ Conds:RS_76._col1=RS_77._col1(Inner),Output:["_col0","_col2","_col3"]
+ <-Map 23 [SIMPLE_EDGE]
+ SHUFFLE [RS_76]
+ PartitionCols:_col1
+ Select Operator [SEL_49] (rows=287989836 width=135)
+ Output:["_col0","_col1"]
+ TableScan [TS_48] (rows=287989836 width=135)
+ default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"]
+ <-Reducer 30 [SIMPLE_EDGE]
+ SHUFFLE [RS_77]
+ PartitionCols:_col1
+ Select Operator [SEL_75] (rows=191657247 width=135)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_74] (rows=191657247 width=135)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 29 [SIMPLE_EDGE]
+ SHUFFLE [RS_73]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_72] (rows=383314495 width=135)
+ Output:["_col0","_col1"],keys:_col4, _col3
+ Select Operator [SEL_71] (rows=383314495 width=135)
+ Output:["_col4","_col3"]
+ Merge Join Operator [MERGEJOIN_187] (rows=383314495 width=135)
+ Conds:RS_68._col2=RS_69._col0(Inner),Output:["_col3","_col4"]
+ <-Map 33 [SIMPLE_EDGE]
+ SHUFFLE [RS_69]
+ PartitionCols:_col0
+ Select Operator [SEL_61] (rows=30 width=2045)
+ Output:["_col0"]
+ Filter Operator [FIL_180] (rows=30 width=2045)
+ predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null)
+ TableScan [TS_59] (rows=60 width=2045)
+ default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"]
+ <-Reducer 28 [SIMPLE_EDGE]
+ SHUFFLE [RS_68]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_186] (rows=348467716 width=135)
+ Conds:RS_65._col1=RS_66._col0(Inner),Output:["_col2","_col3","_col4"]
+ <-Map 32 [SIMPLE_EDGE]
+ SHUFFLE [RS_66]
+ PartitionCols:_col0
+ Select Operator [SEL_58] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_179] (rows=20000000 width=1014)
+ predicate:((ca_state = 'NY') and ca_address_sk is not null)
+ TableScan [TS_56] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 27 [SIMPLE_EDGE]
+ SHUFFLE [RS_65]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_185] (rows=316788826 width=135)
+ Conds:RS_62._col0=RS_63._col0(Inner),Output:["_col1","_col2","_col3","_col4"]
+ <-Map 26 [SIMPLE_EDGE]
+ SHUFFLE [RS_62]
+ PartitionCols:_col0
+ Select Operator [SEL_52] (rows=287989836 width=135)
+ Output:["_col0","_col1","_col2","_col3","_col4"]
+ Filter Operator [FIL_177] (rows=287989836 width=135)
+ predicate:(cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null)
+ TableScan [TS_50] (rows=287989836 width=135)
+ default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number"]
+ <-Map 31 [SIMPLE_EDGE]
+ SHUFFLE [RS_63]
+ PartitionCols:_col0
+ Select Operator [SEL_55] (rows=8116 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_178] (rows=8116 width=1119)
+ predicate:(d_date BETWEEN '2001-4-01' AND 2001-05-31 01:00:00.0 and d_date_sk is not null)
+ TableScan [TS_53] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_97]
+ PartitionCols:_col4, _col3
+ Merge Join Operator [MERGEJOIN_192] (rows=421645953 width=135)
+ Conds:RS_94._col4=RS_95._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14"]
+ <-Reducer 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_95]
+ PartitionCols:_col0
+ Select Operator [SEL_47] (rows=105411488 width=135)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_46] (rows=105411488 width=135)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_45]
+ PartitionCols:_col0
+ Group By Operator [GBY_44] (rows=210822976 width=135)
+ Output:["_col0"],keys:_col1
+ Merge Join Operator [MERGEJOIN_184] (rows=210822976 width=135)
+ Conds:RS_40._col0=RS_41._col0(Inner),Output:["_col1"]
+ <-Map 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_40]
+ PartitionCols:_col0
+ Select Operator [SEL_13] (rows=28798881 width=106)
+ Output:["_col0"]
+ TableScan [TS_12] (rows=28798881 width=106)
+ default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"]
+ <-Reducer 19 [SIMPLE_EDGE]
+ SHUFFLE [RS_41]
+ PartitionCols:_col0
+ Group By Operator [GBY_38] (rows=191657247 width=135)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 18 [SIMPLE_EDGE]
+ SHUFFLE [RS_37]
+ PartitionCols:_col0
+ Group By Operator [GBY_36] (rows=383314495 width=135)
+ Output:["_col0"],keys:_col3
+ Merge Join Operator [MERGEJOIN_183] (rows=383314495 width=135)
+ Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col3"]
+ <-Map 22 [SIMPLE_EDGE]
+ SHUFFLE [RS_33]
+ PartitionCols:_col0
+ Select Operator [SEL_25] (rows=30 width=2045)
+ Output:["_col0"]
+ Filter Operator [FIL_175] (rows=30 width=2045)
+ predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null)
+ TableScan [TS_23] (rows=60 width=2045)
+ default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"]
+ <-Reducer 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_32]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_182] (rows=348467716 width=135)
+ Conds:RS_29._col1=RS_30._col0(Inner),Output:["_col2","_col3"]
+ <-Map 21 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col0
+ Select Operator [SEL_22] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_174] (rows=20000000 width=1014)
+ predicate:((ca_state = 'NY') and ca_address_sk is not null)
+ TableScan [TS_20] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_29]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_181] (rows=316788826 width=135)
+ Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_26]
+ PartitionCols:_col0
+ Select Operator [SEL_16] (rows=287989836 width=135)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_172] (rows=287989836 width=135)
+ predicate:(cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null)
+ TableScan [TS_14] (rows=287989836 width=135)
+ default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_order_number"]
+ <-Map 20 [SIMPLE_EDGE]
+ SHUFFLE [RS_27]
+ PartitionCols:_col0
+ Select Operator [SEL_19] (rows=8116 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_173] (rows=8116 width=1119)
+ predicate:(d_date BETWEEN '2001-4-01' AND 2001-05-31 01:00:00.0 and d_date_sk is not null)
+ TableScan [TS_17] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_94]
+ PartitionCols:_col4
+ Merge Join Operator [MERGEJOIN_191] (rows=383314495 width=135)
+ Conds:RS_91._col2=RS_92._col0(Inner),Output:["_col3","_col4","_col5","_col6"]
+ <-Map 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_92]
+ PartitionCols:_col0
+ Select Operator [SEL_11] (rows=30 width=2045)
+ Output:["_col0"]
+ Filter Operator [FIL_170] (rows=30 width=2045)
+ predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null)
+ TableScan [TS_9] (rows=60 width=2045)
+ default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_91]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_190] (rows=348467716 width=135)
+ Conds:RS_88._col1=RS_89._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_89]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_169] (rows=20000000 width=1014)
+ predicate:((ca_state = 'NY') and ca_address_sk is not null)
+ TableScan [TS_6] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_88]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_189] (rows=316788826 width=135)
+ Conds:RS_85._col0=RS_86._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_85]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=287989836 width=135)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+ Filter Operator [FIL_167] (rows=287989836 width=135)
+ predicate:(cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null)
+ TableScan [TS_0] (rows=287989836 width=135)
+ default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"]
+ <-Map 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_86]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=8116 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_168] (rows=8116 width=1119)
+ predicate:(d_date BETWEEN '2001-4-01' AND 2001-05-31 01:00:00.0 and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+
[4/9] hive git commit: HIVE-15481 : Support multiple and nested
subqueries (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
Posted by ha...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/perf/query60.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query60.q.out b/ql/src/test/results/clientpositive/perf/query60.q.out
new file mode 100644
index 0000000..a7c8d03
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/query60.q.out
@@ -0,0 +1,443 @@
+PREHOOK: query: -- start query 1 in stream 0 using template query60.tpl and seed 1930872976
+explain with ss as (
+ select
+ i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ cs as (
+ select
+ i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ ws as (
+ select
+ i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id)
+ select
+ i_item_id
+,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by i_item_id
+ ,total_sales
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: -- start query 1 in stream 0 using template query60.tpl and seed 1930872976
+explain with ss as (
+ select
+ i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ cs as (
+ select
+ i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ ws as (
+ select
+ i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id)
+ select
+ i_item_id
+,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by i_item_id
+ ,total_sales
+ limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE)
+Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE)
+Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 20 <- Map 19 (SIMPLE_EDGE)
+Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE)
+Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE)
+Reducer 27 <- Map 26 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE)
+Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE)
+Reducer 29 <- Reducer 28 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 31 <- Map 30 (SIMPLE_EDGE)
+Reducer 33 <- Map 32 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE)
+Reducer 34 <- Map 36 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 6 <- Union 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 9 <- Map 8 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 7
+ File Output Operator [FS_122]
+ Limit [LIM_121] (rows=100 width=108)
+ Number of rows:100
+ Select Operator [SEL_120] (rows=335408073 width=108)
+ Output:["_col0","_col1"]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_119]
+ Group By Operator [GBY_117] (rows=335408073 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Union 5 [SIMPLE_EDGE]
+ <-Reducer 18 [CONTAINS]
+ Reduce Output Operator [RS_116]
+ PartitionCols:_col0
+ Group By Operator [GBY_115] (rows=670816147 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Group By Operator [GBY_72] (rows=191657247 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_71]
+ PartitionCols:_col0
+ Group By Operator [GBY_70] (rows=383314495 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+ Merge Join Operator [MERGEJOIN_184] (rows=383314495 width=135)
+ Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"]
+ <-Reducer 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_66]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_177] (rows=508200 width=1436)
+ Conds:RS_63._col1=RS_64._col0(Inner),Output:["_col0","_col1"]
+ <-Map 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_63]
+ PartitionCols:_col1
+ Select Operator [SEL_39] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_164] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_37] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 20 [SIMPLE_EDGE]
+ SHUFFLE [RS_64]
+ PartitionCols:_col0
+ Group By Operator [GBY_45] (rows=115500 width=1436)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 19 [SIMPLE_EDGE]
+ SHUFFLE [RS_44]
+ PartitionCols:_col0
+ Group By Operator [GBY_43] (rows=231000 width=1436)
+ Output:["_col0"],keys:i_item_id
+ Select Operator [SEL_42] (rows=231000 width=1436)
+ Output:["i_item_id"]
+ Filter Operator [FIL_165] (rows=231000 width=1436)
+ predicate:(i_category) IN ('Children')
+ TableScan [TS_40] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_category"]
+ <-Reducer 23 [SIMPLE_EDGE]
+ SHUFFLE [RS_67]
+ PartitionCols:_col4
+ Select Operator [SEL_62] (rows=348467716 width=135)
+ Output:["_col4","_col5"]
+ Merge Join Operator [MERGEJOIN_179] (rows=348467716 width=135)
+ Conds:RS_59._col1=RS_60._col0(Inner),Output:["_col2","_col3"]
+ <-Map 25 [SIMPLE_EDGE]
+ SHUFFLE [RS_60]
+ PartitionCols:_col0
+ Select Operator [SEL_55] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_168] (rows=20000000 width=1014)
+ predicate:((ca_gmt_offset = -6) and ca_address_sk is not null)
+ TableScan [TS_53] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
+ <-Reducer 22 [SIMPLE_EDGE]
+ SHUFFLE [RS_59]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_178] (rows=316788826 width=135)
+ Conds:RS_56._col0=RS_57._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 21 [SIMPLE_EDGE]
+ SHUFFLE [RS_56]
+ PartitionCols:_col0
+ Select Operator [SEL_49] (rows=287989836 width=135)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_166] (rows=287989836 width=135)
+ predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null)
+ TableScan [TS_47] (rows=287989836 width=135)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"]
+ <-Map 24 [SIMPLE_EDGE]
+ SHUFFLE [RS_57]
+ PartitionCols:_col0
+ Select Operator [SEL_52] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_167] (rows=18262 width=1119)
+ predicate:((d_year = 1999) and (d_moy = 9) and d_date_sk is not null)
+ TableScan [TS_50] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 29 [CONTAINS]
+ Reduce Output Operator [RS_116]
+ PartitionCols:_col0
+ Group By Operator [GBY_115] (rows=670816147 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Group By Operator [GBY_111] (rows=95833781 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 28 [SIMPLE_EDGE]
+ SHUFFLE [RS_110]
+ PartitionCols:_col0
+ Group By Operator [GBY_109] (rows=191667562 width=135)
+ Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+ Merge Join Operator [MERGEJOIN_185] (rows=191667562 width=135)
+ Conds:RS_105._col0=RS_106._col3(Inner),Output:["_col1","_col8"]
+ <-Reducer 27 [SIMPLE_EDGE]
+ SHUFFLE [RS_105]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_180] (rows=508200 width=1436)
+ Conds:RS_102._col1=RS_103._col0(Inner),Output:["_col0","_col1"]
+ <-Map 26 [SIMPLE_EDGE]
+ SHUFFLE [RS_102]
+ PartitionCols:_col1
+ Select Operator [SEL_78] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_169] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_76] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 31 [SIMPLE_EDGE]
+ SHUFFLE [RS_103]
+ PartitionCols:_col0
+ Group By Operator [GBY_84] (rows=115500 width=1436)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 30 [SIMPLE_EDGE]
+ SHUFFLE [RS_83]
+ PartitionCols:_col0
+ Group By Operator [GBY_82] (rows=231000 width=1436)
+ Output:["_col0"],keys:i_item_id
+ Select Operator [SEL_81] (rows=231000 width=1436)
+ Output:["i_item_id"]
+ Filter Operator [FIL_170] (rows=231000 width=1436)
+ predicate:(i_category) IN ('Children')
+ TableScan [TS_79] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_category"]
+ <-Reducer 34 [SIMPLE_EDGE]
+ SHUFFLE [RS_106]
+ PartitionCols:_col3
+ Select Operator [SEL_101] (rows=174243235 width=135)
+ Output:["_col3","_col5"]
+ Merge Join Operator [MERGEJOIN_182] (rows=174243235 width=135)
+ Conds:RS_98._col2=RS_99._col0(Inner),Output:["_col1","_col3"]
+ <-Map 36 [SIMPLE_EDGE]
+ SHUFFLE [RS_99]
+ PartitionCols:_col0
+ Select Operator [SEL_94] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_173] (rows=20000000 width=1014)
+ predicate:((ca_gmt_offset = -6) and ca_address_sk is not null)
+ TableScan [TS_92] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
+ <-Reducer 33 [SIMPLE_EDGE]
+ SHUFFLE [RS_98]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_181] (rows=158402938 width=135)
+ Conds:RS_95._col0=RS_96._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 32 [SIMPLE_EDGE]
+ SHUFFLE [RS_95]
+ PartitionCols:_col0
+ Select Operator [SEL_88] (rows=144002668 width=135)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_171] (rows=144002668 width=135)
+ predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null)
+ TableScan [TS_86] (rows=144002668 width=135)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"]
+ <-Map 35 [SIMPLE_EDGE]
+ SHUFFLE [RS_96]
+ PartitionCols:_col0
+ Select Operator [SEL_91] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_172] (rows=18262 width=1119)
+ predicate:((d_year = 1999) and (d_moy = 9) and d_date_sk is not null)
+ TableScan [TS_89] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 4 [CONTAINS]
+ Reduce Output Operator [RS_116]
+ PartitionCols:_col0
+ Group By Operator [GBY_115] (rows=670816147 width=108)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Group By Operator [GBY_35] (rows=383325119 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col0
+ Group By Operator [GBY_33] (rows=766650239 width=88)
+ Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+ Merge Join Operator [MERGEJOIN_183] (rows=766650239 width=88)
+ Conds:RS_29._col0=RS_30._col3(Inner),Output:["_col1","_col8"]
+ <-Reducer 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col3
+ Select Operator [SEL_25] (rows=696954748 width=88)
+ Output:["_col3","_col5"]
+ Merge Join Operator [MERGEJOIN_176] (rows=696954748 width=88)
+ Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col1","_col3"]
+ <-Map 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_23]
+ PartitionCols:_col0
+ Select Operator [SEL_18] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_163] (rows=20000000 width=1014)
+ predicate:((ca_gmt_offset = -6) and ca_address_sk is not null)
+ TableScan [TS_16] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"]
+ <-Reducer 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_22]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_175] (rows=633595212 width=88)
+ Conds:RS_19._col0=RS_20._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_19]
+ PartitionCols:_col0
+ Select Operator [SEL_12] (rows=575995635 width=88)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_161] (rows=575995635 width=88)
+ predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null)
+ TableScan [TS_10] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"]
+ <-Map 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_20]
+ PartitionCols:_col0
+ Select Operator [SEL_15] (rows=18262 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_162] (rows=18262 width=1119)
+ predicate:((d_year = 1999) and (d_moy = 9) and d_date_sk is not null)
+ TableScan [TS_13] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_29]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_174] (rows=508200 width=1436)
+ Conds:RS_26._col1=RS_27._col0(Inner),Output:["_col0","_col1"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_26]
+ PartitionCols:_col1
+ Select Operator [SEL_2] (rows=462000 width=1436)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_159] (rows=462000 width=1436)
+ predicate:i_item_sk is not null
+ TableScan [TS_0] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_27]
+ PartitionCols:_col0
+ Group By Operator [GBY_8] (rows=115500 width=1436)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_7]
+ PartitionCols:_col0
+ Group By Operator [GBY_6] (rows=231000 width=1436)
+ Output:["_col0"],keys:i_item_id
+ Select Operator [SEL_5] (rows=231000 width=1436)
+ Output:["i_item_id"]
+ Filter Operator [FIL_160] (rows=231000 width=1436)
+ predicate:(i_category) IN ('Children')
+ TableScan [TS_3] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_id","i_category"]
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/perf/query69.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query69.q.out b/ql/src/test/results/clientpositive/perf/query69.q.out
new file mode 100644
index 0000000..89c3b43
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/query69.q.out
@@ -0,0 +1,591 @@
+Warning: Shuffle Join MERGEJOIN[351][tables = [$hdt$_5, $hdt$_6, $hdt$_4, $hdt$_3]] in Stage 'Reducer 13' is a cross product
+Warning: Shuffle Join MERGEJOIN[353][tables = [$hdt$_6, $hdt$_7, $hdt$_5, $hdt$_4]] in Stage 'Reducer 31' is a cross product
+Warning: Shuffle Join MERGEJOIN[356][tables = [$hdt$_3, $hdt$_4, $hdt$_2, $hdt$_1]] in Stage 'Reducer 49' is a cross product
+PREHOOK: query: explain select
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_state in ('CO','IL','MN') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ (not exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ not exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_state in ('CO','IL','MN') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ (not exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ not exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE)
+Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
+Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE)
+Reducer 14 <- Reducer 13 (SIMPLE_EDGE)
+Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE)
+Reducer 18 <- Map 21 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE)
+Reducer 19 <- Reducer 18 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE)
+Reducer 24 <- Map 27 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE)
+Reducer 25 <- Reducer 24 (SIMPLE_EDGE)
+Reducer 29 <- Map 28 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE)
+Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 30 <- Reducer 29 (SIMPLE_EDGE), Reducer 37 (SIMPLE_EDGE)
+Reducer 31 <- Reducer 30 (SIMPLE_EDGE), Reducer 43 (SIMPLE_EDGE)
+Reducer 32 <- Reducer 31 (SIMPLE_EDGE)
+Reducer 35 <- Map 34 (SIMPLE_EDGE), Map 38 (SIMPLE_EDGE)
+Reducer 36 <- Map 39 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE)
+Reducer 37 <- Reducer 36 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE)
+Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE)
+Reducer 42 <- Map 45 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE)
+Reducer 43 <- Reducer 42 (SIMPLE_EDGE)
+Reducer 47 <- Map 46 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE)
+Reducer 48 <- Reducer 47 (SIMPLE_EDGE), Reducer 55 (SIMPLE_EDGE)
+Reducer 49 <- Reducer 48 (SIMPLE_EDGE), Reducer 61 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 50 (SIMPLE_EDGE)
+Reducer 50 <- Reducer 49 (SIMPLE_EDGE)
+Reducer 53 <- Map 52 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE)
+Reducer 54 <- Map 57 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE)
+Reducer 55 <- Reducer 54 (SIMPLE_EDGE)
+Reducer 59 <- Map 58 (SIMPLE_EDGE), Map 62 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 60 <- Map 63 (SIMPLE_EDGE), Reducer 59 (SIMPLE_EDGE)
+Reducer 61 <- Reducer 60 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 7
+ File Output Operator [FS_214]
+ Limit [LIM_213] (rows=100 width=248)
+ Number of rows:100
+ Select Operator [SEL_212] (rows=5102057559316637 width=248)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_211]
+ Select Operator [SEL_210] (rows=5102057559316637 width=248)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col6"]
+ Group By Operator [GBY_209] (rows=5102057559316637 width=248)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_208]
+ PartitionCols:_col0, _col1, _col2, _col3, _col4
+ Group By Operator [GBY_207] (rows=10204115118633274 width=248)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10
+ Select Operator [SEL_206] (rows=10204115118633274 width=248)
+ Output:["_col6","_col7","_col8","_col9","_col10"]
+ Filter Operator [FIL_205] (rows=10204115118633274 width=248)
+ predicate:_col15 is null
+ Merge Join Operator [MERGEJOIN_357] (rows=20408230237266548 width=248)
+ Conds:RS_202._col0=RS_203._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col15"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_202]
+ PartitionCols:_col0
+ Select Operator [SEL_141] (rows=18552936177209164 width=248)
+ Output:["_col0","_col10","_col6","_col7","_col8","_col9"]
+ Filter Operator [FIL_140] (rows=18552936177209164 width=248)
+ predicate:_col13 is null
+ Select Operator [SEL_139] (rows=37105872354418328 width=248)
+ Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col13"]
+ Merge Join Operator [MERGEJOIN_354] (rows=37105872354418328 width=248)
+ Conds:RS_135._col0=RS_136._col0(Left Outer),RS_135._col0=RS_137._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col12"]
+ <-Reducer 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_136]
+ PartitionCols:_col0
+ Select Operator [SEL_68] (rows=4216686374121617 width=996)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_67] (rows=4216686374121617 width=996)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_66]
+ PartitionCols:_col0
+ Group By Operator [GBY_65] (rows=8433372748243235 width=996)
+ Output:["_col0"],keys:_col6
+ Merge Join Operator [MERGEJOIN_351] (rows=8433372748243235 width=996)
+ Conds:(Inner),Output:["_col6"]
+ <-Reducer 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_61]
+ Merge Join Operator [MERGEJOIN_350] (rows=174243235 width=135)
+ Conds:RS_58._col1=RS_59._col0(Inner)
+ <-Reducer 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_58]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_335] (rows=158402938 width=135)
+ Conds:RS_55._col0=RS_56._col0(Inner),Output:["_col1"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_55]
+ PartitionCols:_col0
+ Select Operator [SEL_11] (rows=144002668 width=135)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_309] (rows=144002668 width=135)
+ predicate:ws_sold_date_sk is not null
+ TableScan [TS_9] (rows=144002668 width=135)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
+ <-Map 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_56]
+ PartitionCols:_col0
+ Select Operator [SEL_14] (rows=4058 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_310] (rows=4058 width=1119)
+ predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null)
+ TableScan [TS_12] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 19 [SIMPLE_EDGE]
+ SHUFFLE [RS_59]
+ PartitionCols:_col0
+ Group By Operator [GBY_33] (rows=48400001 width=860)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 18 [SIMPLE_EDGE]
+ SHUFFLE [RS_32]
+ PartitionCols:_col0
+ Group By Operator [GBY_31] (rows=96800003 width=860)
+ Output:["_col0"],keys:_col0
+ Merge Join Operator [MERGEJOIN_337] (rows=96800003 width=860)
+ Conds:RS_27._col1=RS_28._col0(Inner),Output:["_col0"]
+ <-Map 21 [SIMPLE_EDGE]
+ SHUFFLE [RS_28]
+ PartitionCols:_col0
+ Select Operator [SEL_23] (rows=1861800 width=385)
+ Output:["_col0"]
+ Filter Operator [FIL_313] (rows=1861800 width=385)
+ predicate:cd_demo_sk is not null
+ TableScan [TS_21] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"]
+ <-Reducer 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_27]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_336] (rows=88000001 width=860)
+ Conds:RS_24._col2=RS_25._col0(Inner),Output:["_col0","_col1"]
+ <-Map 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_24]
+ PartitionCols:_col2
+ Select Operator [SEL_17] (rows=80000000 width=860)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_311] (rows=80000000 width=860)
+ predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null)
+ TableScan [TS_15] (rows=80000000 width=860)
+ default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+ <-Map 20 [SIMPLE_EDGE]
+ SHUFFLE [RS_25]
+ PartitionCols:_col0
+ Select Operator [SEL_20] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_312] (rows=20000000 width=1014)
+ predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null)
+ TableScan [TS_18] (rows=40000000 width=1014)
+ default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 25 [SIMPLE_EDGE]
+ SHUFFLE [RS_62]
+ Group By Operator [GBY_53] (rows=48400001 width=860)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 24 [SIMPLE_EDGE]
+ SHUFFLE [RS_52]
+ PartitionCols:_col0
+ Group By Operator [GBY_51] (rows=96800003 width=860)
+ Output:["_col0"],keys:_col0
+ Merge Join Operator [MERGEJOIN_339] (rows=96800003 width=860)
+ Conds:RS_47._col1=RS_48._col0(Inner),Output:["_col0"]
+ <-Map 27 [SIMPLE_EDGE]
+ SHUFFLE [RS_48]
+ PartitionCols:_col0
+ Select Operator [SEL_43] (rows=1861800 width=385)
+ Output:["_col0"]
+ Filter Operator [FIL_316] (rows=1861800 width=385)
+ predicate:cd_demo_sk is not null
+ TableScan [TS_41] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"]
+ <-Reducer 23 [SIMPLE_EDGE]
+ SHUFFLE [RS_47]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_338] (rows=88000001 width=860)
+ Conds:RS_44._col2=RS_45._col0(Inner),Output:["_col0","_col1"]
+ <-Map 22 [SIMPLE_EDGE]
+ SHUFFLE [RS_44]
+ PartitionCols:_col2
+ Select Operator [SEL_37] (rows=80000000 width=860)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_314] (rows=80000000 width=860)
+ predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null)
+ TableScan [TS_35] (rows=80000000 width=860)
+ default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+ <-Map 26 [SIMPLE_EDGE]
+ SHUFFLE [RS_45]
+ PartitionCols:_col0
+ Select Operator [SEL_40] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_315] (rows=20000000 width=1014)
+ predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null)
+ TableScan [TS_38] (rows=40000000 width=1014)
+ default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_135]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_334] (rows=96800003 width=860)
+ Conds:RS_132._col1=RS_133._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"]
+ <-Map 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_133]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=1861800 width=385)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+ Filter Operator [FIL_308] (rows=1861800 width=385)
+ predicate:cd_demo_sk is not null
+ TableScan [TS_6] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_132]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_333] (rows=88000001 width=860)
+ Conds:RS_129._col2=RS_130._col0(Inner),Output:["_col0","_col1"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_129]
+ PartitionCols:_col2
+ Select Operator [SEL_2] (rows=80000000 width=860)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_306] (rows=80000000 width=860)
+ predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null)
+ TableScan [TS_0] (rows=80000000 width=860)
+ default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+ <-Map 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_130]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_307] (rows=20000000 width=1014)
+ predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null)
+ TableScan [TS_3] (rows=40000000 width=1014)
+ default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 32 [SIMPLE_EDGE]
+ SHUFFLE [RS_137]
+ PartitionCols:_col0
+ Group By Operator [GBY_127] (rows=16866305250077374 width=273)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 31 [SIMPLE_EDGE]
+ SHUFFLE [RS_126]
+ PartitionCols:_col0
+ Group By Operator [GBY_125] (rows=33732610500154748 width=273)
+ Output:["_col0"],keys:_col6
+ Merge Join Operator [MERGEJOIN_353] (rows=33732610500154748 width=273)
+ Conds:(Inner),Output:["_col6"]
+ <-Reducer 30 [SIMPLE_EDGE]
+ SHUFFLE [RS_121]
+ Merge Join Operator [MERGEJOIN_352] (rows=696954748 width=88)
+ Conds:RS_118._col1=RS_119._col0(Inner)
+ <-Reducer 29 [SIMPLE_EDGE]
+ SHUFFLE [RS_118]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_340] (rows=633595212 width=88)
+ Conds:RS_115._col0=RS_116._col0(Inner),Output:["_col1"]
+ <-Map 28 [SIMPLE_EDGE]
+ SHUFFLE [RS_115]
+ PartitionCols:_col0
+ Select Operator [SEL_71] (rows=575995635 width=88)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_317] (rows=575995635 width=88)
+ predicate:ss_sold_date_sk is not null
+ TableScan [TS_69] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
+ <-Map 33 [SIMPLE_EDGE]
+ SHUFFLE [RS_116]
+ PartitionCols:_col0
+ Select Operator [SEL_74] (rows=4058 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_318] (rows=4058 width=1119)
+ predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null)
+ TableScan [TS_72] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 37 [SIMPLE_EDGE]
+ SHUFFLE [RS_119]
+ PartitionCols:_col0
+ Group By Operator [GBY_93] (rows=48400001 width=860)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 36 [SIMPLE_EDGE]
+ SHUFFLE [RS_92]
+ PartitionCols:_col0
+ Group By Operator [GBY_91] (rows=96800003 width=860)
+ Output:["_col0"],keys:_col0
+ Merge Join Operator [MERGEJOIN_342] (rows=96800003 width=860)
+ Conds:RS_87._col1=RS_88._col0(Inner),Output:["_col0"]
+ <-Map 39 [SIMPLE_EDGE]
+ SHUFFLE [RS_88]
+ PartitionCols:_col0
+ Select Operator [SEL_83] (rows=1861800 width=385)
+ Output:["_col0"]
+ Filter Operator [FIL_321] (rows=1861800 width=385)
+ predicate:cd_demo_sk is not null
+ TableScan [TS_81] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"]
+ <-Reducer 35 [SIMPLE_EDGE]
+ SHUFFLE [RS_87]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_341] (rows=88000001 width=860)
+ Conds:RS_84._col2=RS_85._col0(Inner),Output:["_col0","_col1"]
+ <-Map 34 [SIMPLE_EDGE]
+ SHUFFLE [RS_84]
+ PartitionCols:_col2
+ Select Operator [SEL_77] (rows=80000000 width=860)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_319] (rows=80000000 width=860)
+ predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null)
+ TableScan [TS_75] (rows=80000000 width=860)
+ default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+ <-Map 38 [SIMPLE_EDGE]
+ SHUFFLE [RS_85]
+ PartitionCols:_col0
+ Select Operator [SEL_80] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_320] (rows=20000000 width=1014)
+ predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null)
+ TableScan [TS_78] (rows=40000000 width=1014)
+ default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 43 [SIMPLE_EDGE]
+ SHUFFLE [RS_122]
+ Group By Operator [GBY_113] (rows=48400001 width=860)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 42 [SIMPLE_EDGE]
+ SHUFFLE [RS_112]
+ PartitionCols:_col0
+ Group By Operator [GBY_111] (rows=96800003 width=860)
+ Output:["_col0"],keys:_col0
+ Merge Join Operator [MERGEJOIN_344] (rows=96800003 width=860)
+ Conds:RS_107._col1=RS_108._col0(Inner),Output:["_col0"]
+ <-Map 45 [SIMPLE_EDGE]
+ SHUFFLE [RS_108]
+ PartitionCols:_col0
+ Select Operator [SEL_103] (rows=1861800 width=385)
+ Output:["_col0"]
+ Filter Operator [FIL_324] (rows=1861800 width=385)
+ predicate:cd_demo_sk is not null
+ TableScan [TS_101] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"]
+ <-Reducer 41 [SIMPLE_EDGE]
+ SHUFFLE [RS_107]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_343] (rows=88000001 width=860)
+ Conds:RS_104._col2=RS_105._col0(Inner),Output:["_col0","_col1"]
+ <-Map 40 [SIMPLE_EDGE]
+ SHUFFLE [RS_104]
+ PartitionCols:_col2
+ Select Operator [SEL_97] (rows=80000000 width=860)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_322] (rows=80000000 width=860)
+ predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null)
+ TableScan [TS_95] (rows=80000000 width=860)
+ default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+ <-Map 44 [SIMPLE_EDGE]
+ SHUFFLE [RS_105]
+ PartitionCols:_col0
+ Select Operator [SEL_100] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_323] (rows=20000000 width=1014)
+ predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null)
+ TableScan [TS_98] (rows=40000000 width=1014)
+ default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 50 [SIMPLE_EDGE]
+ SHUFFLE [RS_203]
+ PartitionCols:_col0
+ Select Operator [SEL_201] (rows=8432918901433858 width=546)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_200] (rows=8432918901433858 width=546)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 49 [SIMPLE_EDGE]
+ SHUFFLE [RS_199]
+ PartitionCols:_col0
+ Group By Operator [GBY_198] (rows=16865837802867716 width=546)
+ Output:["_col0"],keys:_col6
+ Merge Join Operator [MERGEJOIN_356] (rows=16865837802867716 width=546)
+ Conds:(Inner),Output:["_col6"]
+ <-Reducer 48 [SIMPLE_EDGE]
+ SHUFFLE [RS_194]
+ Merge Join Operator [MERGEJOIN_355] (rows=348467716 width=135)
+ Conds:RS_191._col1=RS_192._col0(Inner)
+ <-Reducer 47 [SIMPLE_EDGE]
+ SHUFFLE [RS_191]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_345] (rows=316788826 width=135)
+ Conds:RS_188._col0=RS_189._col0(Inner),Output:["_col1"]
+ <-Map 46 [SIMPLE_EDGE]
+ SHUFFLE [RS_188]
+ PartitionCols:_col0
+ Select Operator [SEL_144] (rows=287989836 width=135)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_325] (rows=287989836 width=135)
+ predicate:cs_sold_date_sk is not null
+ TableScan [TS_142] (rows=287989836 width=135)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
+ <-Map 51 [SIMPLE_EDGE]
+ SHUFFLE [RS_189]
+ PartitionCols:_col0
+ Select Operator [SEL_147] (rows=4058 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_326] (rows=4058 width=1119)
+ predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null)
+ TableScan [TS_145] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Reducer 55 [SIMPLE_EDGE]
+ SHUFFLE [RS_192]
+ PartitionCols:_col0
+ Group By Operator [GBY_166] (rows=48400001 width=860)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 54 [SIMPLE_EDGE]
+ SHUFFLE [RS_165]
+ PartitionCols:_col0
+ Group By Operator [GBY_164] (rows=96800003 width=860)
+ Output:["_col0"],keys:_col0
+ Merge Join Operator [MERGEJOIN_347] (rows=96800003 width=860)
+ Conds:RS_160._col1=RS_161._col0(Inner),Output:["_col0"]
+ <-Map 57 [SIMPLE_EDGE]
+ SHUFFLE [RS_161]
+ PartitionCols:_col0
+ Select Operator [SEL_156] (rows=1861800 width=385)
+ Output:["_col0"]
+ Filter Operator [FIL_329] (rows=1861800 width=385)
+ predicate:cd_demo_sk is not null
+ TableScan [TS_154] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"]
+ <-Reducer 53 [SIMPLE_EDGE]
+ SHUFFLE [RS_160]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_346] (rows=88000001 width=860)
+ Conds:RS_157._col2=RS_158._col0(Inner),Output:["_col0","_col1"]
+ <-Map 52 [SIMPLE_EDGE]
+ SHUFFLE [RS_157]
+ PartitionCols:_col2
+ Select Operator [SEL_150] (rows=80000000 width=860)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_327] (rows=80000000 width=860)
+ predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null)
+ TableScan [TS_148] (rows=80000000 width=860)
+ default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+ <-Map 56 [SIMPLE_EDGE]
+ SHUFFLE [RS_158]
+ PartitionCols:_col0
+ Select Operator [SEL_153] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_328] (rows=20000000 width=1014)
+ predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null)
+ TableScan [TS_151] (rows=40000000 width=1014)
+ default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 61 [SIMPLE_EDGE]
+ SHUFFLE [RS_195]
+ Group By Operator [GBY_186] (rows=48400001 width=860)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 60 [SIMPLE_EDGE]
+ SHUFFLE [RS_185]
+ PartitionCols:_col0
+ Group By Operator [GBY_184] (rows=96800003 width=860)
+ Output:["_col0"],keys:_col0
+ Merge Join Operator [MERGEJOIN_349] (rows=96800003 width=860)
+ Conds:RS_180._col1=RS_181._col0(Inner),Output:["_col0"]
+ <-Map 63 [SIMPLE_EDGE]
+ SHUFFLE [RS_181]
+ PartitionCols:_col0
+ Select Operator [SEL_176] (rows=1861800 width=385)
+ Output:["_col0"]
+ Filter Operator [FIL_332] (rows=1861800 width=385)
+ predicate:cd_demo_sk is not null
+ TableScan [TS_174] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"]
+ <-Reducer 59 [SIMPLE_EDGE]
+ SHUFFLE [RS_180]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_348] (rows=88000001 width=860)
+ Conds:RS_177._col2=RS_178._col0(Inner),Output:["_col0","_col1"]
+ <-Map 58 [SIMPLE_EDGE]
+ SHUFFLE [RS_177]
+ PartitionCols:_col2
+ Select Operator [SEL_170] (rows=80000000 width=860)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_330] (rows=80000000 width=860)
+ predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null)
+ TableScan [TS_168] (rows=80000000 width=860)
+ default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+ <-Map 62 [SIMPLE_EDGE]
+ SHUFFLE [RS_178]
+ PartitionCols:_col0
+ Select Operator [SEL_173] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_331] (rows=20000000 width=1014)
+ predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null)
+ TableScan [TS_171] (rows=40000000 width=1014)
+ default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+