You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/10/09 02:11:11 UTC
svn commit: r1630257 - in /hive/branches/spark:
itests/src/test/resources/testconfiguration.properties
ql/src/test/results/clientpositive/spark/subquery_exists.q.out
ql/src/test/results/clientpositive/spark/subquery_in.q.out
Author: xuefu
Date: Thu Oct 9 00:11:11 2014
New Revision: 1630257
URL: http://svn.apache.org/r1630257
Log:
HIVE-7880: Support subquery [Spark Branch]
Added:
hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_in.q.out
Modified:
hive/branches/spark/itests/src/test/resources/testconfiguration.properties
Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1630257&r1=1630256&r2=1630257&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Thu Oct 9 00:11:11 2014
@@ -612,4 +612,6 @@ spark.query.files=add_part_multiple.q \
vector_cast_constant.q \
vectorization_9.q \
vectorization_12.q \
- windowing.q
+ windowing.q \
+ subquery_exists.q \
+ subquery_in.q
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_exists.q.out?rev=1630257&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_exists.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_exists.q.out Thu Oct 9 00:11:11 2014
@@ -0,0 +1,194 @@
+PREHOOK: query: -- no agg, corr
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9'
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- no agg, corr
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9'
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT), Map 3 (GROUP PARTITION-LEVEL SORT)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Filter Operator
+ predicate: (value is not null and key is not null) (type: boolean)
+ Reduce Output Operator
+ key expressions: value (type: string), key (type: string)
+ sort order: ++
+ Map-reduce partition columns: value (type: string), key (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Filter Operator
+ predicate: (((value > 'val_9') and value is not null) and key is not null) (type: boolean)
+ Select Operator
+ expressions: value (type: string), key (type: string)
+ outputColumnNames: _col1, _col2
+ Group By Operator
+ keys: _col1 (type: string), _col2 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0}
+ 1
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: -- view test
+create view cv1 as
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9')
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cv1
+POSTHOOK: query: -- view test
+create view cv1 as
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9')
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cv1
+PREHOOK: query: select * from cv1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cv1
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select * from cv1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cv1
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: -- sq in from
+select *
+from (select *
+ from src b
+ where exists
+ (select a.key
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9')
+ ) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- sq in from
+select *
+from (select *
+ from src b
+ where exists
+ (select a.key
+ from src a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9')
+ ) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_in.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_in.q.out?rev=1630257&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_in.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/subquery_in.q.out Thu Oct 9 00:11:11 2014
@@ -0,0 +1,907 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+DROP TABLE part
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+DROP TABLE part
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: -- data setup
+CREATE TABLE part(
+ p_partkey INT,
+ p_name STRING,
+ p_mfgr STRING,
+ p_brand STRING,
+ p_type STRING,
+ p_size INT,
+ p_container STRING,
+ p_retailprice DOUBLE,
+ p_comment STRING
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part
+POSTHOOK: query: -- data setup
+CREATE TABLE part(
+ p_partkey INT,
+ p_name STRING,
+ p_mfgr STRING,
+ p_brand STRING,
+ p_type STRING,
+ p_size INT,
+ p_container STRING,
+ p_retailprice DOUBLE,
+ p_comment STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@part
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@part
+PREHOOK: query: DROP TABLE lineitem
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE lineitem
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY DOUBLE,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DOUBLE,
+ L_RETURNFLAG STRING,
+ L_LINESTATUS STRING,
+ l_shipdate STRING,
+ L_COMMITDATE STRING,
+ L_RECEIPTDATE STRING,
+ L_SHIPINSTRUCT STRING,
+ L_SHIPMODE STRING,
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY DOUBLE,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DOUBLE,
+ L_RETURNFLAG STRING,
+ L_LINESTATUS STRING,
+ l_shipdate STRING,
+ L_COMMITDATE STRING,
+ L_RECEIPTDATE STRING,
+ L_SHIPINSTRUCT STRING,
+ L_SHIPMODE STRING,
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@lineitem
+PREHOOK: query: -- non agg, non corr
+explain
+ select *
+from src
+where src.key in (select key from src s1 where s1.key > '9')
+PREHOOK: type: QUERY
+POSTHOOK: query: -- non agg, non corr
+explain
+ select *
+from src
+where src.key in (select key from src s1 where s1.key > '9')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT), Map 3 (GROUP PARTITION-LEVEL SORT)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Filter Operator
+ predicate: ((key > '9') and key is not null) (type: boolean)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ value expressions: value (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from src
+where src.key in (select key from src s1 where s1.key > '9')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src
+where src.key in (select key from src s1 where s1.key > '9')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: -- non agg, corr
+explain
+select *
+from src b
+where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- non agg, corr
+explain
+select *
+from src b
+where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT), Map 3 (GROUP PARTITION-LEVEL SORT)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Reduce Output Operator
+ key expressions: key (type: string), value (type: string)
+ sort order: ++
+ Map-reduce partition columns: key (type: string), value (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Filter Operator
+ predicate: (((key > '9') and key is not null) and value is not null) (type: boolean)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1}
+ 1
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from src b
+where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src b
+where b.key in
+ (select a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: -- agg, non corr
+explain
+select p_name, p_size
+from
+part where part.p_size in
+ (select avg(p_size)
+ from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
+ where r <= 2
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- agg, non corr
+explain
+select p_name, p_size
+from
+part where part.p_size in
+ (select avg(p_size)
+ from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
+ where r <= 2
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP SORT)
+ Reducer 3 <- Reducer 2 (GROUP)
+ Reducer 4 <- Map 5 (GROUP PARTITION-LEVEL SORT), Reducer 3 (GROUP PARTITION-LEVEL SORT)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_size (type: int)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ value expressions: p_mfgr (type: string), p_size (type: int)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Filter Operator
+ predicate: UDFToDouble(p_size) is not null (type: boolean)
+ Reduce Output Operator
+ key expressions: UDFToDouble(p_size) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(p_size) (type: double)
+ value expressions: p_name (type: string), p_size (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Extract
+ PTF Operator
+ Filter Operator
+ predicate: (_wcol0 <= 2) (type: boolean)
+ Select Operator
+ expressions: _col5 (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ aggregations: avg(_col0)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: struct<count:bigint,sum:double,input:int>)
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Select Operator
+ expressions: _col0 (type: double)
+ outputColumnNames: _col0
+ Group By Operator
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Reducer 4
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col1} {VALUE._col5}
+ 1
+ outputColumnNames: _col1, _col5
+ Select Operator
+ expressions: _col1 (type: string), _col5 (type: int)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_name, p_size
+from
+part where part.p_size in
+ (select avg(p_size)
+ from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
+ where r <= 2
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_name, p_size
+from
+part where part.p_size in
+ (select avg(p_size)
+ from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
+ where r <= 2
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+almond antique medium spring khaki 6
+almond antique salmon chartreuse burlywood 6
+PREHOOK: query: -- agg, corr
+explain
+select p_mfgr, p_name, p_size
+from part b where b.p_size in
+ (select min(p_size)
+ from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
+ where r <= 2 and b.p_mfgr = a.p_mfgr
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- agg, corr
+explain
+select p_mfgr, p_name, p_size
+from part b where b.p_size in
+ (select min(p_size)
+ from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
+ where r <= 2 and b.p_mfgr = a.p_mfgr
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT), Reducer 5 (GROUP PARTITION-LEVEL SORT)
+ Reducer 4 <- Map 3 (GROUP SORT)
+ Reducer 5 <- Reducer 4 (GROUP)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Filter Operator
+ predicate: (p_size is not null and p_mfgr is not null) (type: boolean)
+ Reduce Output Operator
+ key expressions: p_size (type: int), p_mfgr (type: string)
+ sort order: ++
+ Map-reduce partition columns: p_size (type: int), p_mfgr (type: string)
+ value expressions: p_name (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Reduce Output Operator
+ key expressions: p_mfgr (type: string), p_size (type: int)
+ sort order: ++
+ Map-reduce partition columns: p_mfgr (type: string)
+ value expressions: p_mfgr (type: string), p_size (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col1} {KEY.reducesinkkey1} {KEY.reducesinkkey0}
+ 1
+ outputColumnNames: _col1, _col2, _col5
+ Select Operator
+ expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Extract
+ PTF Operator
+ Filter Operator
+ predicate: ((_wcol0 <= 2) and _col2 is not null) (type: boolean)
+ Select Operator
+ expressions: _col2 (type: string), _col5 (type: int)
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ aggregations: min(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: int)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_mfgr, p_name, p_size
+from part b where b.p_size in
+ (select min(p_size)
+ from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
+ where r <= 2 and b.p_mfgr = a.p_mfgr
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_mfgr, p_name, p_size
+from part b where b.p_size in
+ (select min(p_size)
+ from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a
+ where r <= 2 and b.p_mfgr = a.p_mfgr
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1 almond antique burnished rose metallic 2
+Manufacturer#1 almond antique burnished rose metallic 2
+Manufacturer#2 almond aquamarine midnight light salmon 2
+Manufacturer#3 almond antique misty red olive 1
+Manufacturer#4 almond aquamarine yellow dodger mint 7
+Manufacturer#5 almond antique sky peru orange 2
+PREHOOK: query: -- distinct, corr
+explain
+select *
+from src b
+where b.key in
+ (select distinct a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- distinct, corr
+explain
+select *
+from src b
+where b.key in
+ (select distinct a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT), Reducer 4 (GROUP PARTITION-LEVEL SORT)
+ Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Filter Operator
+ predicate: (key is not null and value is not null) (type: boolean)
+ Reduce Output Operator
+ key expressions: key (type: string), value (type: string)
+ sort order: ++
+ Map-reduce partition columns: key (type: string), value (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Filter Operator
+ predicate: (((key > '9') and key is not null) and value is not null) (type: boolean)
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1}
+ 1
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select *
+from src b
+where b.key in
+ (select distinct a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src b
+where b.key in
+ (select distinct a.key
+ from src a
+ where b.value = a.value and a.key > '9'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: -- non agg, non corr, windowing
+select p_mfgr, p_name, p_size
+from part
+where part.p_size in
+ (select first_value(p_size) over(partition by p_mfgr order by p_size) from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- non agg, non corr, windowing
+select p_mfgr, p_name, p_size
+from part
+where part.p_size in
+ (select first_value(p_size) over(partition by p_mfgr order by p_size) from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1 almond antique burnished rose metallic 2
+Manufacturer#1 almond antique burnished rose metallic 2
+Manufacturer#2 almond aquamarine midnight light salmon 2
+Manufacturer#3 almond antique misty red olive 1
+Manufacturer#4 almond aquamarine yellow dodger mint 7
+Manufacturer#5 almond antique sky peru orange 2
+PREHOOK: query: -- non agg, non corr, with join in Parent Query
+explain
+select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
+PREHOOK: type: QUERY
+POSTHOOK: query: -- non agg, non corr, with join in Parent Query
+explain
+select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT), Reducer 5 (GROUP PARTITION-LEVEL SORT)
+ Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT), Reducer 2 (GROUP PARTITION-LEVEL SORT)
+ Reducer 5 <- Map 4 (GROUP)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: li
+ Filter Operator
+ predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean)
+ Reduce Output Operator
+ key expressions: l_partkey (type: int)
+ sort order: +
+ Map-reduce partition columns: l_partkey (type: int)
+ value expressions: l_orderkey (type: int), l_suppkey (type: int)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Filter Operator
+ predicate: l_partkey is not null (type: boolean)
+ Select Operator
+ expressions: l_partkey (type: int)
+ outputColumnNames: l_partkey
+ Group By Operator
+ keys: l_partkey (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: lineitem
+ Filter Operator
+ predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean)
+ Select Operator
+ expressions: l_orderkey (type: int)
+ outputColumnNames: _col0
+ Group By Operator
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0}
+ 1 {VALUE._col0} {VALUE._col1}
+ outputColumnNames: _col0, _col1, _col3
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ value expressions: _col0 (type: int), _col3 (type: int)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col2}
+ 1
+ outputColumnNames: _col0, _col3
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+POSTHOOK: query: select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+108570 8571
+115118 7630
+115209 7721
+155190 7706
+2320 9821
+40216 217
+4297 1798
+61336 8855
+64128 9141
+82704 7721
+PREHOOK: query: -- non agg, corr, with join in Parent Query
+select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+POSTHOOK: query: -- non agg, corr, with join in Parent Query
+select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+108570 8571
+4297 1798