You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2018/01/22 05:18:52 UTC
hive git commit: HIVE-18490 : Query with EXISTS and NOT EXISTS with
non-equi predicate can produce wrong result (Vineet Garg,
reviewed by Jesus Camacho Rodriguez)
Repository: hive
Updated Branches:
refs/heads/master d62a038a1 -> a59cb886b
HIVE-18490 : Query with EXISTS and NOT EXISTS with non-equi predicate can produce wrong result (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a59cb886
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a59cb886
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a59cb886
Branch: refs/heads/master
Commit: a59cb886ba17aa1f2600f80405989bf38f00289a
Parents: d62a038
Author: Vineet Garg <vg...@apache.org>
Authored: Sun Jan 21 21:17:44 2018 -0800
Committer: Vineet Garg <vg...@apache.org>
Committed: Sun Jan 21 21:17:44 2018 -0800
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 22 +-
.../queries/clientpositive/subquery_multi.q | 11 +
.../clientpositive/llap/subquery_multi.q.out | 179 ++++++++++++++++
.../clientpositive/perf/spark/query16.q.out | 101 +++++----
.../clientpositive/perf/spark/query94.q.out | 101 +++++----
.../clientpositive/perf/tez/query16.q.out | 212 ++++++++++---------
.../clientpositive/perf/tez/query94.q.out | 212 ++++++++++---------
.../clientpositive/spark/subquery_multi.q.out | 169 +++++++++++++++
8 files changed, 713 insertions(+), 294 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a59cb886/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d0fe8ab..511014c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9410,6 +9410,25 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return true;
}
+ boolean shouldMerge(final QBJoinTree node, final QBJoinTree target) {
+ boolean isNodeOuterJoin=false, isNodeSemiJoin=false, hasNodePostJoinFilters=false;
+ boolean isTargetOuterJoin=false, isTargetSemiJoin=false, hasTargetPostJoinFilters=false;
+
+ isNodeOuterJoin = !node.getNoOuterJoin();
+ isNodeSemiJoin= !node.getNoSemiJoin();
+ hasNodePostJoinFilters = node.getPostJoinFilters().size() !=0;
+
+ isTargetOuterJoin = !target.getNoOuterJoin();
+ isTargetSemiJoin= !target.getNoSemiJoin();
+ hasTargetPostJoinFilters = target.getPostJoinFilters().size() !=0;
+
+ if((hasNodePostJoinFilters && (isNodeOuterJoin || isNodeSemiJoin))
+ || (hasTargetPostJoinFilters && (isTargetOuterJoin || isTargetSemiJoin))) {
+ return false;
+ }
+ return true;
+ }
+
// try merge join tree from inner most source
// (it was merged from outer most to inner, which could be invalid)
//
@@ -9446,8 +9465,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (prevType != null && prevType != currType) {
break;
}
- if ((!node.getNoOuterJoin() && node.getPostJoinFilters().size() != 0) ||
- (!target.getNoOuterJoin() && target.getPostJoinFilters().size() != 0)) {
+ if(!shouldMerge(node, target)) {
// Outer joins with post-filtering conditions cannot be merged
break;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a59cb886/ql/src/test/queries/clientpositive/subquery_multi.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_multi.q b/ql/src/test/queries/clientpositive/subquery_multi.q
index 86305ad..780647d 100644
--- a/ql/src/test/queries/clientpositive/subquery_multi.q
+++ b/ql/src/test/queries/clientpositive/subquery_multi.q
@@ -126,6 +126,17 @@ select count(*) from src
or src.value is not null
or exists(select key from src);
+-- EXISTS and NOT EXISTS with non-equi predicate
+explain select * from part ws1 where
+ exists (select * from part ws2 where ws1.p_type= ws2.p_type
+ and ws1.p_retailprice <> ws2.p_retailprice)
+ and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name);
+select * from part ws1 where
+ exists (select * from part ws2 where ws1.p_type= ws2.p_type
+ and ws1.p_retailprice <> ws2.p_retailprice)
+ and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name);
+
+
drop table tnull;
drop table tempty;
drop table part_null;
http://git-wip-us.apache.org/repos/asf/hive/blob/a59cb886/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
index 216bc23..67a22c8 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
@@ -4161,6 +4161,185 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
500
+PREHOOK: query: explain select * from part ws1 where
+ exists (select * from part ws2 where ws1.p_type= ws2.p_type
+ and ws1.p_retailprice <> ws2.p_retailprice)
+ and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from part ws1 where
+ exists (select * from part ws2 where ws1.p_type= ws2.p_type
+ and ws1.p_retailprice <> ws2.p_retailprice)
+ and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ws1
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: string)
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: ws2
+ Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (p_retailprice is not null and p_type is not null) (type: boolean)
+ Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_type (type: string), p_retailprice (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: wr1
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_name (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col4 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10
+ residual filter predicates: {(_col7 <> _col10)}
+ Statistics: Num rows: 14 Data size: 8778 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: string)
+ Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col4 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10
+ Statistics: Num rows: 15 Data size: 9532 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col10 is null (type: boolean)
+ Statistics: Num rows: 7 Data size: 4448 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 7 Data size: 4448 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 7 Data size: 4448 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part ws1 where
+ exists (select * from part ws2 where ws1.p_type= ws2.p_type
+ and ws1.p_retailprice <> ws2.p_retailprice)
+ and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part ws1 where
+ exists (select * from part ws2 where ws1.p_type= ws2.p_type
+ and ws1.p_retailprice <> ws2.p_retailprice)
+ and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null
+#### A masked pattern was here ####
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
PREHOOK: query: drop table tnull
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@tnull
http://git-wip-us.apache.org/repos/asf/hive/blob/a59cb886/ql/src/test/results/clientpositive/perf/spark/query16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out
index fd31b22..1b763e2 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out
@@ -69,7 +69,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 9
+ Map 10
Map Operator Tree:
TableScan
alias: call_center
@@ -92,7 +92,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: date_dim
@@ -114,12 +114,13 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 12 <- Map 11 (GROUP, 24)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 8 (PARTITION-LEVEL SORT, 494)
- Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 723), Reducer 12 (PARTITION-LEVEL SORT, 723), Reducer 2 (PARTITION-LEVEL SORT, 723)
- Reducer 4 <- Reducer 3 (GROUP, 447)
- Reducer 5 <- Reducer 4 (GROUP, 1)
- Reducer 6 <- Reducer 5 (SORT, 1)
+ Reducer 13 <- Map 12 (GROUP, 24)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 9 (PARTITION-LEVEL SORT, 494)
+ Reducer 3 <- Map 11 (PARTITION-LEVEL SORT, 711), Reducer 2 (PARTITION-LEVEL SORT, 711)
+ Reducer 4 <- Reducer 13 (PARTITION-LEVEL SORT, 459), Reducer 3 (PARTITION-LEVEL SORT, 459)
+ Reducer 5 <- Reducer 4 (GROUP, 246)
+ Reducer 6 <- Reducer 5 (GROUP, 1)
+ Reducer 7 <- Reducer 6 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -142,7 +143,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
input vertices:
- 1 Map 7
+ 1 Map 8
Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
@@ -152,7 +153,7 @@ STAGE PLANS:
value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
Local Work:
Map Reduce Local Work
- Map 10
+ Map 11
Map Operator Tree:
TableScan
alias: cs2
@@ -175,7 +176,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
- Map 11
+ Map 12
Map Operator Tree:
TableScan
alias: cr1
@@ -193,7 +194,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE
- Map 8
+ Map 9
Map Operator Tree:
TableScan
alias: customer_address
@@ -210,7 +211,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
- Reducer 12
+ Reducer 13
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int)
@@ -247,7 +248,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col3, _col4, _col5, _col6
input vertices:
- 1 Map 9
+ 1 Map 10
Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: int)
@@ -260,45 +261,59 @@ STAGE PLANS:
Join Operator
condition map:
Left Semi Join 0 to 1
- Left Outer Join 0 to 2
keys:
0 _col4 (type: int)
1 _col0 (type: int)
- 2 _col0 (type: int)
- outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col16
+ outputColumnNames: _col3, _col4, _col5, _col6, _col14
residual filter predicates: {(_col3 <> _col14)}
- Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col16 (type: boolean)
- outputColumnNames: _col4, _col5, _col6, _col16
- Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col16 is null (type: boolean)
- Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
- outputColumnNames: _col4, _col5, _col6
- Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(_col5), sum(_col6)
- keys: _col4 (type: int)
- mode: hash
- outputColumnNames: _col0, _col2, _col3
- Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
+ expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ outputColumnNames: _col4, _col5, _col6
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: int)
+ Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
Reducer 4
Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col5, _col6, _col14
+ Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col14 is null (type: boolean)
+ Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ outputColumnNames: _col4, _col5, _col6
+ Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col5), sum(_col6)
+ keys: _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), sum(VALUE._col1)
keys: KEY._col0 (type: int)
mode: partial2
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col0), sum(_col1), sum(_col2)
mode: partial2
@@ -308,7 +323,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
- Reducer 5
+ Reducer 6
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
@@ -325,7 +340,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
- Reducer 6
+ Reducer 7
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2))
http://git-wip-us.apache.org/repos/asf/hive/blob/a59cb886/ql/src/test/results/clientpositive/perf/spark/query94.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query94.q.out b/ql/src/test/results/clientpositive/perf/spark/query94.q.out
index 995875a..39c4476a 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query94.q.out
@@ -65,7 +65,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 9
+ Map 10
Map Operator Tree:
TableScan
alias: web_site
@@ -88,7 +88,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: date_dim
@@ -110,12 +110,13 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 12 <- Map 11 (GROUP, 11)
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 8 (PARTITION-LEVEL SORT, 327)
- Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 362), Reducer 12 (PARTITION-LEVEL SORT, 362), Reducer 2 (PARTITION-LEVEL SORT, 362)
- Reducer 4 <- Reducer 3 (GROUP, 224)
- Reducer 5 <- Reducer 4 (GROUP, 1)
- Reducer 6 <- Reducer 5 (SORT, 1)
+ Reducer 13 <- Map 12 (GROUP, 11)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 9 (PARTITION-LEVEL SORT, 327)
+ Reducer 3 <- Map 11 (PARTITION-LEVEL SORT, 357), Reducer 2 (PARTITION-LEVEL SORT, 357)
+ Reducer 4 <- Reducer 13 (PARTITION-LEVEL SORT, 230), Reducer 3 (PARTITION-LEVEL SORT, 230)
+ Reducer 5 <- Reducer 4 (GROUP, 124)
+ Reducer 6 <- Reducer 5 (GROUP, 1)
+ Reducer 7 <- Reducer 6 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -138,7 +139,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6
input vertices:
- 1 Map 7
+ 1 Map 8
Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
@@ -148,7 +149,7 @@ STAGE PLANS:
value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
Local Work:
Map Reduce Local Work
- Map 10
+ Map 11
Map Operator Tree:
TableScan
alias: ws2
@@ -171,7 +172,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
- Map 11
+ Map 12
Map Operator Tree:
TableScan
alias: wr1
@@ -189,7 +190,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE
- Map 8
+ Map 9
Map Operator Tree:
TableScan
alias: customer_address
@@ -206,7 +207,7 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
- Reducer 12
+ Reducer 13
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int)
@@ -243,7 +244,7 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col3, _col4, _col5, _col6
input vertices:
- 1 Map 9
+ 1 Map 10
Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: int)
@@ -256,45 +257,59 @@ STAGE PLANS:
Join Operator
condition map:
Left Semi Join 0 to 1
- Left Outer Join 0 to 2
keys:
0 _col4 (type: int)
1 _col0 (type: int)
- 2 _col0 (type: int)
- outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col16
+ outputColumnNames: _col3, _col4, _col5, _col6, _col14
residual filter predicates: {(_col3 <> _col14)}
- Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col16 (type: boolean)
- outputColumnNames: _col4, _col5, _col6, _col16
- Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col16 is null (type: boolean)
- Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
- outputColumnNames: _col4, _col5, _col6
- Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(_col5), sum(_col6)
- keys: _col4 (type: int)
- mode: hash
- outputColumnNames: _col0, _col2, _col3
- Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
+ expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ outputColumnNames: _col4, _col5, _col6
+ Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: int)
+ Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
Reducer 4
Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col4 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col4, _col5, _col6, _col14
+ Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col14 is null (type: boolean)
+ Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2))
+ outputColumnNames: _col4, _col5, _col6
+ Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col5), sum(_col6)
+ keys: _col4 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col2, _col3
+ Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2))
+ Reducer 5
+ Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), sum(VALUE._col1)
keys: KEY._col0 (type: int)
mode: partial2
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col0), sum(_col1), sum(_col2)
mode: partial2
@@ -304,7 +319,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
- Reducer 5
+ Reducer 6
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2)
@@ -321,7 +336,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2))
- Reducer 6
+ Reducer 7
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2))
http://git-wip-us.apache.org/repos/asf/hive/blob/a59cb886/ql/src/test/results/clientpositive/perf/tez/query16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/query16.q.out
index 8107a05..1c05391 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query16.q.out
@@ -61,126 +61,132 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 14 <- Map 13 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
-Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 15 <- Map 14 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 15 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Reducer 8
- File Output Operator [FS_49]
- Limit [LIM_47] (rows=1 width=344)
+ Reducer 9
+ File Output Operator [FS_51]
+ Limit [LIM_49] (rows=1 width=344)
Number of rows:100
- Select Operator [SEL_46] (rows=1 width=344)
+ Select Operator [SEL_48] (rows=1 width=344)
Output:["_col0","_col1","_col2"]
- <-Reducer 7 [SIMPLE_EDGE]
- SHUFFLE [RS_45]
- Select Operator [SEL_44] (rows=1 width=344)
+ <-Reducer 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_47]
+ Select Operator [SEL_46] (rows=1 width=344)
Output:["_col1","_col2","_col3"]
- Group By Operator [GBY_77] (rows=1 width=344)
+ Group By Operator [GBY_78] (rows=1 width=344)
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
- <-Reducer 6 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_76]
- Group By Operator [GBY_75] (rows=1 width=344)
+ <-Reducer 7 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_77]
+ Group By Operator [GBY_76] (rows=1 width=344)
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
- Group By Operator [GBY_74] (rows=421645953 width=135)
+ Group By Operator [GBY_75] (rows=231905279 width=135)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
- <-Reducer 5 [SIMPLE_EDGE]
- SHUFFLE [RS_73]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_74]
PartitionCols:_col0
- Group By Operator [GBY_72] (rows=421645953 width=135)
+ Group By Operator [GBY_73] (rows=231905279 width=135)
Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4
- Select Operator [SEL_40] (rows=421645953 width=135)
+ Select Operator [SEL_42] (rows=231905279 width=135)
Output:["_col4","_col5","_col6"]
- Filter Operator [FIL_39] (rows=421645953 width=135)
- predicate:_col16 is null
- Select Operator [SEL_38] (rows=843291907 width=135)
- Output:["_col4","_col5","_col6","_col16"]
- Merge Join Operator [MERGEJOIN_81] (rows=843291907 width=135)
- Conds:RS_34._col4=RS_35._col0(Left Semi),RS_34._col4=RS_36._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14","_col16"],residual filter predicates:{(_col3 <> _col14)}
- <-Map 12 [SIMPLE_EDGE]
- SHUFFLE [RS_35]
- PartitionCols:_col0
- Group By Operator [GBY_33] (rows=287989836 width=135)
- Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_24] (rows=287989836 width=135)
- Output:["_col0","_col1"]
- Filter Operator [FIL_70] (rows=287989836 width=135)
- predicate:(cs_order_number is not null and cs_warehouse_sk is not null)
- TableScan [TS_22] (rows=287989836 width=135)
- default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"]
- <-Reducer 14 [ONE_TO_ONE_EDGE]
- FORWARD [RS_36]
- PartitionCols:_col0
- Select Operator [SEL_31] (rows=14399440 width=106)
- Output:["_col0","_col1"]
- Group By Operator [GBY_30] (rows=14399440 width=106)
- Output:["_col0"],keys:KEY._col0
- <-Map 13 [SIMPLE_EDGE]
- SHUFFLE [RS_29]
- PartitionCols:_col0
- Group By Operator [GBY_28] (rows=28798881 width=106)
- Output:["_col0"],keys:cr_order_number
- Filter Operator [FIL_71] (rows=28798881 width=106)
- predicate:cr_order_number is not null
- TableScan [TS_25] (rows=28798881 width=106)
- default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"]
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_34]
- PartitionCols:_col4
- Merge Join Operator [MERGEJOIN_80] (rows=383314495 width=135)
- Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"]
- <-Map 11 [SIMPLE_EDGE]
- SHUFFLE [RS_19]
+ Filter Operator [FIL_41] (rows=231905279 width=135)
+ predicate:_col14 is null
+ Merge Join Operator [MERGEJOIN_83] (rows=463810558 width=135)
+ Conds:RS_38._col4=RS_39._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"]
+ <-Reducer 15 [ONE_TO_ONE_EDGE]
+ FORWARD [RS_39]
+ PartitionCols:_col0
+ Select Operator [SEL_31] (rows=14399440 width=106)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_30] (rows=14399440 width=106)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_29]
PartitionCols:_col0
- Select Operator [SEL_11] (rows=30 width=2045)
- Output:["_col0"]
- Filter Operator [FIL_69] (rows=30 width=2045)
- predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null)
- TableScan [TS_9] (rows=60 width=2045)
- default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_18]
- PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_79] (rows=348467716 width=135)
- Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"]
- <-Map 10 [SIMPLE_EDGE]
- SHUFFLE [RS_16]
+ Group By Operator [GBY_28] (rows=28798881 width=106)
+ Output:["_col0"],keys:cr_order_number
+ Filter Operator [FIL_72] (rows=28798881 width=106)
+ predicate:cr_order_number is not null
+ TableScan [TS_25] (rows=28798881 width=106)
+ default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"]
+ <-Reducer 5 [ONE_TO_ONE_EDGE]
+ FORWARD [RS_38]
+ PartitionCols:_col4
+ Select Operator [SEL_37] (rows=421645953 width=135)
+ Output:["_col4","_col5","_col6"]
+ Merge Join Operator [MERGEJOIN_82] (rows=421645953 width=135)
+ Conds:RS_34._col4=RS_35._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)}
+ <-Map 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_35]
+ PartitionCols:_col0
+ Group By Operator [GBY_33] (rows=287989836 width=135)
+ Output:["_col0","_col1"],keys:_col0, _col1
+ Select Operator [SEL_24] (rows=287989836 width=135)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_71] (rows=287989836 width=135)
+ predicate:(cs_order_number is not null and cs_warehouse_sk is not null)
+ TableScan [TS_22] (rows=287989836 width=135)
+ default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col4
+ Merge Join Operator [MERGEJOIN_81] (rows=383314495 width=135)
+ Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"]
+ <-Map 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_19]
PartitionCols:_col0
- Select Operator [SEL_8] (rows=20000000 width=1014)
+ Select Operator [SEL_11] (rows=30 width=2045)
Output:["_col0"]
- Filter Operator [FIL_68] (rows=20000000 width=1014)
- predicate:((ca_state = 'NY') and ca_address_sk is not null)
- TableScan [TS_6] (rows=40000000 width=1014)
- default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_15]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_78] (rows=316788826 width=135)
- Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
- <-Map 1 [SIMPLE_EDGE]
- SHUFFLE [RS_12]
- PartitionCols:_col0
- Select Operator [SEL_2] (rows=287989836 width=135)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
- Filter Operator [FIL_66] (rows=287989836 width=135)
- predicate:(cs_call_center_sk is not null and cs_order_number is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null)
- TableScan [TS_0] (rows=287989836 width=135)
- default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"]
- <-Map 9 [SIMPLE_EDGE]
- SHUFFLE [RS_13]
+ Filter Operator [FIL_70] (rows=30 width=2045)
+ predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null)
+ TableScan [TS_9] (rows=60 width=2045)
+ default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_80] (rows=348467716 width=135)
+ Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"]
+ <-Map 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_16]
PartitionCols:_col0
- Select Operator [SEL_5] (rows=8116 width=1119)
+ Select Operator [SEL_8] (rows=20000000 width=1014)
Output:["_col0"]
- Filter Operator [FIL_67] (rows=8116 width=1119)
- predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null)
- TableScan [TS_3] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+ Filter Operator [FIL_69] (rows=20000000 width=1014)
+ predicate:((ca_state = 'NY') and ca_address_sk is not null)
+ TableScan [TS_6] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_15]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_79] (rows=316788826 width=135)
+ Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_12]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=287989836 width=135)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+ Filter Operator [FIL_67] (rows=287989836 width=135)
+ predicate:(cs_call_center_sk is not null and cs_order_number is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null)
+ TableScan [TS_0] (rows=287989836 width=135)
+ default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_13]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=8116 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_68] (rows=8116 width=1119)
+ predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
http://git-wip-us.apache.org/repos/asf/hive/blob/a59cb886/ql/src/test/results/clientpositive/perf/tez/query94.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/query94.q.out
index 1c9d16b..e18d45b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query94.q.out
@@ -57,126 +57,132 @@ POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 14 <- Map 13 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
-Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 15 <- Map 14 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 15 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
- Reducer 8
- File Output Operator [FS_49]
- Limit [LIM_47] (rows=1 width=344)
+ Reducer 9
+ File Output Operator [FS_51]
+ Limit [LIM_49] (rows=1 width=344)
Number of rows:100
- Select Operator [SEL_46] (rows=1 width=344)
+ Select Operator [SEL_48] (rows=1 width=344)
Output:["_col0","_col1","_col2"]
- <-Reducer 7 [SIMPLE_EDGE]
- SHUFFLE [RS_45]
- Select Operator [SEL_44] (rows=1 width=344)
+ <-Reducer 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_47]
+ Select Operator [SEL_46] (rows=1 width=344)
Output:["_col1","_col2","_col3"]
- Group By Operator [GBY_77] (rows=1 width=344)
+ Group By Operator [GBY_78] (rows=1 width=344)
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
- <-Reducer 6 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_76]
- Group By Operator [GBY_75] (rows=1 width=344)
+ <-Reducer 7 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_77]
+ Group By Operator [GBY_76] (rows=1 width=344)
Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"]
- Group By Operator [GBY_74] (rows=210834322 width=135)
+ Group By Operator [GBY_75] (rows=115958879 width=135)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0
- <-Reducer 5 [SIMPLE_EDGE]
- SHUFFLE [RS_73]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_74]
PartitionCols:_col0
- Group By Operator [GBY_72] (rows=210834322 width=135)
+ Group By Operator [GBY_73] (rows=115958879 width=135)
Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4
- Select Operator [SEL_40] (rows=210834322 width=135)
+ Select Operator [SEL_42] (rows=115958879 width=135)
Output:["_col4","_col5","_col6"]
- Filter Operator [FIL_39] (rows=210834322 width=135)
- predicate:_col16 is null
- Select Operator [SEL_38] (rows=421668645 width=135)
- Output:["_col4","_col5","_col6","_col16"]
- Merge Join Operator [MERGEJOIN_81] (rows=421668645 width=135)
- Conds:RS_34._col4=RS_35._col0(Left Semi),RS_34._col4=RS_36._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14","_col16"],residual filter predicates:{(_col3 <> _col14)}
- <-Map 12 [SIMPLE_EDGE]
- SHUFFLE [RS_35]
- PartitionCols:_col0
- Group By Operator [GBY_33] (rows=144002668 width=135)
- Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_24] (rows=144002668 width=135)
- Output:["_col0","_col1"]
- Filter Operator [FIL_70] (rows=144002668 width=135)
- predicate:(ws_order_number is not null and ws_warehouse_sk is not null)
- TableScan [TS_22] (rows=144002668 width=135)
- default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
- <-Reducer 14 [ONE_TO_ONE_EDGE]
- FORWARD [RS_36]
- PartitionCols:_col0
- Select Operator [SEL_31] (rows=7199233 width=92)
- Output:["_col0","_col1"]
- Group By Operator [GBY_30] (rows=7199233 width=92)
- Output:["_col0"],keys:KEY._col0
- <-Map 13 [SIMPLE_EDGE]
- SHUFFLE [RS_29]
- PartitionCols:_col0
- Group By Operator [GBY_28] (rows=14398467 width=92)
- Output:["_col0"],keys:wr_order_number
- Filter Operator [FIL_71] (rows=14398467 width=92)
- predicate:wr_order_number is not null
- TableScan [TS_25] (rows=14398467 width=92)
- default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"]
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_34]
- PartitionCols:_col4
- Merge Join Operator [MERGEJOIN_80] (rows=191667562 width=135)
- Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"]
- <-Map 11 [SIMPLE_EDGE]
- SHUFFLE [RS_19]
+ Filter Operator [FIL_41] (rows=115958879 width=135)
+ predicate:_col14 is null
+ Merge Join Operator [MERGEJOIN_83] (rows=231917759 width=135)
+ Conds:RS_38._col4=RS_39._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"]
+ <-Reducer 15 [ONE_TO_ONE_EDGE]
+ FORWARD [RS_39]
+ PartitionCols:_col0
+ Select Operator [SEL_31] (rows=7199233 width=92)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_30] (rows=7199233 width=92)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_29]
PartitionCols:_col0
- Select Operator [SEL_11] (rows=42 width=1850)
- Output:["_col0"]
- Filter Operator [FIL_69] (rows=42 width=1850)
- predicate:((web_company_name = 'pri') and web_site_sk is not null)
- TableScan [TS_9] (rows=84 width=1850)
- default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_18]
- PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_79] (rows=174243235 width=135)
- Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"]
- <-Map 10 [SIMPLE_EDGE]
- SHUFFLE [RS_16]
+ Group By Operator [GBY_28] (rows=14398467 width=92)
+ Output:["_col0"],keys:wr_order_number
+ Filter Operator [FIL_72] (rows=14398467 width=92)
+ predicate:wr_order_number is not null
+ TableScan [TS_25] (rows=14398467 width=92)
+ default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"]
+ <-Reducer 5 [ONE_TO_ONE_EDGE]
+ FORWARD [RS_38]
+ PartitionCols:_col4
+ Select Operator [SEL_37] (rows=210834322 width=135)
+ Output:["_col4","_col5","_col6"]
+ Merge Join Operator [MERGEJOIN_82] (rows=210834322 width=135)
+ Conds:RS_34._col4=RS_35._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)}
+ <-Map 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_35]
+ PartitionCols:_col0
+ Group By Operator [GBY_33] (rows=144002668 width=135)
+ Output:["_col0","_col1"],keys:_col0, _col1
+ Select Operator [SEL_24] (rows=144002668 width=135)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_71] (rows=144002668 width=135)
+ predicate:(ws_order_number is not null and ws_warehouse_sk is not null)
+ TableScan [TS_22] (rows=144002668 width=135)
+ default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col4
+ Merge Join Operator [MERGEJOIN_81] (rows=191667562 width=135)
+ Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"]
+ <-Map 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_19]
PartitionCols:_col0
- Select Operator [SEL_8] (rows=20000000 width=1014)
+ Select Operator [SEL_11] (rows=42 width=1850)
Output:["_col0"]
- Filter Operator [FIL_68] (rows=20000000 width=1014)
- predicate:((ca_state = 'TX') and ca_address_sk is not null)
- TableScan [TS_6] (rows=40000000 width=1014)
- default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_15]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_78] (rows=158402938 width=135)
- Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
- <-Map 1 [SIMPLE_EDGE]
- SHUFFLE [RS_12]
- PartitionCols:_col0
- Select Operator [SEL_2] (rows=144002668 width=135)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
- Filter Operator [FIL_66] (rows=144002668 width=135)
- predicate:(ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null)
- TableScan [TS_0] (rows=144002668 width=135)
- default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"]
- <-Map 9 [SIMPLE_EDGE]
- SHUFFLE [RS_13]
+ Filter Operator [FIL_70] (rows=42 width=1850)
+ predicate:((web_company_name = 'pri') and web_site_sk is not null)
+ TableScan [TS_9] (rows=84 width=1850)
+ default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_80] (rows=174243235 width=135)
+ Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"]
+ <-Map 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_16]
PartitionCols:_col0
- Select Operator [SEL_5] (rows=8116 width=1119)
+ Select Operator [SEL_8] (rows=20000000 width=1014)
Output:["_col0"]
- Filter Operator [FIL_67] (rows=8116 width=1119)
- predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null)
- TableScan [TS_3] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+ Filter Operator [FIL_69] (rows=20000000 width=1014)
+ predicate:((ca_state = 'TX') and ca_address_sk is not null)
+ TableScan [TS_6] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_15]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_79] (rows=158402938 width=135)
+ Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_12]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=144002668 width=135)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+ Filter Operator [FIL_67] (rows=144002668 width=135)
+ predicate:(ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null)
+ TableScan [TS_0] (rows=144002668 width=135)
+ default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_13]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=8116 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_68] (rows=8116 width=1119)
+ predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
http://git-wip-us.apache.org/repos/asf/hive/blob/a59cb886/ql/src/test/results/clientpositive/spark/subquery_multi.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out
index d816571..3764d92 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out
@@ -4122,6 +4122,175 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
500
+PREHOOK: query: explain select * from part ws1 where
+ exists (select * from part ws2 where ws1.p_type= ws2.p_type
+ and ws1.p_retailprice <> ws2.p_retailprice)
+ and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from part ws1 where
+ exists (select * from part ws2 where ws1.p_type= ws2.p_type
+ and ws1.p_retailprice <> ws2.p_retailprice)
+ and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
+ Reducer 6 <- Map 5 (GROUP, 2)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: ws1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: ws2
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (p_retailprice is not null and p_type is not null) (type: boolean)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_type (type: string), p_retailprice (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: wr1
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: p_name (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col4 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10
+ residual filter predicates: {(_col7 <> _col10)}
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col4 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: string)
+ Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col4 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10
+ Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col10 is null (type: boolean)
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part ws1 where
+ exists (select * from part ws2 where ws1.p_type= ws2.p_type
+ and ws1.p_retailprice <> ws2.p_retailprice)
+ and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part ws1 where
+ exists (select * from part ws2 where ws1.p_type= ws2.p_type
+ and ws1.p_retailprice <> ws2.p_retailprice)
+ and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null
+#### A masked pattern was here ####
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
PREHOOK: query: drop table tnull
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@tnull