You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/08/02 16:33:32 UTC
hive git commit: HIVE-20281: SharedWorkOptimizer fails with 'operator cache contents and actual plan differ' (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master db605f0d2 -> 8f5974a51
HIVE-20281: SharedWorkOptimizer fails with 'operator cache contents and actual plan differ' (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8f5974a5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8f5974a5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8f5974a5
Branch: refs/heads/master
Commit: 8f5974a511b7021dac075692d392be199458130b
Parents: db605f0
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Aug 1 18:13:56 2018 -0700
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Aug 2 09:32:41 2018 -0700
----------------------------------------------------------------------
.../hive/ql/optimizer/SharedWorkOptimizer.java | 18 +-
.../queries/clientpositive/subquery_in_having.q | 2 -
.../llap/subquery_in_having.q.out | 199 ++++++-------------
3 files changed, 71 insertions(+), 148 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8f5974a5/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index aff5520..2573754 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -519,10 +519,11 @@ public class SharedWorkOptimizer extends Transform {
Entry<String, TableScanOperator> e = it.next();
for (Operator<?> op : OperatorUtils.findOperators(e.getValue(), Operator.class)) {
if (!visited.contains(op)) {
- if (!findWorkOperators(optimizerCache, op).equals(
- findWorkOperators(op, new HashSet<Operator<?>>()))) {
- throw new SemanticException("Error in shared work optimizer: operator cache contents"
- + "and actual plan differ");
+ Set<Operator<?>> workCachedOps = findWorkOperators(optimizerCache, op);
+ Set<Operator<?>> workPlanOps = findWorkOperators(op, new HashSet<>());
+ if (!workCachedOps.equals(workPlanOps)) {
+ throw new SemanticException("Error in shared work optimizer: operator cache contents "
+ + "and actual plan differ\nIn cache: " + workCachedOps + "\nIn plan: " + workPlanOps);
}
visited.add(op);
}
@@ -799,7 +800,7 @@ public class SharedWorkOptimizer extends Transform {
}
return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2,
- currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps, false);
+ currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps);
}
private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
@@ -810,7 +811,7 @@ public class SharedWorkOptimizer extends Transform {
Operator<?> discardableOp) throws SemanticException {
return extractSharedOptimizationInfo(pctx, optimizerCache,
retainableOpEqualParent, discardableOpEqualParent, retainableOp, discardableOp,
- new LinkedHashSet<>(), new LinkedHashSet<>(), new HashSet<>(), true);
+ new LinkedHashSet<>(), new LinkedHashSet<>(), new HashSet<>());
}
private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
@@ -821,8 +822,7 @@ public class SharedWorkOptimizer extends Transform {
Operator<?> discardableOp,
LinkedHashSet<Operator<?>> retainableOps,
LinkedHashSet<Operator<?>> discardableOps,
- Set<Operator<?>> discardableInputOps,
- boolean removeInputBranch) throws SemanticException {
+ Set<Operator<?>> discardableInputOps) throws SemanticException {
Operator<?> equalOp1 = retainableOpEqualParent;
Operator<?> equalOp2 = discardableOpEqualParent;
Operator<?> currentOp1 = retainableOp;
@@ -847,7 +847,7 @@ public class SharedWorkOptimizer extends Transform {
for (; idx < currentOp1.getParentOperators().size(); idx++) {
Operator<?> parentOp1 = currentOp1.getParentOperators().get(idx);
Operator<?> parentOp2 = currentOp2.getParentOperators().get(idx);
- if (parentOp1 == equalOp1 && parentOp2 == equalOp2 && !removeInputBranch) {
+ if (parentOp1 == equalOp1 && parentOp2 == equalOp2) {
continue;
}
if ((parentOp1 == equalOp1 && parentOp2 != equalOp2) ||
http://git-wip-us.apache.org/repos/asf/hive/blob/8f5974a5/ql/src/test/queries/clientpositive/subquery_in_having.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in_having.q b/ql/src/test/queries/clientpositive/subquery_in_having.q
index 8b6d1a7..732b3e5 100644
--- a/ql/src/test/queries/clientpositive/subquery_in_having.q
+++ b/ql/src/test/queries/clientpositive/subquery_in_having.q
@@ -1,6 +1,5 @@
--! qt:dataset:src
set hive.mapred.mode=nonstrict;
-set hive.optimize.shared.work.extended=false;
-- SORT_QUERY_RESULTS
-- data setup
@@ -156,4 +155,3 @@ having count(*) not in (select count(*) from src_null_n4 s1 where s1.key > '9' a
DROP TABLE src_null_n4;
DROP TABLE part_subq;
-reset hive.optimize.shared.work.extended;
http://git-wip-us.apache.org/repos/asf/hive/blob/8f5974a5/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
index fc4f977..3e0615a 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
@@ -1570,9 +1570,8 @@ POSTHOOK: Output: default@src_null_n4
POSTHOOK: Lineage: src_null_n4.key SCRIPT []
POSTHOOK: Lineage: src_null_n4.value EXPRESSION []
Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product
-Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross product
-Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross product
+Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 5' is a cross product
+Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 6' is a cross product
PREHOOK: query: explain
select key, value, count(*)
from src_null_n4 b
@@ -1597,15 +1596,13 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Map 1 <- Reducer 4 (BROADCAST_EDGE)
- Map 6 <- Reducer 5 (BROADCAST_EDGE)
- Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
- Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
+ Map 5 <- Reducer 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
Reducer 4 <- Map 3 (SIMPLE_EDGE)
- Reducer 5 <- Map 3 (SIMPLE_EDGE)
- Reducer 7 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
- Reducer 9 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE)
+ Reducer 6 <- Map 1 (BROADCAST_EDGE), Map 5 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+ Reducer 8 <- Reducer 6 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1658,17 +1655,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string)
- Filter Operator
- predicate: (key > '9') (type: boolean)
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -1690,14 +1676,9 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 6
+ Map 5
Map Operator Tree:
TableScan
alias: b
@@ -1714,7 +1695,7 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
- 1 Reducer 5
+ 1 Reducer 4
residual filter predicates: {(_col2 <> _col1)}
Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
Filter Operator
@@ -1734,61 +1715,8 @@ STAGE PLANS:
sort order: ++
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
- Reducer 10
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: _col2 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col1, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col1 (type: string), _col2 (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: bigint)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Reducer 11
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: bigint), _col0 (type: string), true (type: boolean)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string), _col0 (type: bigint)
- sort order: ++
- Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint)
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: boolean)
Reducer 2
Execution mode: vectorized, llap
Reduce Operator Tree:
@@ -1810,7 +1738,7 @@ STAGE PLANS:
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col4, _col5
input vertices:
- 1 Reducer 8
+ 1 Reducer 7
Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -1820,7 +1748,7 @@ STAGE PLANS:
1 _col1 (type: string), _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8
input vertices:
- 1 Reducer 11
+ 1 Reducer 9
Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean)
@@ -1852,23 +1780,11 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: boolean)
- Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), true (type: boolean)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: boolean)
- Reducer 7
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1912,7 +1828,13 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
- Reducer 8
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -1937,50 +1859,54 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Reducer 9
- Execution mode: llap
+ Reducer 8
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
+ aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col1
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col1 (type: string)
- mode: complete
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- input vertices:
- 0 Map 1
- residual filter predicates: {(_col1 <> _col2)}
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col2 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col1, _col2
Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col2
+ Group By Operator
+ keys: _col1 (type: string), _col2 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col2 (type: string), _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: bigint)
+ Reducer 9
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint), _col0 (type: string), true (type: boolean)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col0 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: boolean)
Stage: Stage-0
Fetch Operator
@@ -1989,9 +1915,8 @@ STAGE PLANS:
ListSink
Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product
-Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross product
-Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross product
+Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 5' is a cross product
+Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 6' is a cross product
PREHOOK: query: select key, value, count(*)
from src_null_n4 b
where NOT EXISTS (select key from src_null_n4 where src_null_n4.value <> b.value)