You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/08/02 16:33:32 UTC
hive git commit: HIVE-20281: SharedWorkOptimizer fails with 'operator cache contents and actual plan differ' (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master db605f0d2 -> 8f5974a51


HIVE-20281: SharedWorkOptimizer fails with 'operator cache contents and actual plan differ' (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8f5974a5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8f5974a5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8f5974a5

Branch: refs/heads/master
Commit: 8f5974a511b7021dac075692d392be199458130b
Parents: db605f0
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Aug 1 18:13:56 2018 -0700
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Aug 2 09:32:41 2018 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/SharedWorkOptimizer.java  |  18 +-
 .../queries/clientpositive/subquery_in_having.q |   2 -
 .../llap/subquery_in_having.q.out               | 199 ++++++-------------
 3 files changed, 71 insertions(+), 148 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8f5974a5/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index aff5520..2573754 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -519,10 +519,11 @@ public class SharedWorkOptimizer extends Transform {
         Entry<String, TableScanOperator> e = it.next();
         for (Operator<?> op : OperatorUtils.findOperators(e.getValue(), Operator.class)) {
           if (!visited.contains(op)) {
-            if (!findWorkOperators(optimizerCache, op).equals(
-                findWorkOperators(op, new HashSet<Operator<?>>()))) {
-              throw new SemanticException("Error in shared work optimizer: operator cache contents"
-                  + "and actual plan differ");
+            Set<Operator<?>> workCachedOps = findWorkOperators(optimizerCache, op);
+            Set<Operator<?>> workPlanOps = findWorkOperators(op, new HashSet<>());
+            if (!workCachedOps.equals(workPlanOps)) {
+              throw new SemanticException("Error in shared work optimizer: operator cache contents "
+                  + "and actual plan differ\nIn cache: " + workCachedOps + "\nIn plan: " + workPlanOps);
             }
             visited.add(op);
           }
@@ -799,7 +800,7 @@ public class SharedWorkOptimizer extends Transform {
     }
 
     return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2,
-        currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps, false);
+        currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps);
   }
 
   private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
@@ -810,7 +811,7 @@ public class SharedWorkOptimizer extends Transform {
       Operator<?> discardableOp) throws SemanticException {
     return extractSharedOptimizationInfo(pctx, optimizerCache,
         retainableOpEqualParent, discardableOpEqualParent, retainableOp, discardableOp,
-        new LinkedHashSet<>(), new LinkedHashSet<>(), new HashSet<>(), true);
+        new LinkedHashSet<>(), new LinkedHashSet<>(), new HashSet<>());
   }
 
   private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
@@ -821,8 +822,7 @@ public class SharedWorkOptimizer extends Transform {
       Operator<?> discardableOp,
       LinkedHashSet<Operator<?>> retainableOps,
       LinkedHashSet<Operator<?>> discardableOps,
-      Set<Operator<?>> discardableInputOps,
-      boolean removeInputBranch) throws SemanticException {
+      Set<Operator<?>> discardableInputOps) throws SemanticException {
     Operator<?> equalOp1 = retainableOpEqualParent;
     Operator<?> equalOp2 = discardableOpEqualParent;
     Operator<?> currentOp1 = retainableOp;
@@ -847,7 +847,7 @@ public class SharedWorkOptimizer extends Transform {
         for (; idx < currentOp1.getParentOperators().size(); idx++) {
           Operator<?> parentOp1 = currentOp1.getParentOperators().get(idx);
           Operator<?> parentOp2 = currentOp2.getParentOperators().get(idx);
-          if (parentOp1 == equalOp1 && parentOp2 == equalOp2 && !removeInputBranch) {
+          if (parentOp1 == equalOp1 && parentOp2 == equalOp2) {
             continue;
           }
           if ((parentOp1 == equalOp1 && parentOp2 != equalOp2) ||

http://git-wip-us.apache.org/repos/asf/hive/blob/8f5974a5/ql/src/test/queries/clientpositive/subquery_in_having.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in_having.q b/ql/src/test/queries/clientpositive/subquery_in_having.q
index 8b6d1a7..732b3e5 100644
--- a/ql/src/test/queries/clientpositive/subquery_in_having.q
+++ b/ql/src/test/queries/clientpositive/subquery_in_having.q
@@ -1,6 +1,5 @@
 --! qt:dataset:src
 set hive.mapred.mode=nonstrict;
-set hive.optimize.shared.work.extended=false;
 -- SORT_QUERY_RESULTS
 
 -- data setup
@@ -156,4 +155,3 @@ having count(*) not in (select count(*) from src_null_n4 s1 where s1.key > '9' a
 
 DROP TABLE src_null_n4;
 DROP TABLE part_subq;
-reset hive.optimize.shared.work.extended;

http://git-wip-us.apache.org/repos/asf/hive/blob/8f5974a5/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
index fc4f977..3e0615a 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
@@ -1570,9 +1570,8 @@ POSTHOOK: Output: default@src_null_n4
 POSTHOOK: Lineage: src_null_n4.key SCRIPT []
 POSTHOOK: Lineage: src_null_n4.value EXPRESSION []
 Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product
-Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross product
-Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross product
+Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 5' is a cross product
+Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 6' is a cross product
 PREHOOK: query: explain
 select key, value, count(*)
 from src_null_n4 b
@@ -1597,15 +1596,13 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Map 1 <- Reducer 4 (BROADCAST_EDGE)
-        Map 6 <- Reducer 5 (BROADCAST_EDGE)
-        Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
-        Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
+        Map 5 <- Reducer 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
         Reducer 4 <- Map 3 (SIMPLE_EDGE)
-        Reducer 5 <- Map 3 (SIMPLE_EDGE)
-        Reducer 7 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE)
-        Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
-        Reducer 9 <- Map 1 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE)
+        Reducer 6 <- Map 1 (BROADCAST_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+        Reducer 8 <- Reducer 6 (SIMPLE_EDGE)
+        Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1658,17 +1655,6 @@ STAGE PLANS:
                         sort order: 
                         Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string), _col1 (type: string)
-                  Filter Operator
-                    predicate: (key > '9') (type: boolean)
-                    Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string), _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
         Map 3 
@@ -1690,14 +1676,9 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
-        Map 6 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: b
@@ -1714,7 +1695,7 @@ STAGE PLANS:
                         1 
                       outputColumnNames: _col0, _col1, _col2, _col3
                       input vertices:
-                        1 Reducer 5
+                        1 Reducer 4
                       residual filter predicates: {(_col2 <> _col1)}
                       Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
                       Filter Operator
@@ -1734,61 +1715,8 @@ STAGE PLANS:
                               sort order: ++
                               Map-reduce partition columns: _col0 (type: string)
                               Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
-                            Reduce Output Operator
-                              key expressions: _col0 (type: string), _col1 (type: string)
-                              sort order: ++
-                              Map-reduce partition columns: _col0 (type: string)
-                              Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
-        Reducer 10 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string), KEY._col1 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col2 (type: bigint)
-                  outputColumnNames: _col0, _col2
-                  Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: _col2 is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: _col0 (type: string), _col2 (type: bigint)
-                      outputColumnNames: _col1, _col2
-                      Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col1 (type: string), _col2 (type: bigint)
-                        mode: hash
-                        outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: string), _col1 (type: bigint)
-                          sort order: ++
-                          Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
-                          Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-        Reducer 11 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: bigint), _col0 (type: string), true (type: boolean)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col1 (type: string), _col0 (type: bigint)
-                    sort order: ++
-                    Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint)
-                    Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col2 (type: boolean)
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
@@ -1810,7 +1738,7 @@ STAGE PLANS:
                       1 _col0 (type: string)
                     outputColumnNames: _col0, _col1, _col2, _col4, _col5
                     input vertices:
-                      1 Reducer 8
+                      1 Reducer 7
                     Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
@@ -1820,7 +1748,7 @@ STAGE PLANS:
                         1 _col1 (type: string), _col0 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8
                       input vertices:
-                        1 Reducer 11
+                        1 Reducer 9
                       Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE
                       Filter Operator
                         predicate: CASE WHEN ((_col4 = 0L)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col8 is not null) THEN (false) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean)
@@ -1852,23 +1780,11 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col1 (type: boolean)
-        Reducer 5 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), true (type: boolean)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     sort order: 
                     Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col1 (type: boolean)
-        Reducer 7 
+        Reducer 6 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -1912,7 +1828,13 @@ STAGE PLANS:
                             Map-reduce partition columns: _col0 (type: string)
                             Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
                             value expressions: _col2 (type: bigint)
-        Reducer 8 
+                          Reduce Output Operator
+                            key expressions: _col0 (type: string), _col1 (type: string)
+                            sort order: ++
+                            Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                            Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col2 (type: bigint)
+        Reducer 7 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -1937,50 +1859,54 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: bigint), _col2 (type: bigint)
-        Reducer 9 
-            Execution mode: llap
+        Reducer 8 
+            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
+                aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col1
-                  Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col1 (type: string)
-                    mode: complete
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 
-                        1 
-                      outputColumnNames: _col0, _col1, _col2
-                      input vertices:
-                        0 Map 1
-                      residual filter predicates: {(_col1 <> _col2)}
+                  expressions: _col0 (type: string), _col2 (type: bigint)
+                  outputColumnNames: _col0, _col2
+                  Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: _col2 is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col2 (type: bigint)
+                      outputColumnNames: _col1, _col2
                       Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: string), _col2 (type: string)
-                        outputColumnNames: _col0, _col2
+                      Group By Operator
+                        keys: _col1 (type: string), _col2 (type: bigint)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
                         Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          keys: _col2 (type: string), _col0 (type: string)
-                          mode: hash
-                          outputColumnNames: _col0, _col1, _col2
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: bigint)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
                           Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            key expressions: _col0 (type: string), _col1 (type: string)
-                            sort order: ++
-                            Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                            Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
-                            value expressions: _col2 (type: bigint)
+        Reducer 9 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: bigint), _col0 (type: string), true (type: boolean)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col1 (type: string), _col0 (type: bigint)
+                    sort order: ++
+                    Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint)
+                    Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: boolean)
 
   Stage: Stage-0
     Fetch Operator
@@ -1989,9 +1915,8 @@ STAGE PLANS:
         ListSink
 
 Warning: Map Join MAPJOIN[131][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 6' is a cross product
-Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 7' is a cross product
-Warning: Map Join MAPJOIN[135][bigTable=?] in task 'Reducer 9' is a cross product
+Warning: Map Join MAPJOIN[132][bigTable=?] in task 'Map 5' is a cross product
+Warning: Map Join MAPJOIN[133][bigTable=?] in task 'Reducer 6' is a cross product
 PREHOOK: query: select key, value, count(*)
 from src_null_n4 b
 where NOT EXISTS (select key from src_null_n4 where src_null_n4.value <> b.value)