You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/09/21 12:51:27 UTC

svn commit: r1388412 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java test/queries/clientpositive/multi_join_union.q test/results/clientpositive/multi_join_union.q.out

Author: namit
Date: Fri Sep 21 10:51:26 2012
New Revision: 1388412

URL: http://svn.apache.org/viewvc?rev=1388412&view=rev
Log:
HIVE-3496 Query plan is wrong for a multi-join where the third table joined is a subquery
containing a map-only union, when hive.auto.convert.join=true
(Kevin Wilfong via namit)



Added:
    hive/trunk/ql/src/test/queries/clientpositive/multi_join_union.q
    hive/trunk/ql/src/test/results/clientpositive/multi_join_union.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1388412&r1=1388411&r2=1388412&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Fri Sep 21 10:51:26 2012
@@ -399,7 +399,8 @@ public final class GenMapRedUtils {
     List<Task<? extends Serializable>> parTasks = null;
     if (opProcCtx.getRootTasks().contains(currentUnionTask)) {
       opProcCtx.getRootTasks().remove(currentUnionTask);
-      if (!opProcCtx.getRootTasks().contains(existingTask)) {
+      if (!opProcCtx.getRootTasks().contains(existingTask) &&
+          (existingTask.getParentTasks() == null || existingTask.getParentTasks().isEmpty())) {
         opProcCtx.getRootTasks().add(existingTask);
       }
     }
@@ -423,6 +424,7 @@ public final class GenMapRedUtils {
         }
       }
     }
+
     opProcCtx.setCurrTask(existingTask);
   }
 

Added: hive/trunk/ql/src/test/queries/clientpositive/multi_join_union.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/multi_join_union.q?rev=1388412&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/multi_join_union.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/multi_join_union.q Fri Sep 21 10:51:26 2012
@@ -0,0 +1,13 @@
+
+set hive.auto.convert.join=true;
+
+CREATE TABLE src11 as SELECT * FROM src;
+CREATE TABLE src12 as SELECT * FROM src;
+CREATE TABLE src13 as SELECT * FROM src;
+CREATE TABLE src14 as SELECT * FROM src;
+
+
+EXPLAIN SELECT * FROM 
+src11 a JOIN
+src12 b ON (a.key = b.key) JOIN
+(SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value;
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/multi_join_union.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/multi_join_union.q.out?rev=1388412&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/multi_join_union.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/multi_join_union.q.out Fri Sep 21 10:51:26 2012
@@ -0,0 +1,563 @@
+PREHOOK: query: CREATE TABLE src11 as SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: CREATE TABLE src11 as SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src11
+PREHOOK: query: CREATE TABLE src12 as SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: CREATE TABLE src12 as SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src12
+PREHOOK: query: CREATE TABLE src13 as SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: CREATE TABLE src13 as SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src13
+PREHOOK: query: CREATE TABLE src14 as SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: CREATE TABLE src14 as SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src14
+PREHOOK: query: EXPLAIN SELECT * FROM 
+src11 a JOIN
+src12 b ON (a.key = b.key) JOIN
+(SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM 
+src11 a JOIN
+src12 b ON (a.key = b.key) JOIN
+(SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME src11) a) (TOK_TABREF (TOK_TABNAME src12) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src13))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src14))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) c) (= (. (TOK_TABLE_OR_COL c) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-11 is a root stage , consists of Stage-14, Stage-15, Stage-1
+  Stage-14 has a backup stage: Stage-1
+  Stage-9 depends on stages: Stage-14
+  Stage-8 depends on stages: Stage-1, Stage-9, Stage-10 , consists of Stage-12, Stage-13, Stage-2
+  Stage-12 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-12
+  Stage-13 has a backup stage: Stage-2
+  Stage-7 depends on stages: Stage-13
+  Stage-2
+  Stage-15 has a backup stage: Stage-1
+  Stage-10 depends on stages: Stage-15
+  Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-11
+    Conditional Operator
+
+  Stage: Stage-14
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        b 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        b 
+          TableScan
+            alias: b
+            HashTable Sink Operator
+              condition expressions:
+                0 {key} {value}
+                1 {key} {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 0
+
+  Stage: Stage-9
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {key} {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col4, _col5
+              Position of Big Table: 0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-8
+    Conditional Operator
+
+  Stage: Stage-12
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        c-subquery1:a-subquery1:src13 
+          Fetch Operator
+            limit: -1
+        c-subquery2:a-subquery2:src14 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        c-subquery1:a-subquery1:src13 
+          TableScan
+            alias: src13
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Union
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  HashTable Sink Operator
+                    condition expressions:
+                      0 {_col4} {_col5} {_col0} {_col1}
+                      1 {_col0} {_col1}
+                    handleSkewJoin: false
+                    keys:
+                      0 [Column[_col5]]
+                      1 [Column[_col1]]
+                    Position of Big Table: 0
+        c-subquery2:a-subquery2:src14 
+          TableScan
+            alias: src14
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Union
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  HashTable Sink Operator
+                    condition expressions:
+                      0 {_col4} {_col5} {_col0} {_col1}
+                      1 {_col0} {_col1}
+                    handleSkewJoin: false
+                    keys:
+                      0 [Column[_col5]]
+                      1 [Column[_col1]]
+                    Position of Big Table: 0
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME 
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {_col4} {_col5} {_col0} {_col1}
+                1 {_col0} {_col1}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col5]]
+                1 [Column[_col1]]
+              outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col4
+                      type: string
+                      expr: _col5
+                      type: string
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+                      expr: _col8
+                      type: string
+                      expr: _col9
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-13
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $INTNAME 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $INTNAME 
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col4} {_col5} {_col0} {_col1}
+                1 {_col0} {_col1}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col5]]
+                1 [Column[_col1]]
+              Position of Big Table: 1
+
+  Stage: Stage-7
+    Map Reduce
+      Alias -> Map Operator Tree:
+        c-subquery1:a-subquery1:src13 
+          TableScan
+            alias: src13
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Union
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {_col4} {_col5} {_col0} {_col1}
+                      1 {_col0} {_col1}
+                    handleSkewJoin: false
+                    keys:
+                      0 [Column[_col5]]
+                      1 [Column[_col1]]
+                    outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+                    Position of Big Table: 1
+                    Select Operator
+                      expressions:
+                            expr: _col4
+                            type: string
+                            expr: _col5
+                            type: string
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: string
+                            expr: _col8
+                            type: string
+                            expr: _col9
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+        c-subquery2:a-subquery2:src14 
+          TableScan
+            alias: src14
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Union
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {_col4} {_col5} {_col0} {_col1}
+                      1 {_col0} {_col1}
+                    handleSkewJoin: false
+                    keys:
+                      0 [Column[_col5]]
+                      1 [Column[_col1]]
+                    outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+                    Position of Big Table: 1
+                    Select Operator
+                      expressions:
+                            expr: _col4
+                            type: string
+                            expr: _col5
+                            type: string
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: string
+                            expr: _col8
+                            type: string
+                            expr: _col9
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME 
+            Reduce Output Operator
+              key expressions:
+                    expr: _col5
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col5
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: _col4
+                    type: string
+                    expr: _col5
+                    type: string
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+        c-subquery1:a-subquery1:src13 
+          TableScan
+            alias: src13
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Union
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col1
+                          type: string
+                    sort order: +
+                    Map-reduce partition columns:
+                          expr: _col1
+                          type: string
+                    tag: 1
+                    value expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+        c-subquery2:a-subquery2:src14 
+          TableScan
+            alias: src14
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Union
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col1
+                          type: string
+                    sort order: +
+                    Map-reduce partition columns:
+                          expr: _col1
+                          type: string
+                    tag: 1
+                    value expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col5}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9
+          Select Operator
+            expressions:
+                  expr: _col4
+                  type: string
+                  expr: _col5
+                  type: string
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col8
+                  type: string
+                  expr: _col9
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-15
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            HashTable Sink Operator
+              condition expressions:
+                0 {key} {value}
+                1 {key} {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 1
+
+  Stage: Stage-10
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {key} {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col4, _col5
+              Position of Big Table: 1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+        b 
+          TableScan
+            alias: b
+            Reduce Output Operator
+              key expressions:
+                    expr: key
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: key
+                    type: string
+              tag: 1
+              value expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0} {VALUE._col1}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4, _col5
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+