You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2011/03/24 06:05:09 UTC

svn commit: r1084847 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java test/queries/clientpositive/auto_join27.q test/results/clientpositive/auto_join27.q.out

Author: namit
Date: Thu Mar 24 05:05:09 2011
New Revision: 1084847

URL: http://svn.apache.org/viewvc?rev=1084847&view=rev
Log:
HIVE-1965 Auto convert mapjoin should not throw an exception if the top
          operator is a union operator (Yongqiang He via namit)
[Title]

Summary:

Trac Bug: #

Blame Rev:

Reviewed By:

Test Plan:

Revert Plan:

Database Impact:

Memcache Impact:

Other Notes:

Important:

- begin *PUBLIC* platform impact section -
Bugzilla: #
- end platform impact -


Added:
    hive/trunk/ql/src/test/queries/clientpositive/auto_join27.q
    hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java?rev=1084847&r1=1084846&r2=1084847&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java Thu Mar 24 05:05:09 2011
@@ -121,11 +121,9 @@ public class DefaultGraphWalker implemen
     if ((nd.getChildren() == null)
         || getDispatchedList().containsAll(nd.getChildren())) {
       // all children are done or no need to walk the children
-      if (getDispatchedList().contains(nd)) {
-        // sanity check
-        assert false;
+      if (!getDispatchedList().contains(nd)) {
+        dispatch(nd, opStack);        
       }
-      dispatch(nd, opStack);
       opStack.pop();
       return;
     }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=1084847&r1=1084846&r2=1084847&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Thu Mar 24 05:05:09 2011
@@ -129,16 +129,11 @@ public class MapJoinProcessor implements
         .entrySet()) {
       String alias = entry.getKey();
       Operator<? extends Serializable> op = entry.getValue();
-      // get table scan op
-      if (!(op instanceof TableScanOperator)) {
-        throw new SemanticException("top op is not table scan");
-      }
-      TableScanOperator tableScanOp = (TableScanOperator) op;
 
       // if the table scan is for big table; then skip it
       // tracing down the operator tree from the table scan operator
-      Operator<? extends Serializable> parentOp = tableScanOp;
-      Operator<? extends Serializable> childOp = tableScanOp.getChildOperators().get(0);
+      Operator<? extends Serializable> parentOp = op;
+      Operator<? extends Serializable> childOp = op.getChildOperators().get(0);
       while ((childOp != null) && (!childOp.equals(mapJoinOp))) {
         parentOp = childOp;
         assert parentOp.getChildOperators().size() == 1;
@@ -155,7 +150,7 @@ public class MapJoinProcessor implements
         continue;
       }
       // set alias to work and put into smallTableAliasList
-      newLocalWork.getAliasToWork().put(alias, tableScanOp);
+      newLocalWork.getAliasToWork().put(alias, op);
       smallTableAliasList.add(alias);
       // get input path and remove this alias from pathToAlias
       // because this file will be fetched by fetch operator

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_join27.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_join27.q?rev=1084847&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_join27.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_join27.q Thu Mar 24 05:05:09 2011
@@ -0,0 +1,25 @@
+set hive.auto.convert.join = true;
+
+explain
+SELECT count(1)
+FROM
+(
+SELECT src.key, src.value from src
+UNION ALL
+SELECT DISTINCT src.key, src.value from src
+) src_12
+JOIN
+(SELECT src.key as k, src.value as v from src) src3
+ON src_12.key = src3.k AND src3.k < 200;
+
+
+SELECT count(1)
+FROM
+(
+SELECT src.key, src.value from src
+UNION ALL
+SELECT DISTINCT src.key, src.value from src
+) src_12
+JOIN
+(SELECT src.key as k, src.value as v from src) src3
+ON src_12.key = src3.k AND src3.k < 200;

Added: hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out?rev=1084847&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/auto_join27.q.out Thu Mar 24 05:05:09 2011
@@ -0,0 +1,425 @@
+PREHOOK: query: explain
+SELECT count(1)
+FROM
+(
+SELECT src.key, src.value from src
+UNION ALL
+SELECT DISTINCT src.key, src.value from src
+) src_12
+JOIN
+(SELECT src.key as k, src.value as v from src) src3
+ON src_12.key = src3.k AND src3.k < 200
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+SELECT count(1)
+FROM
+(
+SELECT src.key, src.value from src
+UNION ALL
+SELECT DISTINCT src.key, src.value from src
+) src_12
+JOIN
+(SELECT src.key as k, src.value as v from src) src3
+ON src_12.key = src3.k AND src3.k < 200
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value)))))) src_12) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key) k) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value) v)))) src3) (AND (= (. (TOK_TABLE_OR_COL src_12) key) (. (TOK_TABLE_OR_COL src3) k)) (< (. (TOK_TABLE_OR_COL src3) k) 200)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-9, Stage-10, Stage-2
+  Stage-9 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-9
+  Stage-3 depends on stages: Stage-2, Stage-6, Stage-7
+  Stage-10 has a backup stage: Stage-2
+  Stage-7 depends on stages: Stage-10
+  Stage-2
+  Stage-4 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery2:src_12-subquery2:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: key, value
+              Group By Operator
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  sort order: ++
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-8
+    Conditional Operator
+
+  Stage: Stage-9
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        src3:src 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        src3:src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key < 200)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                outputColumnNames: _col0
+                Filter Operator
+                  predicate:
+                      expr: (_col0 < 200)
+                      type: boolean
+                  HashTable Sink Operator
+                    condition expressions:
+                      0 
+                      1 
+                    handleSkewJoin: false
+                    keys:
+                      0 [Column[_col0]]
+                      1 [Column[_col0]]
+                    Position of Big Table: 0
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002 
+          Union
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col0]]
+                1 [Column[_col0]]
+              Position of Big Table: 0
+              Select Operator
+                Group By Operator
+                  aggregations:
+                        expr: count(1)
+                  bucketGroup: false
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+        file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004 
+          Union
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col0]]
+                1 [Column[_col0]]
+              Position of Big Table: 0
+              Select Operator
+                Group By Operator
+                  aggregations:
+                        expr: count(1)
+                  bucketGroup: false
+                  mode: hash
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10003 
+            Reduce Output Operator
+              sort order: 
+              tag: -1
+              value expressions:
+                    expr: _col0
+                    type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          mode: mergepartial
+          outputColumnNames: _col0
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: bigint
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-10
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002 
+          Fetch Operator
+            limit: -1
+        file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002 
+          Union
+            HashTable Sink Operator
+              condition expressions:
+                0 
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col0]]
+                1 [Column[_col0]]
+              Position of Big Table: 1
+        file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004 
+          Union
+            HashTable Sink Operator
+              condition expressions:
+                0 
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col0]]
+                1 [Column[_col0]]
+              Position of Big Table: 1
+
+  Stage: Stage-7
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src3:src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key < 200)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                outputColumnNames: _col0
+                Filter Operator
+                  predicate:
+                      expr: (_col0 < 200)
+                      type: boolean
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 
+                      1 
+                    handleSkewJoin: false
+                    keys:
+                      0 [Column[_col0]]
+                      1 [Column[_col0]]
+                    Position of Big Table: 1
+                    Select Operator
+                      Group By Operator
+                        aggregations:
+                              expr: count(1)
+                        bucketGroup: false
+                        mode: hash
+                        outputColumnNames: _col0
+                        File Output Operator
+                          compressed: false
+                          GlobalTableId: 0
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10002 
+          Union
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: 0
+        file:/tmp/njain/hive_2011-03-23_22-02-43_416_1632994873933518743/-mr-10004 
+          Union
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: 0
+        src3:src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key < 200)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                outputColumnNames: _col0
+                Filter Operator
+                  predicate:
+                      expr: (_col0 < 200)
+                      type: boolean
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                    sort order: +
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                    tag: 1
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 
+            1 
+          handleSkewJoin: false
+          Select Operator
+            Group By Operator
+              aggregations:
+                    expr: count(1)
+              bucketGroup: false
+              mode: hash
+              outputColumnNames: _col0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:src_12-subquery1:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT count(1)
+FROM
+(
+SELECT src.key, src.value from src
+UNION ALL
+SELECT DISTINCT src.key, src.value from src
+) src_12
+JOIN
+(SELECT src.key as k, src.value as v from src) src3
+ON src_12.key = src3.k AND src3.k < 200
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/njain/hive_2011-03-23_22-02-44_469_912794075622273367/-mr-10000
+POSTHOOK: query: SELECT count(1)
+FROM
+(
+SELECT src.key, src.value from src
+UNION ALL
+SELECT DISTINCT src.key, src.value from src
+) src_12
+JOIN
+(SELECT src.key as k, src.value as v from src) src3
+ON src_12.key = src3.k AND src3.k < 200
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/njain/hive_2011-03-23_22-02-44_469_912794075622273367/-mr-10000
+548