You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/10/04 23:36:30 UTC

svn commit: r1004442 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/test/queries/clientpositive/mapjoin_mapjoin.q ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out

Author: namit
Date: Mon Oct  4 21:36:30 2010
New Revision: 1004442

URL: http://svn.apache.org/viewvc?rev=1004442&view=rev
Log:
HIVE-1678 Bug in mapjoin followed by another mapjoin
(Amareshwari Sriramadasu via namit)


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1004442&r1=1004441&r2=1004442&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Oct  4 21:36:30 2010
@@ -318,6 +318,9 @@ Trunk -  Unreleased
     HIVE-1673 Create table bug causes the row format property lost when serde is specified
     (He Yongqiang via namit)
 
+    HIVE-1678 Bug in mapjoin followed by another mapjoin
+    (Amareshwari Sriramadasu via namit)
+
   TESTS
 
     HIVE-1464. improve  test query performance

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1004442&r1=1004441&r2=1004442&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Mon Oct  4 21:36:30 2010
@@ -186,7 +186,7 @@ public final class GenMapRedUtils {
         TableDesc tt_desc;
         Operator<? extends Serializable> rootOp;
 
-        if (mjCtx.getOldMapJoin() == null) {
+        if (mjCtx.getOldMapJoin() == null || setReducer) {
           taskTmpDir = mjCtx.getTaskTmpDir();
           tt_desc = mjCtx.getTTDesc();
           rootOp = mjCtx.getRootMapJoinOp();

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q?rev=1004442&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q Mon Oct  4 21:36:30 2010
@@ -0,0 +1,5 @@
+explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key);
+
+explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds;
+
+select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out?rev=1004442&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out Mon Oct  4 21:36:30 2010
@@ -0,0 +1,352 @@
+PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF srcpart) (TOK_TABREF src) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF src1) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        srcpart 
+          TableScan
+            alias: srcpart
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key}
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[value]]
+                1 [Column[value]]
+              outputColumnNames: _col0
+              Position of Big Table: 0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            src 
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            src 
+              TableScan
+                alias: src
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {key}
+                    1 
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[value]]
+                    1 [Column[value]]
+                  outputColumnNames: _col0
+                  Position of Big Table: 0
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/tmp/amarsri/hive_2010-10-03_23-32-11_193_4494089607373072455/-mr-10002 
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+            outputColumnNames: _col0
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {_col0}
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col0]]
+                1 [Column[key]]
+              outputColumnNames: _col0
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                outputColumnNames: _col0
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            src1 
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            src1 
+              TableScan
+                alias: src1
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {_col0}
+                    1 
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[_col0]]
+                    1 [Column[key]]
+                  outputColumnNames: _col0
+                  Position of Big Table: 0
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                    outputColumnNames: _col0
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: string
+                      outputColumnNames: _col0
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF srcpart) (TOK_TABREF src) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF src1) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        srcpart 
+          TableScan
+            alias: srcpart
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {ds}
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[value]]
+                1 [Column[value]]
+              outputColumnNames: _col0, _col2
+              Position of Big Table: 0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            src 
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            src 
+              TableScan
+                alias: src
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {key} {ds}
+                    1 
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[value]]
+                    1 [Column[value]]
+                  outputColumnNames: _col0, _col2
+                  Position of Big Table: 0
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/tmp/amarsri/hive_2010-10-03_23-32-11_566_3712502381018295609/-mr-10002 
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col2
+                  type: string
+            outputColumnNames: _col0, _col2
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {_col2}
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col0]]
+                1 [Column[key]]
+              outputColumnNames: _col2
+              Position of Big Table: 0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            src1 
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            src1 
+              TableScan
+                alias: src1
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {_col2}
+                    1 
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[_col0]]
+                    1 [Column[key]]
+                  outputColumnNames: _col2
+                  Position of Big Table: 0
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/tmp/amarsri/hive_2010-10-03_23-32-11_566_3712502381018295609/-mr-10003 
+          Select Operator
+            expressions:
+                  expr: _col2
+                  type: string
+            outputColumnNames: _col2
+            Select Operator
+              expressions:
+                    expr: _col2
+                    type: string
+              outputColumnNames: _col2
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: _col2
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-10-03_23-32-11_744_4150005716053861543/-mr-10000
+POSTHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-10-03_23-32-11_744_4150005716053861543/-mr-10000
+5308
+5308