You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/11/10 22:32:11 UTC

svn commit: r834678 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java ql/src/test/queries/clientpositive/join38.q ql/src/test/results/clientpositive/join38.q.out

Author: namit
Date: Tue Nov 10 21:32:10 2009
New Revision: 834678

URL: http://svn.apache.org/viewvc?rev=834678&view=rev
Log:
HIVE-921 MapJoin schema reordering (Ning Zhang via namit)


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/join38.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=834678&r1=834677&r2=834678&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Nov 10 21:32:10 2009
@@ -252,6 +252,8 @@
     HIVE-804 Support deletion of partitions based on a prefix partition specification
     (Zheng Shao via namit)
 
+    HIVE-921 MapJoin schema reordering (Ning Zhang via namit)
+
 Release 0.4.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java?rev=834678&r1=834677&r2=834678&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java Tue Nov 10 21:32:10 2009
@@ -199,8 +199,8 @@
       Task<? extends Serializable> mjTask = TaskFactory.get(mjPlan, parseCtx.getConf());
       
       tableDesc tt_desc = 
-        PlanUtils.getIntermediateFileTableDesc(PlanUtils.sortFieldSchemas(
-            PlanUtils.getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol"))); 
+        PlanUtils.getIntermediateFileTableDesc(
+            PlanUtils.getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol")); 
       
       // generate the temporary file
       Context baseCtx = parseCtx.getContext();
@@ -239,7 +239,7 @@
       return null;
     }
   }
-
+  
   /**
    * MapJoin followed by MapJoin
    */

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/join38.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/join38.q?rev=834678&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/join38.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/join38.q Tue Nov 10 21:32:10 2009
@@ -0,0 +1,20 @@
+drop table tmp;
+
+create table tmp(col0 string, col1 string,col2 string,col3 string,col4 string,col5 string,col6 string,col7 string,col8 string,col9 string,col10 string,col11 string);
+
+insert overwrite table tmp select key, cast(key + 1 as int), key +2, key+3, key+4, cast(key+5 as int), key+6, key+7, key+8, key+9, key+10, cast(key+11 as int) from src where key = 100;
+
+select * from tmp;
+
+explain
+FROM src a JOIN tmp b ON (a.key = b.col11)
+SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count
+where b.col11 = 111
+group by a.value, b.col5;
+
+FROM src a JOIN tmp b ON (a.key = b.col11)
+SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count
+where b.col11 = 111
+group by a.value, b.col5;
+
+drop table tmp;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out?rev=834678&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out Tue Nov 10 21:32:10 2009
@@ -0,0 +1,207 @@
+PREHOOK: query: drop table tmp
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table tmp
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table tmp(col0 string, col1 string,col2 string,col3 string,col4 string,col5 string,col6 string,col7 string,col8 string,col9 string,col10 string,col11 string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table tmp(col0 string, col1 string,col2 string,col3 string,col4 string,col5 string,col6 string,col7 string,col8 string,col9 string,col10 string,col11 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tmp
+PREHOOK: query: insert overwrite table tmp select key, cast(key + 1 as int), key +2, key+3, key+4, cast(key+5 as int), key+6, key+7, key+8, key+9, key+10, cast(key+11 as int) from src where key = 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tmp
+POSTHOOK: query: insert overwrite table tmp select key, cast(key + 1 as int), key +2, key+3, key+4, cast(key+5 as int), key+6, key+7, key+8, key+9, key+10, cast(key+11 as int) from src where key = 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tmp
+PREHOOK: query: select * from tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp
+PREHOOK: Output: file:/data/users/nzhang/work/876/apache-hive/build/ql/tmp/1628655450/10000
+POSTHOOK: query: select * from tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp
+POSTHOOK: Output: file:/data/users/nzhang/work/876/apache-hive/build/ql/tmp/1628655450/10000
+100	101	102.0	103.0	104.0	105	106.0	107.0	108.0	109.0	110.0	111
+100	101	102.0	103.0	104.0	105	106.0	107.0	108.0	109.0	110.0	111
+PREHOOK: query: explain
+FROM src a JOIN tmp b ON (a.key = b.col11)
+SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count
+where b.col11 = 111
+group by a.value, b.col5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM src a JOIN tmp b ON (a.key = b.col11)
+SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count
+where b.col11 = 111
+group by a.value, b.col5
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src a) (TOK_TABREF tmp b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) col11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) col5)) (TOK_SELEXPR (TOK_FUNCTION count 1) count)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) col11) 111)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) col5))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (col11 = 111)
+                  type: boolean
+              Common Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {value}
+                  1 {col5} {col11}
+                keys:
+                  0 [Column[key]]
+                  1 [Column[col11]]
+                outputColumnNames: _col1, _col7, _col13
+                Position of Big Table: 1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            a 
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            a 
+              TableScan
+                alias: a
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {value}
+                    1 {col5} {col11}
+                  keys:
+                    0 [Column[key]]
+                    1 [Column[col11]]
+                  outputColumnNames: _col1, _col7, _col13
+                  Position of Big Table: 1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/data/users/nzhang/work/876/apache-hive/build/ql/tmp/2083729567/10002 
+          Select Operator
+            expressions:
+                  expr: _col1
+                  type: string
+                  expr: _col7
+                  type: string
+                  expr: _col13
+                  type: string
+            outputColumnNames: _col1, _col7, _col13
+            Filter Operator
+              predicate:
+                  expr: (_col13 = 111)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: _col1
+                      type: string
+                      expr: _col7
+                      type: string
+                outputColumnNames: _col1, _col7
+                Group By Operator
+                  aggregations:
+                        expr: count(1)
+                  keys:
+                        expr: _col1
+                        type: string
+                        expr: _col7
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    sort order: ++
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    tag: -1
+                    value expressions:
+                          expr: _col2
+                          type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col2
+                  type: bigint
+            outputColumnNames: _col0, _col1, _col2
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: FROM src a JOIN tmp b ON (a.key = b.col11)
+SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count
+where b.col11 = 111
+group by a.value, b.col5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/nzhang/work/876/apache-hive/build/ql/tmp/1273936990/10000
+POSTHOOK: query: FROM src a JOIN tmp b ON (a.key = b.col11)
+SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count
+where b.col11 = 111
+group by a.value, b.col5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/nzhang/work/876/apache-hive/build/ql/tmp/1273936990/10000
+val_111	105	2
+PREHOOK: query: drop table tmp
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table tmp
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@tmp