You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/10/04 23:36:30 UTC
svn commit: r1004442 - in /hadoop/hive/trunk: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
Author: namit
Date: Mon Oct 4 21:36:30 2010
New Revision: 1004442
URL: http://svn.apache.org/viewvc?rev=1004442&view=rev
Log:
HIVE-1678 Bug in mapjoin followed by another mapjoin
(Amareshwari Sriramadasu via namit)
Added:
hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1004442&r1=1004441&r2=1004442&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Oct 4 21:36:30 2010
@@ -318,6 +318,9 @@ Trunk - Unreleased
HIVE-1673 Create table bug causes the row format property lost when serde is specified
(He Yongqiang via namit)
+ HIVE-1678 Bug in mapjoin followed by another mapjoin
+ (Amareshwari Sriramadasu via namit)
+
TESTS
HIVE-1464. improve test query performance
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1004442&r1=1004441&r2=1004442&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Mon Oct 4 21:36:30 2010
@@ -186,7 +186,7 @@ public final class GenMapRedUtils {
TableDesc tt_desc;
Operator<? extends Serializable> rootOp;
- if (mjCtx.getOldMapJoin() == null) {
+ if (mjCtx.getOldMapJoin() == null || setReducer) {
taskTmpDir = mjCtx.getTaskTmpDir();
tt_desc = mjCtx.getTTDesc();
rootOp = mjCtx.getRootMapJoinOp();
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q?rev=1004442&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/mapjoin_mapjoin.q Mon Oct 4 21:36:30 2010
@@ -0,0 +1,5 @@
+explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key);
+
+explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds;
+
+select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds;
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out?rev=1004442&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out Mon Oct 4 21:36:30 2010
@@ -0,0 +1,352 @@
+PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF srcpart) (TOK_TABREF src) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF src1) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ srcpart
+ TableScan
+ alias: srcpart
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[value]]
+ 1 [Column[value]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src
+ TableScan
+ alias: src
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[value]]
+ 1 [Column[value]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/tmp/amarsri/hive_2010-10-03_23-32-11_193_4494089607373072455/-mr-10002
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ Local Work:
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src1
+ TableScan
+ alias: src1
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) group by ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF srcpart) (TOK_TABREF src) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF src1) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ srcpart
+ TableScan
+ alias: srcpart
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {ds}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[value]]
+ 1 [Column[value]]
+ outputColumnNames: _col0, _col2
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src
+ TableScan
+ alias: src
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {ds}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[value]]
+ 1 [Column[value]]
+ outputColumnNames: _col0, _col2
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/tmp/amarsri/hive_2010-10-03_23-32-11_566_3712502381018295609/-mr-10002
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: string
+ outputColumnNames: _col0, _col2
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col2}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ outputColumnNames: _col2
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Local Work:
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src1
+ TableScan
+ alias: src1
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col2}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ outputColumnNames: _col2
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/tmp/amarsri/hive_2010-10-03_23-32-11_566_3712502381018295609/-mr-10003
+ Select Operator
+ expressions:
+ expr: _col2
+ type: string
+ outputColumnNames: _col2
+ Select Operator
+ expressions:
+ expr: _col2
+ type: string
+ outputColumnNames: _col2
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col2
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/tmp/amarsri/hive_2010-10-03_23-32-11_744_4150005716053861543/-mr-10000
+POSTHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/tmp/amarsri/hive_2010-10-03_23-32-11_744_4150005716053861543/-mr-10000
+5308
+5308