You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/01/13 18:04:25 UTC
svn commit: r898839 - in /hadoop/hive/trunk: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
Author: namit
Date: Wed Jan 13 17:04:25 2010
New Revision: 898839
URL: http://svn.apache.org/viewvc?rev=898839&view=rev
Log:
HIVE-1047. Merge tasks in GenMRUnion1
(Ning Zhang via namit)
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=898839&r1=898838&r2=898839&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Jan 13 17:04:25 2010
@@ -12,6 +12,9 @@
BUG FIXES
+ HIVE-1047. Merge tasks in GenMRUnion1
+ (Ning Zhang via namit)
+
Release 0.5.0 - Unreleased
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java?rev=898839&r1=898838&r2=898839&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java Wed Jan 13 17:04:25 2010
@@ -291,11 +291,10 @@
seenOps.add(currTopOp);
GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, (mapredWork) mapTask.getWork(), false, ctx);
}
- // TODO: merge the currTask with mapTask in GenMRUnion1 so that we will
- // always seen 1 mapTask (which is equals to currTask). After doing this
- // the block should be removed.
- if ( ret && mapTask != currTask )
- currTask.removeDependentTask(mvTask);
+ // mapTask and currTask should be merged by any join/union operator
+ // (e.g., GenMRUnion1) which has multiple topOps.
+ assert mapTask == currTask :
+ "mapTask.id = " + mapTask.getId() + "; currTask.id = " + currTask.getId();
}
return dest;
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java?rev=898839&r1=898838&r2=898839&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRUnion1.java Wed Jan 13 17:04:25 2010
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.optimizer;
+import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Stack;
@@ -79,8 +80,16 @@
// Map-only subqueries can be optimized in future to not write to a file
Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
- // The plan needs to be broken only if one of the sub-queries involve a map-reduce job
+ // The plan needs to be broken only if one of the sub-queries involve a map-reduce job
if (uCtx.isMapOnlySubq()) {
+ // merge currTask from multiple topOps
+ HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap = ctx.getOpTaskMap();
+ if ( opTaskMap != null && opTaskMap.size() > 0 ) {
+ Task<? extends Serializable> tsk = opTaskMap.get(null);
+ if ( tsk != null )
+ ctx.setCurrTask(tsk);
+ }
+
UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
if ((uPrsCtx != null) && (uPrsCtx.getMapJoinQuery())) {
GenMapRedUtils.mergeMapJoinUnion(union, ctx, UnionProcFactory.getPositionParent(union, stack));