You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2011/02/02 21:45:07 UTC
svn commit: r1066620 - in /hive/trunk: CHANGES.txt
eclipse-templates/.classpath
ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
Author: namit
Date: Wed Feb 2 20:45:06 2011
New Revision: 1066620
URL: http://svn.apache.org/viewvc?rev=1066620&view=rev
Log:
HIVE-1944 Dynamic partition insert creating different directories for the
same partition during merge (Ning Zhang via namit)
Modified:
hive/trunk/CHANGES.txt
hive/trunk/eclipse-templates/.classpath
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
Modified: hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hive/trunk/CHANGES.txt?rev=1066620&r1=1066619&r2=1066620&view=diff
==============================================================================
--- hive/trunk/CHANGES.txt (original)
+++ hive/trunk/CHANGES.txt Wed Feb 2 20:45:06 2011
@@ -745,6 +745,10 @@ Trunk - Unreleased
HIVE-1934 Alter table rename messes the location
(Paul Yang via namit)
+
+ HIVE-1944 Dynamic partition insert creating different directories for the
+ same partition during merge (Ning Zhang via namit)
+
TESTS
HIVE-1464. improve test query performance
Modified: hive/trunk/eclipse-templates/.classpath
URL: http://svn.apache.org/viewvc/hive/trunk/eclipse-templates/.classpath?rev=1066620&r1=1066619&r2=1066620&view=diff
==============================================================================
--- hive/trunk/eclipse-templates/.classpath (original)
+++ hive/trunk/eclipse-templates/.classpath Wed Feb 2 20:45:06 2011
@@ -16,11 +16,11 @@
<classpathentry exported="true" kind="lib" path="lib/commons-logging-@commons-logging.version@.jar"/>
<classpathentry exported="true" kind="lib" path="lib/commons-logging-api-@commons-logging-api.version@.jar"/>
<classpathentry exported="true" kind="lib" path="lib/derby.jar"/>
- <classpathentry exported="true" kind="lib" path="lib/hbase-@hbase.version@.jar"/>
- <classpathentry exported="true" kind="lib" path="lib/hbase-@hbase-test.version@-test.jar"/>
+ <classpathentry exported="true" kind="lib" path="build/dist/lib/hbase-@hbase.version@.jar"/>
+ <classpathentry exported="true" kind="lib" path="build/dist/lib/hbase-@hbase-test.version@-tests.jar"/>
<classpathentry exported="true" kind="lib" path="lib/thrift-fb303-@thrift-fb303.version@.jar"/>
<classpathentry exported="true" kind="lib" path="lib/thrift-@thrift.version@.jar"/>
- <classpathentry exported="true" kind="lib" path="lib/zookeeper-@zookeeper.version@.jar"/>
+ <classpathentry exported="true" kind="lib" path="build/dist/lib/zookeeper-@zookeeper.version@.jar"/>
<classpathentry exported="true" kind="lib" path="lib/log4j-@log4j.version@.jar"/>
<classpathentry exported="true" kind="lib" path="ql/lib/antlr-@antlr.version@.jar"/>
<classpathentry exported="true" kind="lib" path="ql/lib/antlr-runtime-@antlr-runtime.version@.jar"/>
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java?rev=1066620&r1=1066619&r2=1066620&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java Wed Feb 2 20:45:06 2011
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -177,13 +178,13 @@ public class ConditionalResolverMergeFil
// add the merge MR job
setupMapRedWork(conf, work, trgtSize, totalSz);
resTsks.add(mrTask);
-
+
// add the move task for those partitions that do not need merging
if (toMove.size() > 0) { //
// modify the existing move task as it is already in the candidate running tasks
MoveWork mvWork = (MoveWork) mvTask.getWork();
LoadFileDesc lfd = mvWork.getLoadFileWork();
-
+
String targetDir = lfd.getTargetDir();
List<String> targetDirs = new ArrayList<String>(toMove.size());
int numDPCols = dpCtx.getNumDPCols();
@@ -200,7 +201,7 @@ public class ConditionalResolverMergeFil
target = target + Path.SEPARATOR + moveStrSplits[dpIndex];
dpIndex ++;
}
-
+
targetDirs.add(target);
}
@@ -209,7 +210,21 @@ public class ConditionalResolverMergeFil
mvWork.setLoadFileWork(null);
mvWork.setLoadTableWork(null);
mvWork.setMultiFilesDesc(lmfd);
- resTsks.add(mvTask);
+
+ // Running the MoveTask and the MR task in parallel may
+ // cause the MoveTask to write to /ds=1 and the MR task to
+ // write to /ds=1_1 for the same partition, so make the
+ // MoveTask a child of the MR task.
+ List<Task <? extends Serializable>> cTasks = mrTask.getDependentTasks();
+ if (cTasks != null) {
+ Iterator<Task <? extends Serializable>> itr = cTasks.iterator();
+ while (itr.hasNext()) {
+ Task<? extends Serializable> cld = itr.next();
+ itr.remove();
+ mvTask.addDependentTask(cld);
+ }
+ }
+ mrTask.addDependentTask(mvTask);
}
} else { // add the move task
resTsks.add(mvTask);
@@ -263,7 +278,7 @@ public class ConditionalResolverMergeFil
for (FileStatus fStat : fStats) {
totalSz += fStat.getLen();
}
-
+
if (totalSz < avgSize * fStats.length) {
return totalSz;
} else {