You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2011/02/02 21:45:07 UTC

svn commit: r1066620 - in /hive/trunk: CHANGES.txt eclipse-templates/.classpath ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java

Author: namit
Date: Wed Feb  2 20:45:06 2011
New Revision: 1066620

URL: http://svn.apache.org/viewvc?rev=1066620&view=rev
Log:
HIVE-1944 Dynamic partition insert creating different directories for the
same partition during merge (Ning Zhang via namit)


Modified:
    hive/trunk/CHANGES.txt
    hive/trunk/eclipse-templates/.classpath
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java

Modified: hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hive/trunk/CHANGES.txt?rev=1066620&r1=1066619&r2=1066620&view=diff
==============================================================================
--- hive/trunk/CHANGES.txt (original)
+++ hive/trunk/CHANGES.txt Wed Feb  2 20:45:06 2011
@@ -745,6 +745,10 @@ Trunk -  Unreleased
 
     HIVE-1934 Alter table rename messes the location
     (Paul Yang via namit)
+
+    HIVE-1944 Dynamic partition insert creating different directories for the
+    same partition during merge (Ning Zhang via namit)
+
   TESTS
 
     HIVE-1464. improve  test query performance

Modified: hive/trunk/eclipse-templates/.classpath
URL: http://svn.apache.org/viewvc/hive/trunk/eclipse-templates/.classpath?rev=1066620&r1=1066619&r2=1066620&view=diff
==============================================================================
--- hive/trunk/eclipse-templates/.classpath (original)
+++ hive/trunk/eclipse-templates/.classpath Wed Feb  2 20:45:06 2011
@@ -16,11 +16,11 @@
   <classpathentry exported="true" kind="lib" path="lib/commons-logging-@commons-logging.version@.jar"/>
   <classpathentry exported="true" kind="lib" path="lib/commons-logging-api-@commons-logging-api.version@.jar"/>
   <classpathentry exported="true" kind="lib" path="lib/derby.jar"/>
-  <classpathentry exported="true" kind="lib" path="lib/hbase-@hbase.version@.jar"/>
-  <classpathentry exported="true" kind="lib" path="lib/hbase-@hbase-test.version@-test.jar"/>
+  <classpathentry exported="true" kind="lib" path="build/dist/lib/hbase-@hbase.version@.jar"/>
+  <classpathentry exported="true" kind="lib" path="build/dist/lib/hbase-@hbase-test.version@-tests.jar"/>
   <classpathentry exported="true" kind="lib" path="lib/thrift-fb303-@thrift-fb303.version@.jar"/>
   <classpathentry exported="true" kind="lib" path="lib/thrift-@thrift.version@.jar"/>
-  <classpathentry exported="true" kind="lib" path="lib/zookeeper-@zookeeper.version@.jar"/>
+  <classpathentry exported="true" kind="lib" path="build/dist/lib/zookeeper-@zookeeper.version@.jar"/>
   <classpathentry exported="true" kind="lib" path="lib/log4j-@log4j.version@.jar"/>
   <classpathentry exported="true" kind="lib" path="ql/lib/antlr-@antlr.version@.jar"/>
   <classpathentry exported="true" kind="lib" path="ql/lib/antlr-runtime-@antlr-runtime.version@.jar"/>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java?rev=1066620&r1=1066619&r2=1066620&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java Wed Feb  2 20:45:06 2011
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan;
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -177,13 +178,13 @@ public class ConditionalResolverMergeFil
             // add the merge MR job
             setupMapRedWork(conf, work, trgtSize, totalSz);
             resTsks.add(mrTask);
-            
+
             // add the move task for those partitions that do not need merging
           	if (toMove.size() > 0) { //
           	  // modify the existing move task as it is already in the candidate running tasks
           	  MoveWork mvWork = (MoveWork) mvTask.getWork();
           	  LoadFileDesc lfd = mvWork.getLoadFileWork();
-          	  
+
           	  String targetDir = lfd.getTargetDir();
           	  List<String> targetDirs = new ArrayList<String>(toMove.size());
           	  int numDPCols = dpCtx.getNumDPCols();
@@ -200,7 +201,7 @@ public class ConditionalResolverMergeFil
                   target = target + Path.SEPARATOR + moveStrSplits[dpIndex];
                   dpIndex ++;
                 }
-                
+
                 targetDirs.add(target);
               }
 
@@ -209,7 +210,21 @@ public class ConditionalResolverMergeFil
           	  mvWork.setLoadFileWork(null);
           	  mvWork.setLoadTableWork(null);
           	  mvWork.setMultiFilesDesc(lmfd);
-          	  resTsks.add(mvTask);
+
+          	  // Running the MoveTask and the MR task in parallel may
+          	  // cause the MoveTask to write to /ds=1 and the MR task to
+          	  // write to /ds=1_1 for the same partition, so make the
+          	  // MoveTask a child of the MR task.
+          	  List<Task <? extends Serializable>> cTasks = mrTask.getDependentTasks();
+          	  if (cTasks != null) {
+          	    Iterator<Task <? extends Serializable>> itr = cTasks.iterator();
+          	    while (itr.hasNext()) {
+          	      Task<? extends Serializable> cld = itr.next();
+          	      itr.remove();
+          	      mvTask.addDependentTask(cld);
+          	    }
+          	  }
+          	  mrTask.addDependentTask(mvTask);
           	}
           } else { // add the move task
             resTsks.add(mvTask);
@@ -263,7 +278,7 @@ public class ConditionalResolverMergeFil
       for (FileStatus fStat : fStats) {
         totalSz += fStat.getLen();
       }
-      
+
       if (totalSz < avgSize * fStats.length) {
         return totalSz;
       } else {