You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by sc...@apache.org on 2011/01/08 00:37:13 UTC

svn commit: r1056560 - in /hadoop/mapreduce/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/JobTracker.java src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java

Author: schen
Date: Fri Jan  7 23:37:13 2011
New Revision: 1056560

URL: http://svn.apache.org/viewvc?rev=1056560&view=rev
Log:
MAPREDUCE-2207. Task-cleanup task should not be scheduled on the node where the
task just failed. (Liyin Liang via schen)

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=1056560&r1=1056559&r2=1056560&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Fri Jan  7 23:37:13 2011
@@ -466,6 +466,9 @@ Release 0.22.0 - Unreleased
     MAPREDUCE-2219. JobTracker should not try to remove mapred.system.dir
     during startup. (todd)
 
+    MAPREDUCE-2207. Task-cleanup task should not be scheduled on the node where
+    the task just failed. (Liyin Liang via schen)
+
 Release 0.21.1 - Unreleased
 
   NEW FEATURES

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java?rev=1056560&r1=1056559&r2=1056560&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java Fri Jan  7 23:37:13 2011
@@ -2897,6 +2897,7 @@ public class JobTracker implements MRCon
     int numReduces = taskTracker.countOccupiedReduceSlots();
     int numTaskTrackers = getClusterStatus().getTaskTrackers();
     int numUniqueHosts = getNumberOfUniqueHosts();
+    boolean hasFailedUncleanTask = hasFailedUncleanTask(taskTracker);
 
     Task t = null;
     synchronized (jobs) {
@@ -2910,12 +2911,15 @@ public class JobTracker implements MRCon
             return Collections.singletonList(t);
           }
         }
-        for (Iterator<JobInProgress> it = jobs.values().iterator();
-             it.hasNext();) {
-          JobInProgress job = it.next();
-          t = job.obtainTaskCleanupTask(taskTracker, true);
-          if (t != null) {
-            return Collections.singletonList(t);
+        // Don't schedule a task-cleanup task on the node where the task just failed.
+        if (!hasFailedUncleanTask) {
+          for (Iterator<JobInProgress> it = jobs.values().iterator();
+               it.hasNext();) {
+            JobInProgress job = it.next();
+            t = job.obtainTaskCleanupTask(taskTracker, true);
+            if (t != null) {
+              return Collections.singletonList(t);
+            }
           }
         }
         for (Iterator<JobInProgress> it = jobs.values().iterator();
@@ -2938,12 +2942,15 @@ public class JobTracker implements MRCon
             return Collections.singletonList(t);
           }
         }
-        for (Iterator<JobInProgress> it = jobs.values().iterator();
-             it.hasNext();) {
-          JobInProgress job = it.next();
-          t = job.obtainTaskCleanupTask(taskTracker, false);
-          if (t != null) {
-            return Collections.singletonList(t);
+        // Don't schedule a task-cleanup task on the node where the task just failed.
+        if (!hasFailedUncleanTask) {
+          for (Iterator<JobInProgress> it = jobs.values().iterator();
+               it.hasNext();) {
+            JobInProgress job = it.next();
+            t = job.obtainTaskCleanupTask(taskTracker, false);
+            if (t != null) {
+              return Collections.singletonList(t);
+            }
           }
         }
         for (Iterator<JobInProgress> it = jobs.values().iterator();
@@ -2961,6 +2968,18 @@ public class JobTracker implements MRCon
   }
 
   /**
+   * Returns whether this tracker reports any task in the FAILED_UNCLEAN state.
+   */
+  static boolean hasFailedUncleanTask(TaskTrackerStatus taskTracker) {
+    for (TaskStatus taskStatus : taskTracker.getTaskReports()) {
+      if (taskStatus.getRunState() == TaskStatus.State.FAILED_UNCLEAN) {
+        return true;
+      }
+    }
+    return false;
+  }
+  
+  /**
    * Grab the local fs name
    */
   public synchronized String getFilesystemName() throws IOException {

Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java?rev=1056560&r1=1056559&r2=1056560&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java Fri Jan  7 23:37:13 2011
@@ -159,6 +159,13 @@ public class TestTaskFail extends TestCa
           + "&filter=STDERR&cleanup=true";
       assertEquals(HttpURLConnection.HTTP_OK, TestWebUIAuthorization
           .getHttpStatusCode(cleanupTasklogUrl, tip.getUser(), "GET"));
+      
+      // The task-cleanup task should not be scheduled on the node where the task just failed.
+      if (jt.taskTrackers().size() >= 2) {
+        String trackerRanTask = tip.machineWhereTaskRan(attemptId);
+        String trackerRanCleanupTask = tip.machineWhereCleanupRan(attemptId);
+        assertFalse(trackerRanTask.equals(trackerRanCleanupTask));
+      }
     }
   }