You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by sc...@apache.org on 2011/01/08 00:37:13 UTC
svn commit: r1056560 - in /hadoop/mapreduce/trunk: CHANGES.txt
src/java/org/apache/hadoop/mapred/JobTracker.java
src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java
Author: schen
Date: Fri Jan 7 23:37:13 2011
New Revision: 1056560
URL: http://svn.apache.org/viewvc?rev=1056560&view=rev
Log:
MAPREDUCE-2207. Task-cleanup task should not be scheduled on the node where the
task just failed. (Liyin Liang via schen)
Modified:
hadoop/mapreduce/trunk/CHANGES.txt
hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java
Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=1056560&r1=1056559&r2=1056560&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Fri Jan 7 23:37:13 2011
@@ -466,6 +466,9 @@ Release 0.22.0 - Unreleased
MAPREDUCE-2219. JobTracker should not try to remove mapred.system.dir
during startup. (todd)
+ MAPREDUCE-2207. Task-cleanup task should not be scheduled on the node that
+ the task just failed. (Liyin Liang via schen)
+
Release 0.21.1 - Unreleased
NEW FEATURES
Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java?rev=1056560&r1=1056559&r2=1056560&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java Fri Jan 7 23:37:13 2011
@@ -2897,6 +2897,7 @@ public class JobTracker implements MRCon
int numReduces = taskTracker.countOccupiedReduceSlots();
int numTaskTrackers = getClusterStatus().getTaskTrackers();
int numUniqueHosts = getNumberOfUniqueHosts();
+ boolean hasFailedUncleanTask = hasFailedUncleanTask(taskTracker);
Task t = null;
synchronized (jobs) {
@@ -2910,12 +2911,15 @@ public class JobTracker implements MRCon
return Collections.singletonList(t);
}
}
- for (Iterator<JobInProgress> it = jobs.values().iterator();
- it.hasNext();) {
- JobInProgress job = it.next();
- t = job.obtainTaskCleanupTask(taskTracker, true);
- if (t != null) {
- return Collections.singletonList(t);
+ // Don't schedule task-cleanup task on the node that the task just failed.
+ if (!hasFailedUncleanTask) {
+ for (Iterator<JobInProgress> it = jobs.values().iterator();
+ it.hasNext();) {
+ JobInProgress job = it.next();
+ t = job.obtainTaskCleanupTask(taskTracker, true);
+ if (t != null) {
+ return Collections.singletonList(t);
+ }
}
}
for (Iterator<JobInProgress> it = jobs.values().iterator();
@@ -2938,12 +2942,15 @@ public class JobTracker implements MRCon
return Collections.singletonList(t);
}
}
- for (Iterator<JobInProgress> it = jobs.values().iterator();
- it.hasNext();) {
- JobInProgress job = it.next();
- t = job.obtainTaskCleanupTask(taskTracker, false);
- if (t != null) {
- return Collections.singletonList(t);
+ // Don't schedule task-cleanup task on the node that the task just failed.
+ if (!hasFailedUncleanTask) {
+ for (Iterator<JobInProgress> it = jobs.values().iterator();
+ it.hasNext();) {
+ JobInProgress job = it.next();
+ t = job.obtainTaskCleanupTask(taskTracker, false);
+ if (t != null) {
+ return Collections.singletonList(t);
+ }
}
}
for (Iterator<JobInProgress> it = jobs.values().iterator();
@@ -2961,6 +2968,18 @@ public class JobTracker implements MRCon
}
/**
+ * Whether this tracker has tasks with FAILED_UNCLEAN state.
+ */
+ static boolean hasFailedUncleanTask(TaskTrackerStatus taskTracker) {
+ for (TaskStatus taskStatus : taskTracker.getTaskReports()) {
+ if (taskStatus.getRunState() == TaskStatus.State.FAILED_UNCLEAN) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
* Grab the local fs name
*/
public synchronized String getFilesystemName() throws IOException {
Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java?rev=1056560&r1=1056559&r2=1056560&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/TestTaskFail.java Fri Jan 7 23:37:13 2011
@@ -159,6 +159,13 @@ public class TestTaskFail extends TestCa
+ "&filter=STDERR&cleanup=true";
assertEquals(HttpURLConnection.HTTP_OK, TestWebUIAuthorization
.getHttpStatusCode(cleanupTasklogUrl, tip.getUser(), "GET"));
+
+ // Task-cleanup task should not be scheduled on the node that the task just failed
+ if (jt.taskTrackers().size() >= 2) {
+ String trackerRanTask = tip.machineWhereTaskRan(attemptId);
+ String trackerRanCleanupTask = tip.machineWhereCleanupRan(attemptId);
+ assertFalse(trackerRanTask.equals(trackerRanCleanupTask));
+ }
}
}