You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by ac...@apache.org on 2011/09/13 20:12:02 UTC
svn commit: r1170279 - in /hadoop/common/trunk/hadoop-mapreduce-project: ./
hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/
hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java...
Author: acmurthy
Date: Tue Sep 13 18:12:02 2011
New Revision: 1170279
URL: http://svn.apache.org/viewvc?rev=1170279&view=rev
Log:
MAPREDUCE-2995. Better handling of expired containers in MapReduce ApplicationMaster. Contributed by Vinod K V.
Modified:
hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java
hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java
hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1170279&r1=1170278&r2=1170279&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Tue Sep 13 18:12:02 2011
@@ -1306,6 +1306,9 @@ Release 0.23.0 - Unreleased
MAPREDUCE-2874. Fix formatting of ApplicationId in web-ui. (Eric Payne via
acmurthy)
+ MAPREDUCE-2995. Better handling of expired containers in MapReduce
+ ApplicationMaster. (vinodkv via acmurthy)
+
Release 0.22.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java?rev=1170279&r1=1170278&r2=1170279&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java Tue Sep 13 18:12:02 2011
@@ -204,6 +204,11 @@ public abstract class TaskAttemptImpl im
.addTransition(TaskAttemptState.ASSIGNED, TaskAttemptState.FAILED,
TaskAttemptEventType.TA_CONTAINER_LAUNCH_FAILED,
new DeallocateContainerTransition(TaskAttemptState.FAILED, false))
+ .addTransition(TaskAttemptState.ASSIGNED,
+ TaskAttemptState.FAIL_CONTAINER_CLEANUP,
+ TaskAttemptEventType.TA_CONTAINER_COMPLETED,
+ CLEANUP_CONTAINER_TRANSITION)
+ // ^ If RM kills the container due to expiry, preemption etc.
.addTransition(TaskAttemptState.ASSIGNED,
TaskAttemptState.KILL_CONTAINER_CLEANUP,
TaskAttemptEventType.TA_KILL, CLEANUP_CONTAINER_TRANSITION)
@@ -925,7 +930,8 @@ public abstract class TaskAttemptImpl im
try {
stateMachine.doTransition(event.getType(), event);
} catch (InvalidStateTransitonException e) {
- LOG.error("Can't handle this event at current state", e);
+ LOG.error("Can't handle this event at current state for "
+ + this.attemptId, e);
eventHandler.handle(new JobDiagnosticsUpdateEvent(
this.attemptId.getTaskId().getJobId(), "Invalid event " + event.getType() +
" on TaskAttempt " + this.attemptId));
Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java?rev=1170279&r1=1170278&r2=1170279&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java Tue Sep 13 18:12:02 2011
@@ -528,7 +528,8 @@ public abstract class TaskImpl implement
try {
stateMachine.doTransition(event.getType(), event);
} catch (InvalidStateTransitonException e) {
- LOG.error("Can't handle this event at current state", e);
+ LOG.error("Can't handle this event at current state for "
+ + this.taskId, e);
internalError(event.getType());
}
if (oldState != getState()) {
Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java?rev=1170279&r1=1170278&r2=1170279&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java Tue Sep 13 18:12:02 2011
@@ -18,6 +18,7 @@
package org.apache.hadoop.mapreduce.v2.app;
+import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
@@ -36,6 +37,12 @@ import org.apache.hadoop.mapreduce.v2.ap
import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
+import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher;
+import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
+import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherImpl;
+import org.apache.hadoop.yarn.api.ContainerManager;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerToken;
import org.junit.Test;
/**
@@ -160,6 +167,74 @@ public class TestFail {
}
}
+ @Test
+ public void testTaskFailWithUnusedContainer() throws Exception {
+ MRApp app = new FailingTaskWithUnusedContainer();
+ Configuration conf = new Configuration();
+ int maxAttempts = 1;
+ conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
+ // disable uberization (requires entire job to be reattempted, so max for
+ // subtask attempts is overridden to 1)
+ conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
+ Job job = app.submit(conf);
+ app.waitForState(job, JobState.RUNNING);
+ Map<TaskId, Task> tasks = job.getTasks();
+ Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
+ Task task = tasks.values().iterator().next();
+ app.waitForState(task, TaskState.SCHEDULED);
+ Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator()
+ .next().getAttempts();
+ Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts
+ .size());
+ TaskAttempt attempt = attempts.values().iterator().next();
+ app.waitForState(attempt, TaskAttemptState.ASSIGNED);
+ app.getDispatcher().getEventHandler().handle(
+ new TaskAttemptEvent(attempt.getID(),
+ TaskAttemptEventType.TA_CONTAINER_COMPLETED));
+ app.waitForState(job, JobState.FAILED);
+ }
+
+ static class FailingTaskWithUnusedContainer extends MRApp {
+
+ public FailingTaskWithUnusedContainer() {
+ super(1, 0, false, "TaskFailWithUnsedContainer", true);
+ }
+
+ protected ContainerLauncher createContainerLauncher(AppContext context,
+ boolean isLocal) {
+ return new ContainerLauncherImpl(context) {
+ @Override
+ public void handle(ContainerLauncherEvent event) {
+
+ switch (event.getType()) {
+ case CONTAINER_REMOTE_LAUNCH:
+ super.handle(event);
+ break;
+ case CONTAINER_REMOTE_CLEANUP:
+ getContext().getEventHandler().handle(
+ new TaskAttemptEvent(event.getTaskAttemptID(),
+ TaskAttemptEventType.TA_CONTAINER_CLEANED));
+ break;
+ }
+ }
+
+ @Override
+ protected ContainerManager getCMProxy(ContainerId containerID,
+ String containerManagerBindAddr, ContainerToken containerToken)
+ throws IOException {
+ try {
+ synchronized (this) {
+ wait(); // Just hang the thread simulating a very slow NM.
+ }
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ return null;
+ }
+ };
+ };
+ }
+
static class TimeOutTaskMRApp extends MRApp {
TimeOutTaskMRApp(int maps, int reduces) {
super(maps, reduces, false, "TimeOutTaskMRApp", true);
@@ -232,5 +307,6 @@ public class TestFail {
t.testTimedOutTask();
t.testMapFailureMaxPercent();
t.testReduceFailureMaxPercent();
+ t.testTaskFailWithUnusedContainer();
}
}
Modified: hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java?rev=1170279&r1=1170278&r2=1170279&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java Tue Sep 13 18:12:02 2011
@@ -46,7 +46,7 @@ public class SchedulerUtils {
"Container of a completed application";
public static final String EXPIRED_CONTAINER =
- "Container expired since it unused";
+ "Container expired since it was unused";
public static final String UNRESERVED_CONTAINER =
"Container reservation no longer required.";