You are viewing a plain-text version of this content; the canonical version is the original message in the mailing-list archive (the hyperlink was not preserved in this text copy).
Posted to mapreduce-commits@hadoop.apache.org by vi...@apache.org on 2011/09/26 19:31:47 UTC
svn commit: r1175964 - in
/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project: ./
hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/
hadoop-yarn/had...
Author: vinodkv
Date: Mon Sep 26 17:31:47 2011
New Revision: 1175964
URL: http://svn.apache.org/viewvc?rev=1175964&view=rev
Log:
MAPREDUCE-3031. svn merge -c r1175960 --ignore-ancestry ../../trunk/
Modified:
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt?rev=1175964&r1=1175963&r2=1175964&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt Mon Sep 26 17:31:47 2011
@@ -1397,6 +1397,9 @@ Release 0.23.0 - Unreleased
MAPREDUCE-2646. Fixed AMRMProtocol to return containers based on
priority. (Sharad Agarwal and Arun C Murthy via vinodkv)
+ MAPREDUCE-3031. Proper handling of killed containers to prevent stuck
+ containers/AMs on an external kill signal. (Siddharth Seth via vinodkv)
+
Release 0.22.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java?rev=1175964&r1=1175963&r2=1175964&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java Mon Sep 26 17:31:47 2011
@@ -158,10 +158,12 @@ public class ContainerImpl implements Co
ContainerEventType.CONTAINER_LAUNCHED, new LaunchTransition())
.addTransition(ContainerState.LOCALIZED, ContainerState.EXITED_WITH_FAILURE,
ContainerEventType.CONTAINER_EXITED_WITH_FAILURE,
- new ExitedWithFailureTransition())
+ new ExitedWithFailureTransition(true))
.addTransition(ContainerState.LOCALIZED, ContainerState.LOCALIZED,
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
UPDATE_DIAGNOSTICS_TRANSITION)
+ // TODO race: Can lead to a CONTAINER_LAUNCHED event at state KILLING,
+ // and a container which will never be killed by the NM.
.addTransition(ContainerState.LOCALIZED, ContainerState.KILLING,
ContainerEventType.KILL_CONTAINER, new KillTransition())
@@ -169,16 +171,19 @@ public class ContainerImpl implements Co
.addTransition(ContainerState.RUNNING,
ContainerState.EXITED_WITH_SUCCESS,
ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS,
- new ExitedWithSuccessTransition())
+ new ExitedWithSuccessTransition(true))
.addTransition(ContainerState.RUNNING,
ContainerState.EXITED_WITH_FAILURE,
ContainerEventType.CONTAINER_EXITED_WITH_FAILURE,
- new ExitedWithFailureTransition())
+ new ExitedWithFailureTransition(true))
.addTransition(ContainerState.RUNNING, ContainerState.RUNNING,
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
UPDATE_DIAGNOSTICS_TRANSITION)
.addTransition(ContainerState.RUNNING, ContainerState.KILLING,
ContainerEventType.KILL_CONTAINER, new KillTransition())
+ .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_FAILURE,
+ ContainerEventType.CONTAINER_KILLED_ON_REQUEST,
+ new KilledExternallyTransition())
// From CONTAINER_EXITED_WITH_SUCCESS State
.addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.DONE,
@@ -220,10 +225,10 @@ public class ContainerImpl implements Co
ContainerEventType.KILL_CONTAINER)
.addTransition(ContainerState.KILLING, ContainerState.EXITED_WITH_SUCCESS,
ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS,
- new ExitedWithSuccessTransition())
+ new ExitedWithSuccessTransition(false))
.addTransition(ContainerState.KILLING, ContainerState.EXITED_WITH_FAILURE,
ContainerEventType.CONTAINER_EXITED_WITH_FAILURE,
- new ExitedWithFailureTransition())
+ new ExitedWithFailureTransition(false))
.addTransition(ContainerState.KILLING,
ContainerState.DONE,
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP,
@@ -551,18 +556,38 @@ public class ContainerImpl implements Co
}
}
+ @SuppressWarnings("unchecked") // dispatcher not typed
static class ExitedWithSuccessTransition extends ContainerTransition {
+
+ boolean clCleanupRequired;
+
+ public ExitedWithSuccessTransition(boolean clCleanupRequired) {
+ this.clCleanupRequired = clCleanupRequired;
+ }
+
@Override
public void transition(ContainerImpl container, ContainerEvent event) {
// TODO: Add containerWorkDir to the deletion service.
- // Inform the localizer to decrement reference counts and cleanup
- // resources.
+ if (clCleanupRequired) {
+ container.dispatcher.getEventHandler().handle(
+ new ContainersLauncherEvent(container,
+ ContainersLauncherEventType.CLEANUP_CONTAINER));
+ }
+
container.cleanup();
}
}
+ @SuppressWarnings("unchecked") // dispatcher not typed
static class ExitedWithFailureTransition extends ContainerTransition {
+
+ boolean clCleanupRequired;
+
+ public ExitedWithFailureTransition(boolean clCleanupRequired) {
+ this.clCleanupRequired = clCleanupRequired;
+ }
+
@Override
public void transition(ContainerImpl container, ContainerEvent event) {
ContainerExitEvent exitEvent = (ContainerExitEvent) event;
@@ -571,12 +596,28 @@ public class ContainerImpl implements Co
// TODO: Add containerWorkDir to the deletion service.
// TODO: Add containerOuputDir to the deletion service.
- // Inform the localizer to decrement reference counts and cleanup
- // resources.
+ if (clCleanupRequired) {
+ container.dispatcher.getEventHandler().handle(
+ new ContainersLauncherEvent(container,
+ ContainersLauncherEventType.CLEANUP_CONTAINER));
+ }
+
container.cleanup();
}
}
+ static class KilledExternallyTransition extends ExitedWithFailureTransition {
+ KilledExternallyTransition() {
+ super(true);
+ }
+
+ @Override
+ public void transition(ContainerImpl container, ContainerEvent event) {
+ super.transition(container, event);
+ container.diagnostics.append("Killed by external signal\n");
+ }
+ }
+
static class ResourceFailedTransition implements
SingleArcTransition<ContainerImpl, ContainerEvent> {
@Override
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java?rev=1175964&r1=1175963&r2=1175964&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java Mon Sep 26 17:31:47 2011
@@ -38,8 +38,6 @@ import java.util.Map.Entry;
import java.util.Random;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LocalResource;
@@ -137,6 +135,28 @@ public class TestContainer {
@Test
@SuppressWarnings("unchecked") // mocked generic
+ public void testExternalKill() throws Exception {
+ WrappedContainer wc = null;
+ try {
+ wc = new WrappedContainer(13, 314159265358979L, 4344, "yak");
+ wc.initContainer();
+ wc.localizeResources();
+ wc.launchContainer();
+ reset(wc.localizerBus);
+ wc.containerKilledOnRequest();
+ assertEquals(ContainerState.EXITED_WITH_FAILURE,
+ wc.c.getContainerState());
+ verifyCleanupCall(wc);
+ }
+ finally {
+ if (wc != null) {
+ wc.finished();
+ }
+ }
+ }
+
+ @Test
+ @SuppressWarnings("unchecked") // mocked generic
public void testCleanupOnFailure() throws Exception {
WrappedContainer wc = null;
try {