You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by zh...@apache.org on 2014/12/24 20:35:38 UTC
[24/50] hadoop git commit: YARN-2675. containersKilled metrics is not
updated when the container is killed during localization. (Zhihai Xu via
kasha)
YARN-2675. containersKilled metrics is not updated when the container is killed during localization. (Zhihai Xu via kasha)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/838c213b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/838c213b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/838c213b
Branch: refs/heads/HDFS-EC
Commit: 838c213bbec3db15e384465402d38cb2d0d7724a
Parents: 89c7bd8
Author: Karthik Kambatla <ka...@apache.org>
Authored: Fri Dec 19 16:00:24 2014 -0800
Committer: Zhe Zhang <zh...@cloudera.com>
Committed: Wed Dec 24 11:22:16 2014 -0800
----------------------------------------------------------------------
hadoop-yarn-project/CHANGES.txt | 3 +
.../container/ContainerImpl.java | 164 +++++++++++++------
.../nodemanager/metrics/NodeManagerMetrics.java | 16 ++
.../container/TestContainer.java | 27 +++
4 files changed, 159 insertions(+), 51 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/838c213b/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 54ca3d2..7287a24 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -250,6 +250,9 @@ Release 2.7.0 - UNRELEASED
YARN-2964. RM prematurely cancels tokens for jobs that submit jobs (oozie)
(Jian He via jlowe)
+ YARN-2675. containersKilled metrics is not updated when the container is killed
+ during localization. (Zhihai Xu via kasha)
+
Release 2.6.0 - 2014-11-18
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/838c213b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index 58b1e22..f0c506d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -159,9 +159,6 @@ public class ContainerImpl implements Container {
this.diagnostics.append(diagnostics);
}
- private static final ContainerDoneTransition CONTAINER_DONE_TRANSITION =
- new ContainerDoneTransition();
-
private static final ContainerDiagnosticsUpdateTransition UPDATE_DIAGNOSTICS_TRANSITION =
new ContainerDiagnosticsUpdateTransition();
@@ -202,7 +199,7 @@ public class ContainerImpl implements Container {
.addTransition(ContainerState.LOCALIZATION_FAILED,
ContainerState.DONE,
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP,
- CONTAINER_DONE_TRANSITION)
+ new LocalizationFailedToDoneTransition())
.addTransition(ContainerState.LOCALIZATION_FAILED,
ContainerState.LOCALIZATION_FAILED,
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
@@ -254,7 +251,7 @@ public class ContainerImpl implements Container {
// From CONTAINER_EXITED_WITH_SUCCESS State
.addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.DONE,
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP,
- CONTAINER_DONE_TRANSITION)
+ new ExitedWithSuccessToDoneTransition())
.addTransition(ContainerState.EXITED_WITH_SUCCESS,
ContainerState.EXITED_WITH_SUCCESS,
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
@@ -266,7 +263,7 @@ public class ContainerImpl implements Container {
// From EXITED_WITH_FAILURE State
.addTransition(ContainerState.EXITED_WITH_FAILURE, ContainerState.DONE,
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP,
- CONTAINER_DONE_TRANSITION)
+ new ExitedWithFailureToDoneTransition())
.addTransition(ContainerState.EXITED_WITH_FAILURE,
ContainerState.EXITED_WITH_FAILURE,
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
@@ -301,7 +298,7 @@ public class ContainerImpl implements Container {
.addTransition(ContainerState.KILLING,
ContainerState.DONE,
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP,
- CONTAINER_DONE_TRANSITION)
+ new KillingToDoneTransition())
// Handle a launched container during killing stage is a no-op
// as cleanup container is always handled after launch container event
// in the container launcher
@@ -313,7 +310,7 @@ public class ContainerImpl implements Container {
.addTransition(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL,
ContainerState.DONE,
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP,
- CONTAINER_DONE_TRANSITION)
+ new ContainerCleanedupAfterKillToDoneTransition())
.addTransition(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL,
ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL,
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
@@ -463,47 +460,6 @@ public class ContainerImpl implements Container {
}
}
- @SuppressWarnings("fallthrough")
- private void finished() {
- ApplicationId applicationId =
- containerId.getApplicationAttemptId().getApplicationId();
- switch (getContainerState()) {
- case EXITED_WITH_SUCCESS:
- metrics.endRunningContainer();
- metrics.completedContainer();
- NMAuditLogger.logSuccess(user,
- AuditConstants.FINISH_SUCCESS_CONTAINER, "ContainerImpl",
- applicationId, containerId);
- break;
- case EXITED_WITH_FAILURE:
- if (wasLaunched) {
- metrics.endRunningContainer();
- }
- // fall through
- case LOCALIZATION_FAILED:
- metrics.failedContainer();
- NMAuditLogger.logFailure(user,
- AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl",
- "Container failed with state: " + getContainerState(),
- applicationId, containerId);
- break;
- case CONTAINER_CLEANEDUP_AFTER_KILL:
- if (wasLaunched) {
- metrics.endRunningContainer();
- }
- // fall through
- case NEW:
- metrics.killedContainer();
- NMAuditLogger.logSuccess(user,
- AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl",
- applicationId,
- containerId);
- }
-
- metrics.releaseContainer(this.resource);
- sendFinishedEvents();
- }
-
@SuppressWarnings("unchecked")
private void sendFinishedEvents() {
// Inform the application
@@ -611,7 +567,13 @@ public class ContainerImpl implements Container {
} else if (container.recoveredAsKilled &&
container.recoveredStatus == RecoveredContainerStatus.REQUESTED) {
// container was killed but never launched
- container.finished();
+ container.metrics.killedContainer();
+ NMAuditLogger.logSuccess(container.user,
+ AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl",
+ container.containerId.getApplicationAttemptId().getApplicationId(),
+ container.containerId);
+ container.metrics.releaseContainer(container.resource);
+ container.sendFinishedEvents();
return ContainerState.DONE;
}
@@ -994,7 +956,8 @@ public class ContainerImpl implements Container {
@Override
@SuppressWarnings("unchecked")
public void transition(ContainerImpl container, ContainerEvent event) {
- container.finished();
+ container.metrics.releaseContainer(container.resource);
+ container.sendFinishedEvents();
//if the current state is NEW it means the CONTAINER_INIT was never
// sent for the event, thus no need to send the CONTAINER_STOP
if (container.getCurrentState()
@@ -1016,6 +979,105 @@ public class ContainerImpl implements Container {
container.exitCode = killEvent.getContainerExitStatus();
container.addDiagnostics(killEvent.getDiagnostic(), "\n");
container.addDiagnostics("Container is killed before being launched.\n");
+ container.metrics.killedContainer();
+ NMAuditLogger.logSuccess(container.user,
+ AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl",
+ container.containerId.getApplicationAttemptId().getApplicationId(),
+ container.containerId);
+ super.transition(container, event);
+ }
+ }
+
+ /**
+ * Handle the following transition:
+ * - LOCALIZATION_FAILED -> DONE upon CONTAINER_RESOURCES_CLEANEDUP
+ */
+ static class LocalizationFailedToDoneTransition extends
+ ContainerDoneTransition {
+ @Override
+ public void transition(ContainerImpl container, ContainerEvent event) {
+ container.metrics.failedContainer();
+ NMAuditLogger.logFailure(container.user,
+ AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl",
+ "Container failed with state: " + container.getContainerState(),
+ container.containerId.getApplicationAttemptId().getApplicationId(),
+ container.containerId);
+ super.transition(container, event);
+ }
+ }
+
+ /**
+ * Handle the following transition:
+ * - EXITED_WITH_SUCCESS -> DONE upon CONTAINER_RESOURCES_CLEANEDUP
+ */
+ static class ExitedWithSuccessToDoneTransition extends
+ ContainerDoneTransition {
+ @Override
+ public void transition(ContainerImpl container, ContainerEvent event) {
+ container.metrics.endRunningContainer();
+ container.metrics.completedContainer();
+ NMAuditLogger.logSuccess(container.user,
+ AuditConstants.FINISH_SUCCESS_CONTAINER, "ContainerImpl",
+ container.containerId.getApplicationAttemptId().getApplicationId(),
+ container.containerId);
+ super.transition(container, event);
+ }
+ }
+
+ /**
+ * Handle the following transition:
+ * - EXITED_WITH_FAILURE -> DONE upon CONTAINER_RESOURCES_CLEANEDUP
+ */
+ static class ExitedWithFailureToDoneTransition extends
+ ContainerDoneTransition {
+ @Override
+ public void transition(ContainerImpl container, ContainerEvent event) {
+ if (container.wasLaunched) {
+ container.metrics.endRunningContainer();
+ }
+ container.metrics.failedContainer();
+ NMAuditLogger.logFailure(container.user,
+ AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl",
+ "Container failed with state: " + container.getContainerState(),
+ container.containerId.getApplicationAttemptId().getApplicationId(),
+ container.containerId);
+ super.transition(container, event);
+ }
+ }
+
+ /**
+ * Handle the following transition:
+ * - KILLING -> DONE upon CONTAINER_RESOURCES_CLEANEDUP
+ */
+ static class KillingToDoneTransition extends
+ ContainerDoneTransition {
+ @Override
+ public void transition(ContainerImpl container, ContainerEvent event) {
+ container.metrics.killedContainer();
+ NMAuditLogger.logSuccess(container.user,
+ AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl",
+ container.containerId.getApplicationAttemptId().getApplicationId(),
+ container.containerId);
+ super.transition(container, event);
+ }
+ }
+
+ /**
+ * Handle the following transition:
+ * CONTAINER_CLEANEDUP_AFTER_KILL -> DONE upon CONTAINER_RESOURCES_CLEANEDUP
+ */
+ static class ContainerCleanedupAfterKillToDoneTransition extends
+ ContainerDoneTransition {
+ @Override
+ public void transition(ContainerImpl container, ContainerEvent event) {
+ if (container.wasLaunched) {
+ container.metrics.endRunningContainer();
+ }
+ container.metrics.killedContainer();
+ NMAuditLogger.logSuccess(container.user,
+ AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl",
+ container.containerId.getApplicationAttemptId().getApplicationId(),
+ container.containerId);
super.transition(container, event);
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/838c213b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
index beaafe1..3615fee 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
@@ -27,6 +27,8 @@ import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.metrics2.source.JvmMetrics;
import org.apache.hadoop.yarn.api.records.Resource;
+import com.google.common.annotations.VisibleForTesting;
+
@Metrics(about="Metrics for node manager", context="yarn")
public class NodeManagerMetrics {
@Metric MutableCounterInt containersLaunched;
@@ -127,4 +129,18 @@ public class NodeManagerMetrics {
return containersRunning.value();
}
+ @VisibleForTesting
+ public int getKilledContainers() {
+ return containersKilled.value();
+ }
+
+ @VisibleForTesting
+ public int getFailedContainers() {
+ return containersFailed.value();
+ }
+
+ @VisibleForTesting
+ public int getCompletedContainers() {
+ return containersCompleted.value();
+ }
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/838c213b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
index c28d691..2834e30 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
@@ -173,13 +173,20 @@ public class TestContainer {
wc = new WrappedContainer(13, 314159265358979L, 4344, "yak");
wc.initContainer();
wc.localizeResources();
+ int running = metrics.getRunningContainers();
wc.launchContainer();
+ assertEquals(running + 1, metrics.getRunningContainers());
reset(wc.localizerBus);
wc.containerKilledOnRequest();
assertEquals(ContainerState.EXITED_WITH_FAILURE,
wc.c.getContainerState());
assertNull(wc.c.getLocalizedResources());
verifyCleanupCall(wc);
+ int failed = metrics.getFailedContainers();
+ wc.containerResourcesCleanup();
+ assertEquals(ContainerState.DONE, wc.c.getContainerState());
+ assertEquals(failed + 1, metrics.getFailedContainers());
+ assertEquals(running, metrics.getRunningContainers());
}
finally {
if (wc != null) {
@@ -219,13 +226,20 @@ public class TestContainer {
wc = new WrappedContainer(11, 314159265358979L, 4344, "yak");
wc.initContainer();
wc.localizeResources();
+ int running = metrics.getRunningContainers();
wc.launchContainer();
+ assertEquals(running + 1, metrics.getRunningContainers());
reset(wc.localizerBus);
wc.containerSuccessful();
assertEquals(ContainerState.EXITED_WITH_SUCCESS,
wc.c.getContainerState());
assertNull(wc.c.getLocalizedResources());
verifyCleanupCall(wc);
+ int completed = metrics.getCompletedContainers();
+ wc.containerResourcesCleanup();
+ assertEquals(ContainerState.DONE, wc.c.getContainerState());
+ assertEquals(completed + 1, metrics.getCompletedContainers());
+ assertEquals(running, metrics.getRunningContainers());
}
finally {
if (wc != null) {
@@ -319,12 +333,14 @@ public class TestContainer {
try {
wc = new WrappedContainer(13, 314159265358979L, 4344, "yak");
assertEquals(ContainerState.NEW, wc.c.getContainerState());
+ int killed = metrics.getKilledContainers();
wc.killContainer();
assertEquals(ContainerState.DONE, wc.c.getContainerState());
assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER,
wc.c.cloneAndGetContainerStatus().getExitStatus());
assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics()
.contains("KillRequest"));
+ assertEquals(killed + 1, metrics.getKilledContainers());
} finally {
if (wc != null) {
wc.finished();
@@ -345,6 +361,10 @@ public class TestContainer {
wc.c.cloneAndGetContainerStatus().getExitStatus());
assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics()
.contains("KillRequest"));
+ int killed = metrics.getKilledContainers();
+ wc.containerResourcesCleanup();
+ assertEquals(ContainerState.DONE, wc.c.getContainerState());
+ assertEquals(killed + 1, metrics.getKilledContainers());
} finally {
if (wc != null) {
wc.finished();
@@ -365,6 +385,10 @@ public class TestContainer {
assertEquals(ContainerState.LOCALIZATION_FAILED, wc.c.getContainerState());
assertNull(wc.c.getLocalizedResources());
verifyCleanupCall(wc);
+ int failed = metrics.getFailedContainers();
+ wc.containerResourcesCleanup();
+ assertEquals(ContainerState.DONE, wc.c.getContainerState());
+ assertEquals(failed + 1, metrics.getFailedContainers());
} finally {
if (wc != null) {
wc.finished();
@@ -389,8 +413,11 @@ public class TestContainer {
wc.c.getContainerState());
assertNull(wc.c.getLocalizedResources());
verifyCleanupCall(wc);
+ int killed = metrics.getKilledContainers();
wc.c.handle(new ContainerEvent(wc.c.getContainerId(),
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP));
+ assertEquals(ContainerState.DONE, wc.c.getContainerState());
+ assertEquals(killed + 1, metrics.getKilledContainers());
assertEquals(0, metrics.getRunningContainers());
} finally {
if (wc != null) {