You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by gi...@apache.org on 2019/04/01 21:21:36 UTC

[hadoop] branch trunk updated: YARN-9428. Add metrics for paused containers in NodeManager. Contributed by Abhishek Modi.

This is an automated email from the ASF dual-hosted git repository.

gifuma pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new ab2bda5  YARN-9428. Add metrics for paused containers in NodeManager. Contributed by Abhishek Modi.
ab2bda5 is described below

commit ab2bda57bd9ad617342586d5769121a4fef4eab1
Author: Giovanni Matteo Fumarola <gi...@apache.org>
AuthorDate: Mon Apr 1 14:21:17 2019 -0700

    YARN-9428. Add metrics for paused containers in NodeManager. Contributed by Abhishek Modi.
---
 .../containermanager/container/ContainerImpl.java         | 15 +++++++++++++++
 .../server/nodemanager/metrics/NodeManagerMetrics.java    | 13 +++++++++++++
 .../containermanager/container/TestContainer.java         |  3 +++
 3 files changed, 31 insertions(+)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index 00e6aa7..cfade27 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -161,6 +161,7 @@ public class ContainerImpl implements Container {
   private final StringBuilder diagnostics;
   private final int diagnosticsMaxSize;
   private boolean wasLaunched;
+  private boolean wasPaused;
   private long containerLocalizationStartTime;
   private long containerLaunchStartTime;
   private ContainerMetrics containerMetrics;
@@ -1541,6 +1542,9 @@ public class ContainerImpl implements Container {
     public void transition(ContainerImpl container, ContainerEvent event) {
       container.sendContainerMonitorStartEvent();
       container.wasLaunched = true;
+      container.setIsPaused(true);
+      // Count the container as paused; leaving pause decrements the gauge.
+      container.metrics.pausedContainer();
     }
   }
 
@@ -1561,6 +1563,7 @@ public class ContainerImpl implements Container {
     public void transition(ContainerImpl container, ContainerEvent event) {
 
       container.setIsReInitializing(false);
+      container.setIsPaused(false);
       // Set exit code to 0 on success    	
       container.exitCode = 0;
     	
@@ -1591,6 +1594,7 @@ public class ContainerImpl implements Container {
 
     @Override
     public void transition(ContainerImpl container, ContainerEvent event) {
+      container.setIsPaused(false);
       container.setIsReInitializing(false);
       ContainerExitEvent exitEvent = (ContainerExitEvent) event;
       container.exitCode = exitEvent.getExitCode();
@@ -1835,6 +1839,7 @@ public class ContainerImpl implements Container {
     public void transition(ContainerImpl container, ContainerEvent event) {
       // Kill the process/process-grp
       container.setIsReInitializing(false);
+      container.setIsPaused(false);
       container.dispatcher.getEventHandler().handle(
           new ContainersLauncherEvent(container,
               ContainersLauncherEventType.CLEANUP_CONTAINER));
@@ -2080,6 +2085,8 @@ public class ContainerImpl implements Container {
       SingleArcTransition<ContainerImpl, ContainerEvent> {
     @Override
     public void transition(ContainerImpl container, ContainerEvent event) {
+      container.setIsPaused(true);
+      container.metrics.pausedContainer();
       // Container was PAUSED so tell the scheduler
       container.dispatcher.getEventHandler().handle(
           new ContainerSchedulerEvent(container,
@@ -2096,6 +2103,7 @@ public class ContainerImpl implements Container {
       SingleArcTransition<ContainerImpl, ContainerEvent> {
     @Override
     public void transition(ContainerImpl container, ContainerEvent event) {
+      container.setIsPaused(false);
       // Pause the process/process-grp if it is supported by the container
       container.dispatcher.getEventHandler().handle(
           new ContainersLauncherEvent(container,
@@ -2154,6 +2162,13 @@ public class ContainerImpl implements Container {
     return container.resourceSet.getResourcesUploadPolicies().get(resource);
   }
 
+  private void setIsPaused(boolean paused) {
+    if (this.wasPaused && !paused) {
+      this.metrics.endPausedContainer();
+    }
+    this.wasPaused = paused;
+  }
+
   @VisibleForTesting
   ContainerRetryContext getContainerRetryContext() {
     return containerRetryContext;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
index 823a9d9..8ecc1a1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
@@ -44,6 +44,7 @@ public class NodeManagerMetrics {
   @Metric("# of initializing containers")
       MutableGaugeInt containersIniting;
   @Metric MutableGaugeInt containersRunning;
+  @Metric("# of paused containers") MutableGaugeInt containersPaused;
   @Metric("Current allocated memory in GB")
       MutableGaugeInt allocatedGB;
   @Metric("Current # of allocated containers")
@@ -168,6 +169,14 @@ public class NodeManagerMetrics {
     containersReIniting.decr();
   }
 
+  public void pausedContainer() {
+    containersPaused.incr();
+  }
+
+  public void endPausedContainer() {
+    containersPaused.decr();
+  }
+
   public void allocateContainer(Resource res) {
     allocatedContainers.incr();
     allocatedMB = allocatedMB + res.getMemorySize();
@@ -268,6 +277,10 @@ public class NodeManagerMetrics {
     return containersRunning.value();
   }
 
+  public int getPausedContainers() {
+    return containersPaused.value();
+  }
+
   @VisibleForTesting
   public int getKilledContainers() {
     return containersKilled.value();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
index 4d7559e..ea3acca 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
@@ -246,13 +246,16 @@ public class TestContainer {
       wc.initContainer();
       wc.localizeResources();
       int running = metrics.getRunningContainers();
+      int paused = metrics.getPausedContainers();
       wc.launchContainer();
       assertEquals(running + 1, metrics.getRunningContainers());
       reset(wc.localizerBus);
       wc.pauseContainer();
       assertEquals(ContainerState.PAUSED,
           wc.c.getContainerState());
+      assertEquals(paused + 1, metrics.getPausedContainers());
       wc.resumeContainer();
+      assertEquals(paused, metrics.getPausedContainers());
       assertEquals(ContainerState.RUNNING,
           wc.c.getContainerState());
       wc.containerKilledOnRequest();


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org