You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sn...@apache.org on 2021/12/17 13:51:57 UTC

[hadoop] branch branch-3.3 updated: YARN-6862. Nodemanager resource usage metrics sometimes are negative. Contributed by Benjamin Teke

This is an automated email from the ASF dual-hosted git repository.

snemeth pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 357423b  YARN-6862. Nodemanager resource usage metrics sometimes are negative. Contributed by Benjamin Teke
357423b is described below

commit 357423b57a93249aebd635eb461ba1cff63c7353
Author: Szilard Nemeth <sn...@apache.org>
AuthorDate: Fri Dec 17 14:51:41 2021 +0100

    YARN-6862. Nodemanager resource usage metrics sometimes are negative. Contributed by Benjamin Teke
---
 .../monitor/ContainersMonitorImpl.java             |  8 ++++
 .../MockCPUResourceCalculatorProcessTree.java      | 10 +++++
 ...> MockMemoryResourceCalculatorProcessTree.java} | 47 +++++++++++++++-------
 .../monitor/MockResourceCalculatorProcessTree.java |  6 +++
 .../TestContainersMonitorResourceChange.java       | 24 ++++++++---
 5 files changed, 75 insertions(+), 20 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index a83ae3a..deccded 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -537,6 +537,14 @@ public class ContainersMonitorImpl extends AbstractService implements
             pTree.updateProcessTree();    // update process-tree
             long currentVmemUsage = pTree.getVirtualMemorySize();
             long currentPmemUsage = pTree.getRssMemorySize();
+            if (currentVmemUsage < 0 || currentPmemUsage < 0) {
+              // YARN-6862/YARN-5021: If the container just exited, or the
+              // physical/virtual memory is UNAVAILABLE (-1) for another reason,
+              // the values shouldn't be aggregated.
+              LOG.info("Skipping monitoring container {} because "
+                  + "memory usage is not available.", containerId);
+              continue;
+            }
 
             // if machine has 6 cores and 3 are used,
             // cpuUsagePercentPerCore should be 300%
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java
index eb35c91..49161f3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java
@@ -57,6 +57,16 @@ public class MockCPUResourceCalculatorProcessTree
   }
 
   @Override
+  public long getVirtualMemorySize(int olderThanAge) {
+    return 0;
+  }
+
+  @Override
+  public long getRssMemorySize(int olderThanAge) {
+    return 0;
+  }
+
+  @Override
   public float getCpuUsagePercent() {
     long cpu = this.cpuPercentage;
     // First getter call will be returned with -1, and other calls will
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockMemoryResourceCalculatorProcessTree.java
similarity index 50%
copy from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java
copy to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockMemoryResourceCalculatorProcessTree.java
index eb35c91..ea45ac4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockCPUResourceCalculatorProcessTree.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockMemoryResourceCalculatorProcessTree.java
@@ -21,19 +21,20 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
 import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
 
 /**
- * Mock class to obtain resource usage (CPU).
+ * Mock class to obtain resource usage (Memory).
  */
-public class MockCPUResourceCalculatorProcessTree
-    extends ResourceCalculatorProcessTree {
+public class MockMemoryResourceCalculatorProcessTree extends ResourceCalculatorProcessTree {
+  private final long memorySize = 500000000L;
 
-  private long cpuPercentage = ResourceCalculatorProcessTree.UNAVAILABLE;
+  private long rssMemorySize = memorySize;
+  private long virtualMemorySize = ResourceCalculatorProcessTree.UNAVAILABLE;
 
   /**
-   * Constructor for MockCPUResourceCalculatorProcessTree with specified root
+   * Constructor for MockMemoryResourceCalculatorProcessTree with specified root
    * process.
    * @param root
    */
-  public MockCPUResourceCalculatorProcessTree(String root) {
+  public MockMemoryResourceCalculatorProcessTree(String root) {
     super(root);
   }
 
@@ -57,14 +58,32 @@ public class MockCPUResourceCalculatorProcessTree
   }
 
   @Override
-  public float getCpuUsagePercent() {
-    long cpu = this.cpuPercentage;
-    // First getter call will be returned with -1, and other calls will
-    // return non-zero value as defined below.
-    if (cpu == ResourceCalculatorProcessTree.UNAVAILABLE) {
-      // Set a default value other than 0 for test.
-      this.cpuPercentage = 50;
+  public long getRssMemorySize(int olderThanAge) {
+    long rssMemory = this.rssMemorySize;
+    // The first getter call returns 500000000, the second call returns -1
+    // (UNAVAILABLE), and every later call returns a valid value.
+    if (rssMemory == memorySize) {
+      this.rssMemorySize = ResourceCalculatorProcessTree.UNAVAILABLE;
+    }
+    if (rssMemory == ResourceCalculatorProcessTree.UNAVAILABLE) {
+      this.rssMemorySize = 2 * memorySize;
+    }
+    return rssMemory;
+  }
+
+  @Override
+  public long getVirtualMemorySize(int olderThanAge) {
+    long virtualMemory = this.virtualMemorySize;
+    // The first getter call returns -1 (UNAVAILABLE); every later call
+    // returns a valid value.
+    if (virtualMemory == ResourceCalculatorProcessTree.UNAVAILABLE) {
+      this.virtualMemorySize = 3 * memorySize;
     }
-    return cpu;
+    return virtualMemory;
+  }
+
+  @Override
+  public float getCpuUsagePercent() {
+    return 0;
   }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
index ff2a570..8018959 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
@@ -51,11 +51,17 @@ public class MockResourceCalculatorProcessTree extends ResourceCalculatorProcess
     this.rssMemorySize = rssMemorySize;
   }
 
+  @Override
   public long getRssMemorySize() {
     return this.rssMemorySize;
   }
 
   @Override
+  public long getVirtualMemorySize() {
+    return 0;
+  }
+
+  @Override
   public float getCpuUsagePercent() {
     return 0;
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
index cc8e180..c849619 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
@@ -282,13 +282,24 @@ public class TestContainersMonitorResourceChange {
 
   @Test
   public void testContainersCPUResourceForDefaultValue() throws Exception {
+    testContainerMonitoringInvalidResources(
+        MockCPUResourceCalculatorProcessTree.class.getCanonicalName());
+  }
+
+  @Test
+  public void testContainersMemoryResourceUnavailable() throws Exception {
+    testContainerMonitoringInvalidResources(
+        MockMemoryResourceCalculatorProcessTree.class.getCanonicalName());
+  }
+
+  private void testContainerMonitoringInvalidResources(
+      String processTreeClassName) throws Exception {
     Configuration newConf = new Configuration(conf);
-    // set container monitor interval to be 20s
+    // set container monitor interval to be 20ms
     newConf.setLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, 20L);
     containersMonitor = createContainersMonitor(executor, dispatcher, context);
     newConf.set(YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE,
-        MockCPUResourceCalculatorProcessTree.class.getCanonicalName());
-    // set container monitor interval to be 20ms
+        processTreeClassName);
     containersMonitor.init(newConf);
     containersMonitor.start();
 
@@ -305,7 +316,7 @@ public class TestContainersMonitorResourceChange {
         0, containersMonitor.getContainersUtilization()
             .compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));
 
-    // Verify the container utilization value. Since atleast one round is done,
+    // Verify the container utilization value. Since at least one round is done,
     // we can expect a non-zero value for container utilization as
     // MockCPUResourceCalculatorProcessTree#getCpuUsagePercent will return 50.
     waitForContainerResourceUtilizationChange(containersMonitor, 100);
@@ -324,12 +335,13 @@ public class TestContainersMonitorResourceChange {
       }
 
       LOG.info(
-          "Monitor thread is waiting for resource utlization change.");
+          "Monitor thread is waiting for resource utilization change.");
       Thread.sleep(WAIT_MS_PER_LOOP);
       timeWaiting += WAIT_MS_PER_LOOP;
     }
 
-    assertTrue("Resource utilization is not changed from second run onwards",
+    assertTrue("Resource utilization is not changed after " +
+            timeoutMsecs / WAIT_MS_PER_LOOP + " updates",
         0 != containersMonitor.getContainersUtilization()
             .compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));
   }

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org