You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by eb...@apache.org on 2021/04/29 17:42:59 UTC

[hadoop] branch trunk updated: YARN-10707. Support custom resources in ResourceUtilization, and update Node GPU Utilization to use. Contributed by Qi Zhu

This is an automated email from the ASF dual-hosted git repository.

ebadger pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 803ac4b  YARN-10707. Support custom resources in ResourceUtilization, and update Node GPU Utilization to use. Contributed by Qi Zhu
803ac4b is described below

commit 803ac4b1a0fc5c0b6c25a5df0733b3ebcdb2f294
Author: Eric Badger <eb...@verizonmedia.com>
AuthorDate: Thu Apr 29 17:42:13 2021 +0000

    YARN-10707. Support custom resources in ResourceUtilization, and update Node GPU Utilization to use. Contributed by Qi Zhu
---
 .../yarn/api/records/ResourceUtilization.java      | 128 ++++++++++++++++++++-
 .../src/main/proto/yarn_protos.proto               |   6 +
 .../yarn/api/records/impl/pb/ProtoUtils.java       |  29 +++++
 .../records/impl/pb/ResourceUtilizationPBImpl.java |  31 ++++-
 .../hadoop/yarn/api/BasePBImplRecordsTest.java     |   2 +
 .../yarn/api/records/TestResourceUtilization.java  |  49 ++++++++
 .../nodemanager/NodeResourceMonitorImpl.java       |  30 +++--
 .../gpu/GpuNodeResourceUpdateHandler.java          |  43 +++++--
 .../resourceplugin/gpu/TestGpuResourcePlugin.java  |   4 +-
 9 files changed, 297 insertions(+), 25 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceUtilization.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceUtilization.java
index f6c5a69..ff3cec3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceUtilization.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceUtilization.java
@@ -22,6 +22,9 @@ import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.util.Records;
 
+import java.util.HashMap;
+import java.util.Map;
+
 /**
  * <p>
  * <code>ResourceUtilization</code> models the utilization of a set of computer
@@ -33,14 +36,26 @@ import org.apache.hadoop.yarn.util.Records;
 public abstract class ResourceUtilization implements
     Comparable<ResourceUtilization> {
 
+  private Map<String, Float> customResources
+      = new HashMap<>();
+
   @Public
   @Unstable
-  public static ResourceUtilization newInstance(int pmem, int vmem, float cpu) {
+  public static ResourceUtilization newInstance(int pmem, int vmem,
+      float cpu) {
+    return newInstance(pmem, vmem, cpu, null);
+  }
+
+  @Public
+  @Unstable
+  public static ResourceUtilization newInstance(int pmem, int vmem,
+      float cpu, Map<String, Float> customResources) {
     ResourceUtilization utilization =
         Records.newRecord(ResourceUtilization.class);
     utilization.setPhysicalMemory(pmem);
     utilization.setVirtualMemory(vmem);
     utilization.setCPU(cpu);
+    utilization.setCustomResources(customResources);
     return utilization;
   }
 
@@ -49,7 +64,9 @@ public abstract class ResourceUtilization implements
   public static ResourceUtilization newInstance(
       ResourceUtilization resourceUtil) {
     return newInstance(resourceUtil.getPhysicalMemory(),
-        resourceUtil.getVirtualMemory(), resourceUtil.getCPU());
+        resourceUtil.getVirtualMemory(),
+        resourceUtil.getCPU(),
+        resourceUtil.getCustomResources());
   }
 
   /**
@@ -106,6 +123,51 @@ public abstract class ResourceUtilization implements
   @Unstable
   public abstract void setCPU(float cpu);
 
+  /**
+   * Get <em>custom resource</em> utilization
+   * (The amount of custom resource used).
+   *
+   * @param resourceName <em>resourceName of custom resource</em>
+   * @return <em>resourceName utilization</em>, or 0 if none is recorded
+   */
+  @Public
+  @Unstable
+  public float getCustomResource(String resourceName) {
+    // Map.get yields null for an absent key; unboxing that would throw NPE.
+    Float used = (customResources == null || resourceName == null)
+        ? null : customResources.get(resourceName);
+    return used == null ? 0f : used;
+  }
+
+  @Public
+  @Unstable
+  public Map<String, Float> getCustomResources() {
+    return customResources;
+  }
+
+  @Public
+  @Unstable
+  public void setCustomResources(Map<String, Float> customResources) {
+    if (customResources != null) {
+      this.customResources = customResources;
+    }
+  }
+
+  /**
+   * Set <em>custom resource</em> utilization
+   * (The amount of custom resource used).
+   * A null or empty resource name is ignored.
+   * @param resourceName <em>resourceName</em>
+   * @param utilization <em>utilization of custom resource</em>
+   */
+  @Public
+  @Unstable
+  public void setCustomResource(String resourceName, float utilization) {
+    if (resourceName != null && !resourceName.isEmpty()) {
+      customResources.put(resourceName, utilization);
+    }
+  }
+
   @Override
   public int hashCode() {
     final int prime = 263167;
@@ -113,6 +175,12 @@ public abstract class ResourceUtilization implements
     result = prime * result + getVirtualMemory();
     result = prime * result + getPhysicalMemory();
     result = 31 * result + Float.valueOf(getCPU()).hashCode();
+    if (customResources != null && !customResources.isEmpty()) {
+      for (Map.Entry<String, Float> entry : customResources.entrySet()) {
+        result = 31 * result +
+            customResources.get(entry.getKey()).hashCode();
+      }
+    }
     return result;
   }
 
@@ -130,7 +198,8 @@ public abstract class ResourceUtilization implements
     ResourceUtilization other = (ResourceUtilization) obj;
     if (getVirtualMemory() != other.getVirtualMemory()
         || getPhysicalMemory() != other.getPhysicalMemory()
-        || getCPU() != other.getCPU()) {
+        || getCPU() != other.getCPU()
+        || !customResources.equals(other.customResources)) {
       return false;
     }
     return true;
@@ -138,8 +207,19 @@ public abstract class ResourceUtilization implements
 
   @Override
   public String toString() {
-    return "<pmem:" + getPhysicalMemory() + ", vmem:" + getVirtualMemory()
-        + ", vCores:" + getCPU() + ">";
+    StringBuilder utilizationString = new StringBuilder();
+    utilizationString.append(
+        "<pmem:" + getPhysicalMemory() + ", vmem:" + getVirtualMemory()
+        + ", vCores:" + getCPU());
+    if (getCustomResources() != null && !getCustomResources().isEmpty()) {
+      for (Map.Entry<String, Float> entry : getCustomResources().entrySet()) {
+        utilizationString.append(", "
+            + entry.getKey() + ":" + entry.getValue());
+      }
+    }
+
+    utilizationString.append(">");
+    return utilizationString.toString();
   }
 
   /**
@@ -151,9 +231,28 @@ public abstract class ResourceUtilization implements
   @Public
   @Unstable
   public void addTo(int pmem, int vmem, float cpu) {
+    addTo(pmem, vmem, cpu, null, 0f);
+  }
+
+  /**
+   * Add utilization to the current one.
+   * @param pmem Physical memory used to add.
+   * @param vmem Virtual memory used to add.
+   * @param cpu CPU utilization to add.
+   * @param resourceName Name of the custom resource to add; skipped if null.
+   * @param utilization Utilization of the custom resource to add.
+   */
+  @Public
+  @Unstable
+  public void addTo(int pmem, int vmem, float cpu,
+      String resourceName, float utilization) {
     this.setPhysicalMemory(this.getPhysicalMemory() + pmem);
     this.setVirtualMemory(this.getVirtualMemory() + vmem);
     this.setCPU(this.getCPU() + cpu);
+    if (resourceName != null) {
+      this.setCustomResource(resourceName,
+          getCustomResource(resourceName) + utilization);
+    }
   }
 
   /**
@@ -165,8 +264,27 @@ public abstract class ResourceUtilization implements
   @Public
   @Unstable
   public void subtractFrom(int pmem, int vmem, float cpu) {
+    subtractFrom(pmem, vmem, cpu, null, 0f);
+  }
+
+  /**
+   * Subtract utilization from the current one.
+   * @param pmem Physical memory to be subtracted.
+   * @param vmem Virtual memory to be subtracted.
+   * @param cpu CPU utilization to be subtracted.
+   * @param resourceName Name of the custom resource; skipped if null.
+   * @param utilization Utilization of the custom resource to be subtracted.
+   */
+  @Public
+  @Unstable
+  public void subtractFrom(int pmem, int vmem, float cpu,
+      String resourceName, float utilization) {
     this.setPhysicalMemory(this.getPhysicalMemory() - pmem);
     this.setVirtualMemory(this.getVirtualMemory() - vmem);
     this.setCPU(this.getCPU() - cpu);
+    if (resourceName != null) {
+      this.setCustomResource(resourceName,
+          getCustomResource(resourceName) - utilization);
+    }
   }
 }
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
index 0b360df..558e724 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
@@ -83,6 +83,7 @@ message ResourceUtilizationProto {
   optional int32 pmem = 1;
   optional int32 vmem = 2;
   optional float cpu = 3;
+  repeated StringFloatMapProto customResources = 4;
 }
 
 message ResourceOptionProto {
@@ -243,6 +244,11 @@ message StringLongMapProto {
   required int64 value = 2;
 }
 
+message StringFloatMapProto {
+  required string key  = 1;
+  required float value = 2;
+}
+
 message ApplicationResourceUsageReportProto {
   optional int32 num_used_containers = 1;
   optional int32 num_reserved_containers = 2;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java
index cdeb417..64bf8cf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java
@@ -587,6 +587,22 @@ public class ProtoUtils {
     return ret;
   }
 
+  /** Converts a name-to-utilization map into its repeated proto form. */
+  public static List<YarnProtos.StringFloatMapProto>
+      convertMapToStringFloatMapProtoList(Map<String, Float> map) {
+    List<YarnProtos.StringFloatMapProto> protoList = new ArrayList<>();
+    if (map == null) {
+      return protoList; // null is treated as "no entries"
+    }
+    for (Map.Entry<String, Float> entry : map.entrySet()) {
+      protoList.add(YarnProtos.StringFloatMapProto.newBuilder()
+          .setKey(entry.getKey())
+          .setValue(entry.getValue())
+          .build());
+    }
+    return protoList;
+  }
+
   public static Map<String, String> convertStringStringMapProtoListToMap(
       List<StringStringMapProto> pList) {
     Map<String, String> ret = new HashMap<>();
@@ -600,6 +616,19 @@ public class ProtoUtils {
     return ret;
   }
 
+  public static Map<String, Float> convertStringFloatMapProtoListToMap(
+      List<YarnProtos.StringFloatMapProto> pList) {
+    Map<String, Float> ret = new HashMap<>();
+    if (pList != null) {
+      for (YarnProtos.StringFloatMapProto p : pList) {
+        if (p.hasKey()) {
+          ret.put(p.getKey(), p.getValue());
+        }
+      }
+    }
+    return ret;
+  }
+
   public static List<YarnProtos.StringStringMapProto> convertToProtoFormat(
       Map<String, String> stringMap) {
     List<YarnProtos.StringStringMapProto> pList = new ArrayList<>();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceUtilizationPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceUtilizationPBImpl.java
index e37adbe..023d1e9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceUtilizationPBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ResourceUtilizationPBImpl.java
@@ -24,6 +24,8 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ResourceUtilizationProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.ResourceUtilizationProtoOrBuilder;
 import org.apache.hadoop.yarn.api.records.ResourceUtilization;
 
+import java.util.Map;
+
 @Private
 @Unstable
 public class ResourceUtilizationPBImpl extends ResourceUtilization {
@@ -69,7 +71,7 @@ public class ResourceUtilizationPBImpl extends ResourceUtilization {
   @Override
   public int getVirtualMemory() {
     ResourceUtilizationProtoOrBuilder p = viaProto ? proto : builder;
-    return (p.getVmem());
+    return p.getVmem();
   }
 
   @Override
@@ -91,12 +93,39 @@ public class ResourceUtilizationPBImpl extends ResourceUtilization {
   }
 
   @Override
+  public float getCustomResource(String resourceName) {
+    return getCustomResources().getOrDefault(resourceName, 0f); // absent -> 0
+  }
+
+  @Override
+  public Map<String, Float> getCustomResources() {
+    ResourceUtilizationProtoOrBuilder p = viaProto ? proto : builder;
+    return ProtoUtils.
+        convertStringFloatMapProtoListToMap(p.
+            getCustomResourcesList());
+  }
+
+  @Override
+  public void setCustomResources(Map<String, Float> customResources) {
+    if (customResources != null) {
+      maybeInitBuilder(); // then replace, not append to, existing entries
+      builder.clearCustomResources().addAllCustomResources(ProtoUtils.
+          convertMapToStringFloatMapProtoList(customResources));
+    }
+  }
+
+  @Override
   public int compareTo(ResourceUtilization other) {
     int diff = this.getPhysicalMemory() - other.getPhysicalMemory();
     if (diff == 0) {
       diff = this.getVirtualMemory() - other.getVirtualMemory();
       if (diff == 0) {
         diff = Float.compare(this.getCPU(), other.getCPU());
+        if (diff == 0) {
+          diff = this.getCustomResources().size() -
+              other.getCustomResources().size();
+          // todo how to compare custom resource in same size
+        }
       }
     }
     return diff;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java
index affa08f..22b687c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/BasePBImplRecordsTest.java
@@ -76,6 +76,8 @@ public class BasePBImplRecordsTest {
           'a' + rand.nextInt(26),
           'a' + rand.nextInt(26),
           'a' + rand.nextInt(26));
+    } else if (type.equals(Float.class)) {
+      return rand.nextFloat();
     } else if (type instanceof Class) {
       Class clazz = (Class)type;
       if (clazz.isArray()) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/TestResourceUtilization.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/TestResourceUtilization.java
index 5934846..a2b0570 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/TestResourceUtilization.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/TestResourceUtilization.java
@@ -21,6 +21,9 @@ package org.apache.hadoop.yarn.api.records;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.util.HashMap;
+import java.util.Map;
+
 public class TestResourceUtilization {
 
   @Test
@@ -60,4 +63,50 @@ public class TestResourceUtilization {
     u1.subtractFrom(10, 0, 0.0f);
     Assert.assertEquals(u1, u3);
   }
+
+  @Test
+  public void testResourceUtilizationWithCustomResource() {
+    Map<String, Float> customResources = new HashMap<>();
+    customResources.put(ResourceInformation.GPU_URI, 5.0f);
+    ResourceUtilization u1 = ResourceUtilization.
+        newInstance(10, 20, 0.5f, customResources);
+    ResourceUtilization u2 = ResourceUtilization.newInstance(u1);
+    ResourceUtilization u3 = ResourceUtilization.
+        newInstance(10, 20, 0.5f, customResources);
+    ResourceUtilization u4 = ResourceUtilization.
+        newInstance(20, 20, 0.5f, customResources);
+    ResourceUtilization u5 = ResourceUtilization.
+        newInstance(30, 40, 0.8f, customResources);
+
+    Assert.assertEquals(u1, u2);
+    Assert.assertEquals(u1, u3);
+    Assert.assertNotEquals(u1, u4);
+    Assert.assertNotEquals(u2, u5);
+    Assert.assertNotEquals(u4, u5);
+
+    Assert.assertTrue(u1.hashCode() == u2.hashCode());
+    Assert.assertTrue(u1.hashCode() == u3.hashCode());
+    Assert.assertFalse(u1.hashCode() == u4.hashCode());
+    Assert.assertFalse(u2.hashCode() == u5.hashCode());
+    Assert.assertFalse(u4.hashCode() == u5.hashCode());
+
+    Assert.assertTrue(u1.getPhysicalMemory() == 10);
+    Assert.assertFalse(u1.getVirtualMemory() == 10);
+    Assert.assertTrue(u1.getCPU() == 0.5f);
+    Assert.assertTrue(u1.
+        getCustomResource(ResourceInformation.GPU_URI) == 5.0f);
+
+    Assert.assertEquals("<pmem:10, vmem:" + u1.getVirtualMemory()
+        + ", vCores:0.5, yarn.io/gpu:5.0>", u1.toString());
+
+    u1.addTo(10, 0, 0.0f);
+    Assert.assertNotEquals(u1, u2);
+    Assert.assertEquals(u1, u4);
+    u1.addTo(10, 20, 0.3f);
+    Assert.assertEquals(u1, u5);
+    u1.subtractFrom(10, 20, 0.3f);
+    Assert.assertEquals(u1, u4);
+    u1.subtractFrom(10, 0, 0.0f);
+    Assert.assertEquals(u1, u3);
+  }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java
index 7577b55..37fa33e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java
@@ -30,6 +30,9 @@ import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.HashMap;
+import java.util.Map;
+
 /**
  * Implementation of the node resource monitor. It periodically tracks the
  * resource utilization of the node and reports it to the NM.
@@ -54,8 +57,11 @@ public class NodeResourceMonitorImpl extends AbstractService implements
   private GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler;
 
   /** Current <em>resource utilization</em> of the node. */
+
+  private Map<String, Float> customResources = new HashMap<>();
+
   private ResourceUtilization nodeUtilization =
-      ResourceUtilization.newInstance(0, 0, 0f);
+      ResourceUtilization.newInstance(0, 0, 0f, customResources);
   private Context nmContext;
 
   /**
@@ -165,22 +171,26 @@ public class NodeResourceMonitorImpl extends AbstractService implements
             resourceCalculatorPlugin.getVirtualMemorySize()
                 - resourceCalculatorPlugin.getAvailableVirtualMemorySize();
         float vcores = resourceCalculatorPlugin.getNumVCoresUsed();
-        nodeUtilization =
-            ResourceUtilization.newInstance(
-                (int) (pmem >> 20), // B -> MB
-                (int) (vmem >> 20), // B -> MB
-                vcores); // Used Virtual Cores
 
-        float nodeGpuUtilization = 0F;
+        float totalNodeGpuUtilization = 0F;
         try {
           if (gpuNodeResourceUpdateHandler != null) {
-            nodeGpuUtilization =
-                gpuNodeResourceUpdateHandler.getNodeGpuUtilization();
+            totalNodeGpuUtilization =
+                gpuNodeResourceUpdateHandler.getTotalNodeGpuUtilization();
           }
         } catch (Exception e) {
           LOG.error("Get Node GPU Utilization error: " + e);
         }
 
+        customResources.
+            put(ResourceInformation.GPU_URI, totalNodeGpuUtilization);
+        nodeUtilization =
+            ResourceUtilization.newInstance(
+                (int) (pmem >> 20), // B -> MB
+                (int) (vmem >> 20), // B -> MB
+                vcores,     // Used Virtual Cores
+                customResources);  // Used GPUs
+
         // Publish the node utilization metrics to node manager
         // metrics system.
         NodeManagerMetrics nmMetrics = nmContext.getNodeManagerMetrics();
@@ -188,7 +198,7 @@ public class NodeResourceMonitorImpl extends AbstractService implements
           nmMetrics.setNodeUsedMemGB(nodeUtilization.getPhysicalMemory());
           nmMetrics.setNodeUsedVMemGB(nodeUtilization.getVirtualMemory());
           nmMetrics.setNodeCpuUtilization(nodeUtilization.getCPU());
-          nmMetrics.setNodeGpuUtilization(nodeGpuUtilization);
+          nmMetrics.setNodeGpuUtilization(totalNodeGpuUtilization);
         }
 
         try {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
index af81709..c31555e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
@@ -79,19 +79,48 @@ public class GpuNodeResourceUpdateHandler extends NodeResourceUpdaterPlugin {
     res.setResourceValue(GPU_URI, nUsableGpus);
   }
 
-  public float getNodeGpuUtilization() throws Exception{
+  /**
+   *
+   * @return The average physical GPUs used in this node.
+   *
+   * For example:
+   * Node with total 4 GPUs
+   * Physical used 2.4 GPUs
+   * Will return 2.4/4 = 0.6f
+   *
+   * @throws Exception when any error happens
+   */
+  public float getAvgNodeGpuUtilization() throws Exception{
     List<PerGpuDeviceInformation> gpuList =
         gpuDiscoverer.getGpuDeviceInformation().getGpus();
-    Float totalGpuUtilization = 0F;
+    Float avgGpuUtilization = 0F;
     if (gpuList != null &&
         gpuList.size() != 0) {
 
-      totalGpuUtilization = gpuList
-          .stream()
-          .map(g -> g.getGpuUtilizations().getOverallGpuUtilization())
-          .collect(Collectors.summingDouble(Float::floatValue))
-          .floatValue() / gpuList.size();
+      avgGpuUtilization = getTotalNodeGpuUtilization() / gpuList.size();
     }
+    return avgGpuUtilization;
+  }
+
+  /**
+   * Sums the overall utilization of every GPU discovered on this node.
+   *
+   * For example, on a node with 4 GPUs of which 2.4 are physically used,
+   * this returns 2.4f.
+   *
+   * @return the total physical GPUs used in this node, or 0 when the
+   *     discoverer reports no GPU list
+   * @throws Exception when any error happens
+   */
+  public float getTotalNodeGpuUtilization() throws Exception{
+    List<PerGpuDeviceInformation> gpuList =
+        gpuDiscoverer.getGpuDeviceInformation().getGpus();
+    if (gpuList == null) {
+      return 0F; // no GPU list reported: nothing is in use
+    }
+    return gpuList.stream()
+        .map(g -> g.getGpuUtilizations().getOverallGpuUtilization())
+        .collect(Collectors.summingDouble(Float::floatValue))
+        .floatValue();
+  }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
index 749e0cc..da1a57e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
@@ -126,7 +126,7 @@ public class TestGpuResourcePlugin {
   }
 
   @Test
-  public void testNodeGPUUtilization()
+  public void testAvgNodeGpuUtilization()
       throws Exception {
     GpuDiscoverer gpuDiscoverer = createNodeGPUUtilizationDiscoverer();
 
@@ -134,7 +134,7 @@ public class TestGpuResourcePlugin {
         new GpuNodeResourceUpdateHandler(gpuDiscoverer, new Configuration());
 
     Assert.assertEquals(0.5F,
-        gpuNodeResourceUpdateHandler.getNodeGpuUtilization(), 1e-6);
+        gpuNodeResourceUpdateHandler.getAvgNodeGpuUtilization(), 1e-6);
   }
 
   private GpuDiscoverer createNodeGPUUtilizationDiscoverer()

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org