You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by pr...@apache.org on 2020/01/29 08:05:34 UTC

[hadoop] branch trunk updated: YARN-10107. Fix GpuResourcePlugin#getNMResourceInfo to honor Auto Discovery Enabled

This is an automated email from the ASF dual-hosted git repository.

prabhujoseph pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 825db8f  YARN-10107. Fix GpuResourcePlugin#getNMResourceInfo to honor Auto Discovery Enabled
825db8f is described below

commit 825db8fe2ab37bd5a9a54485ea9ecbabf3766ed6
Author: Prabhu Joseph <pr...@apache.org>
AuthorDate: Wed Jan 29 13:29:42 2020 +0530

    YARN-10107. Fix GpuResourcePlugin#getNMResourceInfo to honor Auto Discovery Enabled
    
    Contributed by Szilard Nemeth.
---
 .../resourceplugin/gpu/GpuDiscoverer.java          |  2 +-
 .../resourceplugin/gpu/GpuResourcePlugin.java      | 32 ++++-----
 .../resourceplugin/gpu/TestGpuResourcePlugin.java  | 75 +++++++++++++++++++++-
 3 files changed, 91 insertions(+), 18 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 3f2b657..4133fb4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -136,7 +136,7 @@ public class GpuDiscoverer extends Configured {
     return lastDiscoveredGpuInformation;
   }
 
-  private boolean isAutoDiscoveryEnabled() {
+  boolean isAutoDiscoveryEnabled() {
     String allowedDevicesStr = getConf().get(
         YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
         YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
index d44160e..25ea193 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -94,27 +94,29 @@ public class GpuResourcePlugin implements ResourcePlugin {
 
   @Override
   public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
-    GpuDeviceInformation gpuDeviceInformation;
-
-    //At this point the gpu plugin is already enabled
-    checkGpuResourceHandler();
-
-    checkErrorCount();
-    try{
-      gpuDeviceInformation = gpuDiscoverer.getGpuDeviceInformation();
-      numOfErrorExecutionSinceLastSucceed = 0;
-    } catch (YarnException e) {
-      LOG.error(e.getMessage(), e);
-      numOfErrorExecutionSinceLastSucceed++;
-      throw e;
+    final GpuDeviceInformation gpuDeviceInformation;
+
+    if (gpuDiscoverer.isAutoDiscoveryEnabled()) {
+      //At this point the gpu plugin is already enabled
+      checkGpuResourceHandler();
+
+      checkErrorCount();
+      try{
+        gpuDeviceInformation = gpuDiscoverer.getGpuDeviceInformation();
+        numOfErrorExecutionSinceLastSucceed = 0;
+      } catch (YarnException e) {
+        LOG.error(e.getMessage(), e);
+        numOfErrorExecutionSinceLastSucceed++;
+        throw e;
+      }
+    } else {
+      gpuDeviceInformation = null;
     }
-
     GpuResourceAllocator gpuResourceAllocator =
         gpuResourceHandler.getGpuAllocator();
     List<GpuDevice> totalGpus = gpuResourceAllocator.getAllowedGpus();
     List<AssignedGpuDevice> assignedGpuDevices =
         gpuResourceAllocator.getAssignedGpus();
-
     return new NMGpuResourceInfo(gpuDeviceInformation, totalGpus,
         assignedGpuDevices);
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
index 888f899..5e065cb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
@@ -19,15 +19,38 @@
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
 
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
+import com.google.common.collect.Lists;
 import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.PerGpuDeviceInformation;
+import org.junit.Assert;
 import org.junit.Test;
+import java.util.List;
 
 public class TestGpuResourcePlugin {
 
+  private GpuDiscoverer createMockDiscoverer() throws YarnException {
+    GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+    when(gpuDiscoverer.isAutoDiscoveryEnabled()).thenReturn(true);
+
+    PerGpuDeviceInformation gpu =
+        new PerGpuDeviceInformation();
+    gpu.setProductName("testGpu");
+    List<PerGpuDeviceInformation> gpus = Lists.newArrayList();
+    gpus.add(gpu);
+
+    GpuDeviceInformation gpuDeviceInfo = new GpuDeviceInformation();
+    gpuDeviceInfo.setGpus(gpus);
+    when(gpuDiscoverer.getGpuDeviceInformation()).thenReturn(gpuDeviceInfo);
+    return gpuDiscoverer;
+  }
+
   @Test(expected = YarnException.class)
   public void testResourceHandlerNotInitialized() throws YarnException {
-    GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+    GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
     GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
         mock(GpuNodeResourceUpdateHandler.class);
 
@@ -39,7 +62,7 @@ public class TestGpuResourcePlugin {
 
   @Test
   public void testResourceHandlerIsInitialized() throws YarnException {
-    GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+    GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
     GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
         mock(GpuNodeResourceUpdateHandler.class);
 
@@ -51,4 +74,52 @@ public class TestGpuResourcePlugin {
     //Not throwing any exception
     target.getNMResourceInfo();
   }
+
+  @Test
+  public void testGetNMResourceInfoAutoDiscoveryEnabled()
+      throws YarnException {
+    GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
+
+    GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
+        mock(GpuNodeResourceUpdateHandler.class);
+
+    GpuResourcePlugin target =
+        new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
+
+    target.createResourceHandler(null, null, null);
+
+    NMGpuResourceInfo resourceInfo =
+        (NMGpuResourceInfo) target.getNMResourceInfo();
+    Assert.assertNotNull("GpuDeviceInformation should not be null",
+        resourceInfo.getGpuDeviceInformation());
+
+    List<PerGpuDeviceInformation> gpus =
+        resourceInfo.getGpuDeviceInformation().getGpus();
+    Assert.assertNotNull("List of PerGpuDeviceInformation should not be null",
+        gpus);
+
+    Assert.assertEquals("List of PerGpuDeviceInformation should have a " +
+        "size of 1", 1, gpus.size());
+    Assert.assertEquals("Product name of GPU does not match",
+        "testGpu", gpus.get(0).getProductName());
+  }
+
+  @Test
+  public void testGetNMResourceInfoAutoDiscoveryDisabled()
+      throws YarnException {
+    GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
+    when(gpuDiscoverer.isAutoDiscoveryEnabled()).thenReturn(false);
+
+    GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
+        mock(GpuNodeResourceUpdateHandler.class);
+
+    GpuResourcePlugin target =
+        new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
+
+    target.createResourceHandler(null, null, null);
+
+    NMGpuResourceInfo resourceInfo =
+        (NMGpuResourceInfo) target.getNMResourceInfo();
+    Assert.assertNull(resourceInfo.getGpuDeviceInformation());
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org