You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by pr...@apache.org on 2020/01/29 08:05:34 UTC
[hadoop] branch trunk updated: YARN-10107. Fix GpuResourcePlugin#getNMResourceInfo to honor Auto Discovery Enabled
This is an automated email from the ASF dual-hosted git repository.
prabhujoseph pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 825db8f YARN-10107. Fix GpuResourcePlugin#getNMResourceInfo to honor Auto Discovery Enabled
825db8f is described below
commit 825db8fe2ab37bd5a9a54485ea9ecbabf3766ed6
Author: Prabhu Joseph <pr...@apache.org>
AuthorDate: Wed Jan 29 13:29:42 2020 +0530
YARN-10107. Fix GpuResourcePlugin#getNMResourceInfo to honor Auto Discovery Enabled
Contributed by Szilard Nemeth.
---
.../resourceplugin/gpu/GpuDiscoverer.java | 2 +-
.../resourceplugin/gpu/GpuResourcePlugin.java | 32 ++++-----
.../resourceplugin/gpu/TestGpuResourcePlugin.java | 75 +++++++++++++++++++++-
3 files changed, 91 insertions(+), 18 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 3f2b657..4133fb4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -136,7 +136,7 @@ public class GpuDiscoverer extends Configured {
return lastDiscoveredGpuInformation;
}
- private boolean isAutoDiscoveryEnabled() {
+ boolean isAutoDiscoveryEnabled() {
String allowedDevicesStr = getConf().get(
YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
index d44160e..25ea193 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -94,27 +94,29 @@ public class GpuResourcePlugin implements ResourcePlugin {
@Override
public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
- GpuDeviceInformation gpuDeviceInformation;
-
- //At this point the gpu plugin is already enabled
- checkGpuResourceHandler();
-
- checkErrorCount();
- try{
- gpuDeviceInformation = gpuDiscoverer.getGpuDeviceInformation();
- numOfErrorExecutionSinceLastSucceed = 0;
- } catch (YarnException e) {
- LOG.error(e.getMessage(), e);
- numOfErrorExecutionSinceLastSucceed++;
- throw e;
+ final GpuDeviceInformation gpuDeviceInformation;
+
+ if (gpuDiscoverer.isAutoDiscoveryEnabled()) {
+ //At this point the gpu plugin is already enabled
+ checkGpuResourceHandler();
+
+ checkErrorCount();
+ try{
+ gpuDeviceInformation = gpuDiscoverer.getGpuDeviceInformation();
+ numOfErrorExecutionSinceLastSucceed = 0;
+ } catch (YarnException e) {
+ LOG.error(e.getMessage(), e);
+ numOfErrorExecutionSinceLastSucceed++;
+ throw e;
+ }
+ } else {
+ gpuDeviceInformation = null;
}
-
GpuResourceAllocator gpuResourceAllocator =
gpuResourceHandler.getGpuAllocator();
List<GpuDevice> totalGpus = gpuResourceAllocator.getAllowedGpus();
List<AssignedGpuDevice> assignedGpuDevices =
gpuResourceAllocator.getAssignedGpus();
-
return new NMGpuResourceInfo(gpuDeviceInformation, totalGpus,
assignedGpuDevices);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
index 888f899..5e065cb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
@@ -19,15 +19,38 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import com.google.common.collect.Lists;
import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
+import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.PerGpuDeviceInformation;
+import org.junit.Assert;
import org.junit.Test;
+import java.util.List;
public class TestGpuResourcePlugin {
+ private GpuDiscoverer createMockDiscoverer() throws YarnException {
+ GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+ when(gpuDiscoverer.isAutoDiscoveryEnabled()).thenReturn(true);
+
+ PerGpuDeviceInformation gpu =
+ new PerGpuDeviceInformation();
+ gpu.setProductName("testGpu");
+ List<PerGpuDeviceInformation> gpus = Lists.newArrayList();
+ gpus.add(gpu);
+
+ GpuDeviceInformation gpuDeviceInfo = new GpuDeviceInformation();
+ gpuDeviceInfo.setGpus(gpus);
+ when(gpuDiscoverer.getGpuDeviceInformation()).thenReturn(gpuDeviceInfo);
+ return gpuDiscoverer;
+ }
+
@Test(expected = YarnException.class)
public void testResourceHandlerNotInitialized() throws YarnException {
- GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+ GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
mock(GpuNodeResourceUpdateHandler.class);
@@ -39,7 +62,7 @@ public class TestGpuResourcePlugin {
@Test
public void testResourceHandlerIsInitialized() throws YarnException {
- GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+ GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
mock(GpuNodeResourceUpdateHandler.class);
@@ -51,4 +74,52 @@ public class TestGpuResourcePlugin {
//Not throwing any exception
target.getNMResourceInfo();
}
+
+ @Test
+ public void testGetNMResourceInfoAutoDiscoveryEnabled()
+ throws YarnException {
+ GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
+
+ GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
+ mock(GpuNodeResourceUpdateHandler.class);
+
+ GpuResourcePlugin target =
+ new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
+
+ target.createResourceHandler(null, null, null);
+
+ NMGpuResourceInfo resourceInfo =
+ (NMGpuResourceInfo) target.getNMResourceInfo();
+ Assert.assertNotNull("GpuDeviceInformation should not be null",
+ resourceInfo.getGpuDeviceInformation());
+
+ List<PerGpuDeviceInformation> gpus =
+ resourceInfo.getGpuDeviceInformation().getGpus();
+ Assert.assertNotNull("List of PerGpuDeviceInformation should not be null",
+ gpus);
+
+ Assert.assertEquals("List of PerGpuDeviceInformation should have a " +
+ "size of 1", 1, gpus.size());
+ Assert.assertEquals("Product name of GPU does not match",
+ "testGpu", gpus.get(0).getProductName());
+ }
+
+ @Test
+ public void testGetNMResourceInfoAutoDiscoveryDisabled()
+ throws YarnException {
+ GpuDiscoverer gpuDiscoverer = createMockDiscoverer();
+ when(gpuDiscoverer.isAutoDiscoveryEnabled()).thenReturn(false);
+
+ GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
+ mock(GpuNodeResourceUpdateHandler.class);
+
+ GpuResourcePlugin target =
+ new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
+
+ target.createResourceHandler(null, null, null);
+
+ NMGpuResourceInfo resourceInfo =
+ (NMGpuResourceInfo) target.getNMResourceInfo();
+ Assert.assertNull(resourceInfo.getGpuDeviceInformation());
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org