You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2019/02/27 06:34:03 UTC
[hadoop] branch branch-3.2 updated: YARN-9121. Replace
GpuDiscoverer.getInstance() to a readable object for easy access control.
Contributed by Szilard Nemeth.
This is an automated email from the ASF dual-hosted git repository.
sunilg pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 809e3f2 YARN-9121. Replace GpuDiscoverer.getInstance() to a readable object for easy access control. Contributed by Szilard Nemeth.
809e3f2 is described below
commit 809e3f2453aa26339180f2310625e968c2c3cfc8
Author: Sunil G <su...@apache.org>
AuthorDate: Mon Feb 25 11:30:46 2019 +0530
YARN-9121. Replace GpuDiscoverer.getInstance() to a readable object for easy access control. Contributed by Szilard Nemeth.
(cherry picked from commit 5e91ebd91a405e1585ef02b8fbf03f10d1398adf)
---
.../resources/gpu/GpuResourceHandlerImpl.java | 9 ++++++---
.../resourceplugin/ResourcePluginManager.java | 7 ++++++-
.../resourceplugin/gpu/GpuDiscoverer.java | 9 ---------
.../gpu/GpuNodeResourceUpdateHandler.java | 10 +++++++---
.../resourceplugin/gpu/GpuResourcePlugin.java | 22 +++++++++++++---------
.../resources/gpu/TestGpuResourceHandler.java | 20 +++++++++++---------
6 files changed, 43 insertions(+), 34 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
index d3be720..2c9baf2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
@@ -52,14 +52,17 @@ public class GpuResourceHandlerImpl implements ResourceHandler {
private final GpuResourceAllocator gpuAllocator;
private final CGroupsHandler cGroupsHandler;
private final PrivilegedOperationExecutor privilegedOperationExecutor;
+ private final GpuDiscoverer gpuDiscoverer;
public GpuResourceHandlerImpl(Context nmContext,
CGroupsHandler cGroupsHandler,
- PrivilegedOperationExecutor privilegedOperationExecutor) {
+ PrivilegedOperationExecutor privilegedOperationExecutor,
+ GpuDiscoverer gpuDiscoverer) {
this.nmContext = nmContext;
this.cGroupsHandler = cGroupsHandler;
this.privilegedOperationExecutor = privilegedOperationExecutor;
- gpuAllocator = new GpuResourceAllocator(nmContext);
+ this.gpuAllocator = new GpuResourceAllocator(nmContext);
+ this.gpuDiscoverer = gpuDiscoverer;
}
@Override
@@ -67,7 +70,7 @@ public class GpuResourceHandlerImpl implements ResourceHandler {
throws ResourceHandlerException {
List<GpuDevice> usableGpus;
try {
- usableGpus = GpuDiscoverer.getInstance().getGpusUsableByYarn();
+ usableGpus = gpuDiscoverer.getGpusUsableByYarn();
if (usableGpus == null || usableGpus.isEmpty()) {
String message = "GPU is enabled on the NodeManager, but couldn't find "
+ "any usable GPU devices, please double check configuration!";
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
index 14218c1..c1f2910 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/ResourcePluginManager.java
@@ -25,6 +25,8 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga.FpgaResourcePlugin;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuNodeResourceUpdateHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuResourcePlugin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -84,7 +86,10 @@ public class ResourcePluginManager {
ResourcePlugin plugin = null;
if (resourceName.equals(GPU_URI)) {
- plugin = new GpuResourcePlugin();
+ final GpuDiscoverer gpuDiscoverer = new GpuDiscoverer();
+ final GpuNodeResourceUpdateHandler updateHandler =
+ new GpuNodeResourceUpdateHandler(gpuDiscoverer);
+ plugin = new GpuResourcePlugin(updateHandler, gpuDiscoverer);
} else if (resourceName.equals(FPGA_URI)) {
plugin = new FpgaResourcePlugin();
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 92792b7..334a86c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -58,11 +58,6 @@ public class GpuDiscoverer {
// command should not run more than 10 sec.
private static final int MAX_EXEC_TIMEOUT_MS = 10 * 1000;
private static final int MAX_REPEATED_ERROR_ALLOWED = 10;
- private static GpuDiscoverer instance;
-
- static {
- instance = new GpuDiscoverer();
- }
private Configuration conf = null;
private String pathOfGpuBinary = null;
@@ -293,8 +288,4 @@ public class GpuDiscoverer {
String getPathOfGpuBinary() {
return pathOfGpuBinary;
}
-
- public static GpuDiscoverer getInstance() {
- return instance;
- }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
index 8b19048..4b2258d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
@@ -35,16 +35,20 @@ import static org.apache.hadoop.yarn.api.records.ResourceInformation.GPU_URI;
public class GpuNodeResourceUpdateHandler extends NodeResourceUpdaterPlugin {
private static final Logger LOG =
LoggerFactory.getLogger(GpuNodeResourceUpdateHandler.class);
+ private final GpuDiscoverer gpuDiscoverer;
+
+ public GpuNodeResourceUpdateHandler(GpuDiscoverer gpuDiscoverer) {
+ this.gpuDiscoverer = gpuDiscoverer;
+ }
@Override
public void updateConfiguredResource(Resource res) throws YarnException {
LOG.info("Initializing configured GPU resources for the NodeManager.");
- List<GpuDevice> usableGpus = GpuDiscoverer.getInstance()
- .getGpusUsableByYarn();
+ List<GpuDevice> usableGpus = gpuDiscoverer.getGpusUsableByYarn();
if (usableGpus == null || usableGpus.isEmpty()) {
String message = "GPU is enabled, " +
- "but couldn't find any usable GPUs on the NodeManager!";
+ "but could not find any usable GPUs on the NodeManager!";
LOG.error(message);
// No gpu can be used by YARN.
throw new YarnException(message);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
index e49d2f2..393d76e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
-import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
@@ -34,18 +33,23 @@ import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInforma
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
import java.util.List;
-import java.util.Map;
public class GpuResourcePlugin implements ResourcePlugin {
+ private final GpuNodeResourceUpdateHandler resourceDiscoverHandler;
+ private final GpuDiscoverer gpuDiscoverer;
private GpuResourceHandlerImpl gpuResourceHandler = null;
- private GpuNodeResourceUpdateHandler resourceDiscoverHandler = null;
private DockerCommandPlugin dockerCommandPlugin = null;
+ public GpuResourcePlugin(GpuNodeResourceUpdateHandler resourceDiscoverHandler,
+ GpuDiscoverer gpuDiscoverer) {
+ this.resourceDiscoverHandler = resourceDiscoverHandler;
+ this.gpuDiscoverer = gpuDiscoverer;
+ }
+
@Override
public synchronized void initialize(Context context) throws YarnException {
- resourceDiscoverHandler = new GpuNodeResourceUpdateHandler();
- GpuDiscoverer.getInstance().initialize(context.getConf());
- dockerCommandPlugin =
+ this.gpuDiscoverer.initialize(context.getConf());
+ this.dockerCommandPlugin =
GpuDockerCommandPluginFactory.createGpuDockerCommandPlugin(
context.getConf());
}
@@ -56,7 +60,7 @@ public class GpuResourcePlugin implements ResourcePlugin {
PrivilegedOperationExecutor privilegedOperationExecutor) {
if (gpuResourceHandler == null) {
gpuResourceHandler = new GpuResourceHandlerImpl(context, cGroupsHandler,
- privilegedOperationExecutor);
+ privilegedOperationExecutor, gpuDiscoverer);
}
return gpuResourceHandler;
@@ -77,9 +81,9 @@ public class GpuResourcePlugin implements ResourcePlugin {
}
@Override
- public NMResourceInfo getNMResourceInfo() throws YarnException {
+ public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
GpuDeviceInformation gpuDeviceInformation =
- GpuDiscoverer.getInstance().getGpuDeviceInformation();
+ gpuDiscoverer.getGpuDeviceInformation();
GpuResourceAllocator gpuResourceAllocator =
gpuResourceHandler.getGpuAllocator();
List<GpuDevice> totalGpus = gpuResourceAllocator.getAllowedGpusCopy();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
index 10e5cd1..1c41ef2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
@@ -71,6 +71,7 @@ public class TestGpuResourceHandler {
private GpuResourceHandlerImpl gpuResourceHandler;
private NMStateStoreService mockNMStateStore;
private ConcurrentHashMap<ContainerId, Container> runningContainersMap;
+ private GpuDiscoverer gpuDiscoverer;
@Before
public void setup() {
@@ -88,8 +89,9 @@ public class TestGpuResourceHandler {
runningContainersMap = new ConcurrentHashMap<>();
when(nmctx.getContainers()).thenReturn(runningContainersMap);
+ gpuDiscoverer = new GpuDiscoverer();
gpuResourceHandler = new GpuResourceHandlerImpl(nmctx, mockCGroupsHandler,
- mockPrivilegedExecutor);
+ mockPrivilegedExecutor, gpuDiscoverer);
}
@Test
@@ -97,7 +99,7 @@ public class TestGpuResourceHandler {
Configuration conf = new YarnConfiguration();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0");
- GpuDiscoverer.getInstance().initialize(conf);
+ gpuDiscoverer.initialize(conf);
gpuResourceHandler.bootstrap(conf);
verify(mockCGroupsHandler, times(1)).initializeCGroupController(
@@ -161,7 +163,7 @@ public class TestGpuResourceHandler {
throws Exception {
Configuration conf = new YarnConfiguration();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
- GpuDiscoverer.getInstance().initialize(conf);
+ gpuDiscoverer.initialize(conf);
gpuResourceHandler.bootstrap(conf);
Assert.assertEquals(4,
@@ -250,7 +252,7 @@ public class TestGpuResourceHandler {
throws Exception {
Configuration conf = new YarnConfiguration();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
- GpuDiscoverer.getInstance().initialize(conf);
+ gpuDiscoverer.initialize(conf);
gpuResourceHandler.bootstrap(conf);
Assert.assertEquals(4,
@@ -279,7 +281,7 @@ public class TestGpuResourceHandler {
public void testAllocationWithoutAllowedGpus() throws Exception {
Configuration conf = new YarnConfiguration();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, " ");
- GpuDiscoverer.getInstance().initialize(conf);
+ gpuDiscoverer.initialize(conf);
try {
gpuResourceHandler.bootstrap(conf);
@@ -314,7 +316,7 @@ public class TestGpuResourceHandler {
public void testAllocationStored() throws Exception {
Configuration conf = new YarnConfiguration();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
- GpuDiscoverer.getInstance().initialize(conf);
+ gpuDiscoverer.initialize(conf);
gpuResourceHandler.bootstrap(conf);
Assert.assertEquals(4,
@@ -360,9 +362,9 @@ public class TestGpuResourceHandler {
GpuResourceHandlerImpl gpuNULLStateResourceHandler =
new GpuResourceHandlerImpl(nmnctx, mockCGroupsHandler,
- mockPrivilegedExecutor);
+ mockPrivilegedExecutor, gpuDiscoverer);
- GpuDiscoverer.getInstance().initialize(conf);
+ gpuDiscoverer.initialize(conf);
gpuNULLStateResourceHandler.bootstrap(conf);
Assert.assertEquals(4,
@@ -382,7 +384,7 @@ public class TestGpuResourceHandler {
public void testRecoverResourceAllocation() throws Exception {
Configuration conf = new YarnConfiguration();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
- GpuDiscoverer.getInstance().initialize(conf);
+ gpuDiscoverer.initialize(conf);
gpuResourceHandler.bootstrap(conf);
Assert.assertEquals(4,
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org