You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2019/03/01 13:57:09 UTC
[hadoop] branch branch-3.2 updated: YARN-9139. Simplify initializer
code of GpuDiscoverer. Contributed by Szilard Nemeth.
This is an automated email from the ASF dual-hosted git repository.
sunilg pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new d045f02 YARN-9139. Simplify initializer code of GpuDiscoverer. Contributed by Szilard Nemeth.
d045f02 is described below
commit d045f02a8de5b5aa97b5fcb98d6f61862c3ae733
Author: Sunil G <su...@apache.org>
AuthorDate: Fri Mar 1 19:27:03 2019 +0530
YARN-9139. Simplify initializer code of GpuDiscoverer. Contributed by Szilard Nemeth.
---
.../apache/hadoop/yarn/conf/YarnConfiguration.java | 3 -
.../resourceplugin/gpu/GpuDiscoverer.java | 115 +++++++++++++--------
.../resources/gpu/TestGpuResourceHandler.java | 58 +++++++++--
.../resourceplugin/gpu/TestGpuDiscoverer.java | 38 ++++++-
4 files changed, 155 insertions(+), 59 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index eb36ed8..9774bde 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1620,9 +1620,6 @@ public class YarnConfiguration extends Configuration {
public static final String NM_GPU_PATH_TO_EXEC =
NM_GPU_RESOURCE_PREFIX + "path-to-discovery-executables";
- @Private
- public static final String DEFAULT_NM_GPU_PATH_TO_EXEC = "";
-
/**
* Settings to control which implementation of docker plugin for GPU will be
* used.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 334a86c..95e51e5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugi
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@@ -88,12 +89,6 @@ public class GpuDiscoverer {
throws YarnException {
validateConfOrThrowException();
- if (null == pathOfGpuBinary) {
- throw new YarnException(
- "Failed to find GPU discovery executable, please double check "
- + YarnConfiguration.NM_GPU_PATH_TO_EXEC + " setting.");
- }
-
if (numOfErrorExecutionSinceLastSucceed == MAX_REPEATED_ERROR_ALLOWED) {
String msg =
"Failed to execute GPU device information detection script for "
@@ -227,50 +222,17 @@ public class GpuDiscoverer {
}
}
- public synchronized void initialize(Configuration conf) {
- this.conf = conf;
+ public synchronized void initialize(Configuration config)
+ throws YarnException {
+ this.conf = config;
numOfErrorExecutionSinceLastSucceed = 0;
- String pathToExecutable = conf.get(YarnConfiguration.NM_GPU_PATH_TO_EXEC,
- YarnConfiguration.DEFAULT_NM_GPU_PATH_TO_EXEC);
- if (pathToExecutable.isEmpty()) {
- pathToExecutable = DEFAULT_BINARY_NAME;
- }
-
- File binaryPath = new File(pathToExecutable);
- if (!binaryPath.exists()) {
- // When binary not exist, use default setting.
- boolean found = false;
- for (String dir : DEFAULT_BINARY_SEARCH_DIRS) {
- binaryPath = new File(dir, DEFAULT_BINARY_NAME);
- if (binaryPath.exists()) {
- found = true;
- pathOfGpuBinary = binaryPath.getAbsolutePath();
- break;
- }
- }
-
- if (!found) {
- LOG.warn("Failed to locate binary at:" + binaryPath.getAbsolutePath()
- + ", please double check [" + YarnConfiguration.NM_GPU_PATH_TO_EXEC
- + "] setting. Now use " + "default binary:" + DEFAULT_BINARY_NAME);
- }
- } else{
- // If path specified by user is a directory, use
- if (binaryPath.isDirectory()) {
- binaryPath = new File(binaryPath, DEFAULT_BINARY_NAME);
- LOG.warn("Specified path is a directory, use " + DEFAULT_BINARY_NAME
- + " under the directory, updated path-to-executable:" + binaryPath
- .getAbsolutePath());
- }
- // Validated
- pathOfGpuBinary = binaryPath.getAbsolutePath();
- }
+ lookUpAutoDiscoveryBinary(config);
// Try to discover GPU information once and print
try {
LOG.info("Trying to discover GPU information ...");
GpuDeviceInformation info = getGpuDeviceInformation();
- LOG.info(info.toString());
+ LOG.info("Discovered GPU information: " + info.toString());
} catch (YarnException e) {
String msg =
"Failed to discover GPU information from system, exception message:"
@@ -279,6 +241,71 @@ public class GpuDiscoverer {
}
}
+ private void lookUpAutoDiscoveryBinary(Configuration config)
+ throws YarnException {
+ String configuredBinaryPath = config.get(
+ YarnConfiguration.NM_GPU_PATH_TO_EXEC, DEFAULT_BINARY_NAME);
+ if (configuredBinaryPath.isEmpty()) {
+ configuredBinaryPath = DEFAULT_BINARY_NAME;
+ }
+
+ File binaryPath;
+ File configuredBinaryFile = new File(configuredBinaryPath);
+ if (!configuredBinaryFile.exists()) {
+ binaryPath = lookupBinaryInDefaultDirs();
+ } else if (configuredBinaryFile.isDirectory()) {
+ binaryPath = handleConfiguredBinaryPathIsDirectory(configuredBinaryFile);
+ } else {
+ binaryPath = configuredBinaryFile;
+ }
+ pathOfGpuBinary = binaryPath.getAbsolutePath();
+ }
+
+ private File handleConfiguredBinaryPathIsDirectory(File configuredBinaryFile)
+ throws YarnException {
+ File binaryPath = new File(configuredBinaryFile, DEFAULT_BINARY_NAME);
+ if (!binaryPath.exists()) {
+ throw new YarnException("Failed to find GPU discovery executable, " +
+ "please double check "+ YarnConfiguration.NM_GPU_PATH_TO_EXEC +
+ " setting. The setting points to a directory but " +
+ "no file found in the directory with name:" + DEFAULT_BINARY_NAME);
+ } else {
+ LOG.warn("Specified path is a directory, use " + DEFAULT_BINARY_NAME
+ + " under the directory, updated path-to-executable:"
+ + binaryPath.getAbsolutePath());
+ }
+ return binaryPath;
+ }
+
+ private File lookupBinaryInDefaultDirs() throws YarnException {
+ final File lookedUpBinary = lookupBinaryInDefaultDirsInternal();
+ if (lookedUpBinary == null) {
+ throw new YarnException("Failed to find GPU discovery executable, " +
+ "please double check " + YarnConfiguration.NM_GPU_PATH_TO_EXEC +
+ " setting. Also tried to find the executable " +
+ "in the default directories: " + DEFAULT_BINARY_SEARCH_DIRS);
+ }
+ return lookedUpBinary;
+ }
+
+ private File lookupBinaryInDefaultDirsInternal() {
+ Set<String> triedBinaryPaths = Sets.newHashSet();
+ for (String dir : DEFAULT_BINARY_SEARCH_DIRS) {
+ File binaryPath = new File(dir, DEFAULT_BINARY_NAME);
+ if (binaryPath.exists()) {
+ return binaryPath;
+ } else {
+ triedBinaryPaths.add(binaryPath.getAbsolutePath());
+ }
+ }
+ LOG.warn("Failed to locate GPU device discovery binary, tried paths: "
+ + triedBinaryPaths + "! Please double check the value of config "
+ + YarnConfiguration.NM_GPU_PATH_TO_EXEC +
+ ". Using default binary: " + DEFAULT_BINARY_NAME);
+
+ return null;
+ }
+
@VisibleForTesting
Map<String, String> getEnvironmentToRunCommand() {
return environment;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
index 1c41ef2..dad30ec 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
+import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -40,11 +41,14 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
+import org.junit.After;
import org.apache.hadoop.yarn.util.resource.TestResourceUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
+import java.io.File;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -72,9 +76,42 @@ public class TestGpuResourceHandler {
private NMStateStoreService mockNMStateStore;
private ConcurrentHashMap<ContainerId, Container> runningContainersMap;
private GpuDiscoverer gpuDiscoverer;
+ private File testDataDirectory;
+
+ public void createTestDataDirectory() throws IOException {
+ String testDirectoryPath = getTestParentDirectory();
+ testDataDirectory = new File(testDirectoryPath);
+ FileUtils.deleteDirectory(testDataDirectory);
+ testDataDirectory.mkdirs();
+ }
+
+ private String getTestParentDirectory() {
+ File f = new File("target/temp/" + TestGpuResourceHandler.class.getName());
+ return f.getAbsolutePath();
+ }
+
+ private void touchFile(File f) throws IOException {
+ new FileOutputStream(f).close();
+ }
+
+ private Configuration createDefaultConfig() throws IOException {
+ Configuration conf = new YarnConfiguration();
+ File fakeBinary = setupFakeGpuDiscoveryBinary();
+ conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC,
+ fakeBinary.getAbsolutePath());
+ return conf;
+ }
+
+ private File setupFakeGpuDiscoveryBinary() throws IOException {
+ File fakeBinary = new File(getTestParentDirectory() + "/fake-nvidia-smi");
+ touchFile(fakeBinary);
+ return fakeBinary;
+ }
@Before
- public void setup() {
+ public void setup() throws IOException {
+ createTestDataDirectory();
+
TestResourceUtils.addNewTypesToResources(ResourceInformation.GPU_URI);
mockCGroupsHandler = mock(CGroupsHandler.class);
@@ -94,9 +131,14 @@ public class TestGpuResourceHandler {
mockPrivilegedExecutor, gpuDiscoverer);
}
+ @After
+ public void cleanupTestFiles() throws IOException {
+ FileUtils.deleteDirectory(testDataDirectory);
+ }
+
@Test
public void testBootStrap() throws Exception {
- Configuration conf = new YarnConfiguration();
+ Configuration conf = createDefaultConfig();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0");
gpuDiscoverer.initialize(conf);
@@ -161,7 +203,7 @@ public class TestGpuResourceHandler {
private void commonTestAllocation(boolean dockerContainerEnabled)
throws Exception {
- Configuration conf = new YarnConfiguration();
+ Configuration conf = createDefaultConfig();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
gpuDiscoverer.initialize(conf);
@@ -250,7 +292,7 @@ public class TestGpuResourceHandler {
@Test
public void testAssignedGpuWillBeCleanedupWhenStoreOpFails()
throws Exception {
- Configuration conf = new YarnConfiguration();
+ Configuration conf = createDefaultConfig();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
gpuDiscoverer.initialize(conf);
@@ -279,7 +321,7 @@ public class TestGpuResourceHandler {
@Test
public void testAllocationWithoutAllowedGpus() throws Exception {
- Configuration conf = new YarnConfiguration();
+ Configuration conf = createDefaultConfig();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, " ");
gpuDiscoverer.initialize(conf);
@@ -314,7 +356,7 @@ public class TestGpuResourceHandler {
@Test
public void testAllocationStored() throws Exception {
- Configuration conf = new YarnConfiguration();
+ Configuration conf = createDefaultConfig();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
gpuDiscoverer.initialize(conf);
@@ -353,7 +395,7 @@ public class TestGpuResourceHandler {
public void testAllocationStoredWithNULLStateStore() throws Exception {
NMNullStateStoreService mockNMNULLStateStore = mock(NMNullStateStoreService.class);
- Configuration conf = new YarnConfiguration();
+ Configuration conf = createDefaultConfig();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
Context nmnctx = mock(Context.class);
@@ -382,7 +424,7 @@ public class TestGpuResourceHandler {
@Test
public void testRecoverResourceAllocation() throws Exception {
- Configuration conf = new YarnConfiguration();
+ Configuration conf = createDefaultConfig();
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
gpuDiscoverer.initialize(conf);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
index cbbfded..ecc9c7b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
@@ -38,6 +38,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
public class TestGpuDiscoverer {
@Rule
@@ -52,6 +53,19 @@ public class TestGpuDiscoverer {
new FileOutputStream(f).close();
}
+ private File setupFakeBinary(Configuration conf) {
+ File fakeBinary;
+ try {
+ fakeBinary = new File(getTestParentFolder(),
+ GpuDiscoverer.DEFAULT_BINARY_NAME);
+ touchFile(fakeBinary);
+ conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, getTestParentFolder());
+ } catch (Exception e) {
+ throw new RuntimeException("Failed to init fake binary", e);
+ }
+ return fakeBinary;
+ }
+
@Before
public void before() throws IOException {
String folder = getTestParentFolder();
@@ -63,6 +77,7 @@ public class TestGpuDiscoverer {
private Configuration createConfigWithAllowedDevices(String s) {
Configuration conf = new Configuration(false);
conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, s);
+ setupFakeBinary(conf);
return conf;
}
@@ -83,10 +98,7 @@ public class TestGpuDiscoverer {
plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
// test case 2, check mandatory set path.
- File fakeBinary = new File(getTestParentFolder(),
- GpuDiscoverer.DEFAULT_BINARY_NAME);
- touchFile(fakeBinary);
- conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, getTestParentFolder());
+ File fakeBinary = setupFakeBinary(conf);
plugin = new GpuDiscoverer();
plugin.initialize(conf);
assertEquals(fakeBinary.getAbsolutePath(),
@@ -276,4 +288,22 @@ public class TestGpuDiscoverer {
plugin.initialize(conf);
plugin.getGpusUsableByYarn();
}
+
+ @Test
+ public void testGpuBinaryIsANotExistingFile() {
+ Configuration conf = new Configuration(false);
+ conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, "/blabla");
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ try {
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ fail("Illegal format, should fail.");
+ } catch (YarnException e) {
+ String message = e.getMessage();
+ assertTrue(message.startsWith("Failed to find GPU discovery " +
+ "executable, please double check"));
+ assertTrue(message.contains("Also tried to find the " +
+ "executable in the default directories:"));
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org