You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2019/03/01 13:54:40 UTC

[hadoop] branch trunk updated: YARN-9139. Simplify initializer code of GpuDiscoverer. Contributed by Szilard Nemeth.

This is an automated email from the ASF dual-hosted git repository.

sunilg pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new dcaca19  YARN-9139. Simplify initializer code of GpuDiscoverer. Contributed by Szilard Nemeth.
dcaca19 is described below

commit dcaca19871a7aefc1f33cb6cb543ad4768ec6b50
Author: Sunil G <su...@apache.org>
AuthorDate: Fri Mar 1 19:24:35 2019 +0530

    YARN-9139. Simplify initializer code of GpuDiscoverer. Contributed by Szilard Nemeth.
---
 .../apache/hadoop/yarn/conf/YarnConfiguration.java |   3 -
 .../resourceplugin/gpu/GpuDiscoverer.java          | 115 +++++++++++++--------
 .../resources/gpu/TestGpuResourceHandler.java      |  58 +++++++++--
 .../resourceplugin/gpu/TestGpuDiscoverer.java      |  38 ++++++-
 4 files changed, 155 insertions(+), 59 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index a6d1dc5..1b44f42 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1659,9 +1659,6 @@ public class YarnConfiguration extends Configuration {
   public static final String NM_GPU_PATH_TO_EXEC =
       NM_GPU_RESOURCE_PREFIX + "path-to-discovery-executables";
 
-  @Private
-  public static final String DEFAULT_NM_GPU_PATH_TO_EXEC = "";
-
   /**
    * Settings to control which implementation of docker plugin for GPU will be
    * used.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 334a86c..95e51e5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugi
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -88,12 +89,6 @@ public class GpuDiscoverer {
       throws YarnException {
     validateConfOrThrowException();
 
-    if (null == pathOfGpuBinary) {
-      throw new YarnException(
-          "Failed to find GPU discovery executable, please double check "
-              + YarnConfiguration.NM_GPU_PATH_TO_EXEC + " setting.");
-    }
-
     if (numOfErrorExecutionSinceLastSucceed == MAX_REPEATED_ERROR_ALLOWED) {
       String msg =
           "Failed to execute GPU device information detection script for "
@@ -227,50 +222,17 @@ public class GpuDiscoverer {
     }
   }
 
-  public synchronized void initialize(Configuration conf) {
-    this.conf = conf;
+  public synchronized void initialize(Configuration config)
+      throws YarnException {
+    this.conf = config;
     numOfErrorExecutionSinceLastSucceed = 0;
-    String pathToExecutable = conf.get(YarnConfiguration.NM_GPU_PATH_TO_EXEC,
-        YarnConfiguration.DEFAULT_NM_GPU_PATH_TO_EXEC);
-    if (pathToExecutable.isEmpty()) {
-      pathToExecutable = DEFAULT_BINARY_NAME;
-    }
-
-    File binaryPath = new File(pathToExecutable);
-    if (!binaryPath.exists()) {
-      // When binary not exist, use default setting.
-      boolean found = false;
-      for (String dir : DEFAULT_BINARY_SEARCH_DIRS) {
-        binaryPath = new File(dir, DEFAULT_BINARY_NAME);
-        if (binaryPath.exists()) {
-          found = true;
-          pathOfGpuBinary = binaryPath.getAbsolutePath();
-          break;
-        }
-      }
-
-      if (!found) {
-        LOG.warn("Failed to locate binary at:" + binaryPath.getAbsolutePath()
-            + ", please double check [" + YarnConfiguration.NM_GPU_PATH_TO_EXEC
-            + "] setting. Now use " + "default binary:" + DEFAULT_BINARY_NAME);
-      }
-    } else{
-      // If path specified by user is a directory, use
-      if (binaryPath.isDirectory()) {
-        binaryPath = new File(binaryPath, DEFAULT_BINARY_NAME);
-        LOG.warn("Specified path is a directory, use " + DEFAULT_BINARY_NAME
-            + " under the directory, updated path-to-executable:" + binaryPath
-            .getAbsolutePath());
-      }
-      // Validated
-      pathOfGpuBinary = binaryPath.getAbsolutePath();
-    }
+    lookUpAutoDiscoveryBinary(config);
 
     // Try to discover GPU information once and print
     try {
       LOG.info("Trying to discover GPU information ...");
       GpuDeviceInformation info = getGpuDeviceInformation();
-      LOG.info(info.toString());
+      LOG.info("Discovered GPU information: " + info.toString());
     } catch (YarnException e) {
       String msg =
           "Failed to discover GPU information from system, exception message:"
@@ -279,6 +241,106 @@ public class GpuDiscoverer {
     }
   }
 
+  /**
+   * Resolves the GPU discovery binary and stores its absolute path in
+   * {@code pathOfGpuBinary}.
+   *
+   * The path is read from {@link YarnConfiguration#NM_GPU_PATH_TO_EXEC};
+   * an unset or empty value is replaced by {@code DEFAULT_BINARY_NAME}.
+   * A path that does not exist triggers a search in
+   * {@code DEFAULT_BINARY_SEARCH_DIRS}, a directory is expected to contain
+   * {@code DEFAULT_BINARY_NAME}, and an existing file is used as is.
+   *
+   * @param config configuration holding the path of the discovery binary
+   * @throws YarnException if the discovery binary cannot be found
+   */
+  private void lookUpAutoDiscoveryBinary(Configuration config)
+      throws YarnException {
+    String configuredBinaryPath = config.get(
+        YarnConfiguration.NM_GPU_PATH_TO_EXEC, DEFAULT_BINARY_NAME);
+    if (configuredBinaryPath.isEmpty()) {
+      configuredBinaryPath = DEFAULT_BINARY_NAME;
+    }
+
+    File binaryPath;
+    File configuredBinaryFile = new File(configuredBinaryPath);
+    if (!configuredBinaryFile.exists()) {
+      binaryPath = lookupBinaryInDefaultDirs();
+    } else if (configuredBinaryFile.isDirectory()) {
+      binaryPath = handleConfiguredBinaryPathIsDirectory(configuredBinaryFile);
+    } else {
+      binaryPath = configuredBinaryFile;
+    }
+    pathOfGpuBinary = binaryPath.getAbsolutePath();
+  }
+
+  /**
+   * Resolves {@code DEFAULT_BINARY_NAME} inside a configured directory.
+   *
+   * @param configuredBinaryFile existing directory taken from the config
+   * @return the binary located directly under the directory
+   * @throws YarnException if the directory does not contain the binary
+   */
+  private File handleConfiguredBinaryPathIsDirectory(File configuredBinaryFile)
+      throws YarnException {
+    File binaryPath = new File(configuredBinaryFile, DEFAULT_BINARY_NAME);
+    if (!binaryPath.exists()) {
+      throw new YarnException("Failed to find GPU discovery executable, " +
+          "please double check " + YarnConfiguration.NM_GPU_PATH_TO_EXEC +
+          " setting. The setting points to a directory but " +
+          "no file found in the directory with name:" + DEFAULT_BINARY_NAME);
+    } else {
+      LOG.warn("Specified path is a directory, use " + DEFAULT_BINARY_NAME
+          + " under the directory, updated path-to-executable:"
+          + binaryPath.getAbsolutePath());
+    }
+    return binaryPath;
+  }
+
+  /**
+   * Searches the default directories for the discovery binary.
+   *
+   * @return the first existing binary found in the default directories
+   * @throws YarnException if none of the default directories contains it
+   */
+  private File lookupBinaryInDefaultDirs() throws YarnException {
+    final File lookedUpBinary = lookupBinaryInDefaultDirsInternal();
+    if (lookedUpBinary == null) {
+      throw new YarnException("Failed to find GPU discovery executable, " +
+          "please double check " + YarnConfiguration.NM_GPU_PATH_TO_EXEC +
+          " setting. Also tried to find the executable " +
+          "in the default directories: " + DEFAULT_BINARY_SEARCH_DIRS);
+    }
+    return lookedUpBinary;
+  }
+
+  /**
+   * Probes every entry of {@code DEFAULT_BINARY_SEARCH_DIRS} for
+   * {@code DEFAULT_BINARY_NAME}.
+   *
+   * @return the first existing candidate, or {@code null} (after logging
+   *         the tried paths) when no candidate exists
+   */
+  private File lookupBinaryInDefaultDirsInternal() {
+    // Insertion-ordered, so the warning lists paths in search order.
+    Set<String> triedBinaryPaths = Sets.newLinkedHashSet();
+    for (String dir : DEFAULT_BINARY_SEARCH_DIRS) {
+      File binaryPath = new File(dir, DEFAULT_BINARY_NAME);
+      if (binaryPath.exists()) {
+        return binaryPath;
+      } else {
+        triedBinaryPaths.add(binaryPath.getAbsolutePath());
+      }
+    }
+    // No fallback binary exists at this point: the caller turns the null
+    // result into a YarnException, so the message must not promise one.
+    LOG.warn("Failed to locate GPU device discovery binary, tried paths: "
+        + triedBinaryPaths + "! Please double check the value of config "
+        + YarnConfiguration.NM_GPU_PATH_TO_EXEC + ".");
+
+    return null;
+  }
+
   @VisibleForTesting
   Map<String, String> getEnvironmentToRunCommand() {
     return environment;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
index fb0df39..966662f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/TestGpuResourceHandler.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;
 
+import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -41,10 +42,13 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.Contai
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.util.resource.CustomResourceTypesConfigurationProvider;
+import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -72,9 +76,42 @@ public class TestGpuResourceHandler {
   private NMStateStoreService mockNMStateStore;
   private ConcurrentHashMap<ContainerId, Container> runningContainersMap;
   private GpuDiscoverer gpuDiscoverer;
+  private File testDataDirectory;
+
+  public void createTestDataDirectory() throws IOException {
+    String testDirectoryPath = getTestParentDirectory();
+    testDataDirectory = new File(testDirectoryPath);
+    FileUtils.deleteDirectory(testDataDirectory);
+    testDataDirectory.mkdirs();
+  }
+
+  private String getTestParentDirectory() {
+    File f = new File("target/temp/" + TestGpuResourceHandler.class.getName());
+    return f.getAbsolutePath();
+  }
+
+  private void touchFile(File f) throws IOException {
+    new FileOutputStream(f).close();
+  }
+
+  private Configuration createDefaultConfig() throws IOException {
+    Configuration conf = new YarnConfiguration();
+    File fakeBinary = setupFakeGpuDiscoveryBinary();
+    conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC,
+        fakeBinary.getAbsolutePath());
+    return conf;
+  }
+
+  private File setupFakeGpuDiscoveryBinary() throws IOException {
+    File fakeBinary = new File(getTestParentDirectory() + "/fake-nvidia-smi");
+    touchFile(fakeBinary);
+    return fakeBinary;
+  }
 
   @Before
-  public void setup() {
+  public void setup() throws IOException {
+    createTestDataDirectory();
+
     CustomResourceTypesConfigurationProvider.
         initResourceTypes(ResourceInformation.GPU_URI);
 
@@ -95,9 +132,14 @@ public class TestGpuResourceHandler {
         mockPrivilegedExecutor, gpuDiscoverer);
   }
 
+  @After
+  public void cleanupTestFiles() throws IOException {
+    FileUtils.deleteDirectory(testDataDirectory);
+  }
+
   @Test
   public void testBootStrap() throws Exception {
-    Configuration conf = new YarnConfiguration();
+    Configuration conf = createDefaultConfig();
     conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0");
 
     gpuDiscoverer.initialize(conf);
@@ -162,7 +204,7 @@ public class TestGpuResourceHandler {
 
   private void commonTestAllocation(boolean dockerContainerEnabled)
       throws Exception {
-    Configuration conf = new YarnConfiguration();
+    Configuration conf = createDefaultConfig();
     conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
     gpuDiscoverer.initialize(conf);
 
@@ -251,7 +293,7 @@ public class TestGpuResourceHandler {
   @Test
   public void testAssignedGpuWillBeCleanedupWhenStoreOpFails()
       throws Exception {
-    Configuration conf = new YarnConfiguration();
+    Configuration conf = createDefaultConfig();
     conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
     gpuDiscoverer.initialize(conf);
 
@@ -280,7 +322,7 @@ public class TestGpuResourceHandler {
 
   @Test
   public void testAllocationWithoutAllowedGpus() throws Exception {
-    Configuration conf = new YarnConfiguration();
+    Configuration conf = createDefaultConfig();
     conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, " ");
     gpuDiscoverer.initialize(conf);
 
@@ -315,7 +357,7 @@ public class TestGpuResourceHandler {
 
   @Test
   public void testAllocationStored() throws Exception {
-    Configuration conf = new YarnConfiguration();
+    Configuration conf = createDefaultConfig();
     conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
     gpuDiscoverer.initialize(conf);
 
@@ -354,7 +396,7 @@ public class TestGpuResourceHandler {
   public void testAllocationStoredWithNULLStateStore() throws Exception {
     NMNullStateStoreService mockNMNULLStateStore = mock(NMNullStateStoreService.class);
 
-    Configuration conf = new YarnConfiguration();
+    Configuration conf = createDefaultConfig();
     conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
 
     Context nmnctx = mock(Context.class);
@@ -383,7 +425,7 @@ public class TestGpuResourceHandler {
 
   @Test
   public void testRecoverResourceAllocation() throws Exception {
-    Configuration conf = new YarnConfiguration();
+    Configuration conf = createDefaultConfig();
     conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:3,3:4");
     gpuDiscoverer.initialize(conf);
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
index cbbfded..ecc9c7b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
@@ -38,6 +38,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 public class TestGpuDiscoverer {
   @Rule
@@ -52,6 +53,19 @@ public class TestGpuDiscoverer {
     new FileOutputStream(f).close();
   }
 
+  private File setupFakeBinary(Configuration conf) {
+    File fakeBinary;
+    try {
+      fakeBinary = new File(getTestParentFolder(),
+          GpuDiscoverer.DEFAULT_BINARY_NAME);
+      touchFile(fakeBinary);
+      conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, getTestParentFolder());
+    } catch (Exception e) {
+      throw new RuntimeException("Failed to init fake binary", e);
+    }
+    return fakeBinary;
+  }
+
   @Before
   public void before() throws IOException {
     String folder = getTestParentFolder();
@@ -63,6 +77,7 @@ public class TestGpuDiscoverer {
   private Configuration createConfigWithAllowedDevices(String s) {
     Configuration conf = new Configuration(false);
     conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, s);
+    setupFakeBinary(conf);
     return conf;
   }
 
@@ -83,10 +98,7 @@ public class TestGpuDiscoverer {
         plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
 
     // test case 2, check mandatory set path.
-    File fakeBinary = new File(getTestParentFolder(),
-        GpuDiscoverer.DEFAULT_BINARY_NAME);
-    touchFile(fakeBinary);
-    conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, getTestParentFolder());
+    File fakeBinary = setupFakeBinary(conf);
     plugin = new GpuDiscoverer();
     plugin.initialize(conf);
     assertEquals(fakeBinary.getAbsolutePath(),
@@ -276,4 +288,27 @@ public class TestGpuDiscoverer {
     plugin.initialize(conf);
     plugin.getGpusUsableByYarn();
   }
+
+  /**
+   * Initialization must fail with a descriptive exception when the
+   * configured discovery binary does not exist. Relies on the default
+   * search directories not containing the binary either.
+   */
+  @Test
+  public void testGpuBinaryIsANotExistingFile() {
+    Configuration conf = new Configuration(false);
+    conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, "/blabla");
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    try {
+      plugin.initialize(conf);
+      plugin.getGpusUsableByYarn();
+      fail("Non-existing GPU discovery binary, should fail.");
+    } catch (YarnException e) {
+      String message = e.getMessage();
+      assertTrue(message.startsWith("Failed to find GPU discovery " +
+          "executable, please double check"));
+      assertTrue(message.contains("Also tried to find the " +
+          "executable in the default directories:"));
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org