You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2019/02/22 14:53:57 UTC

[hadoop] branch branch-3.1 updated: YARN-9118. Handle exceptions with parsing user defined GPU devices in GpuDiscoverer. Contributed by Szilard Nemeth.

This is an automated email from the ASF dual-hosted git repository.

sunilg pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new d6377c8  YARN-9118. Handle exceptions with parsing user defined GPU devices in GpuDiscoverer. Contributed by Szilard Nemeth.
d6377c8 is described below

commit d6377c8b681d92252363a4de263771b2c77ca404
Author: Sunil G <su...@apache.org>
AuthorDate: Fri Feb 22 20:22:17 2019 +0530

    YARN-9118. Handle exceptions with parsing user defined GPU devices in GpuDiscoverer. Contributed by Szilard Nemeth.
    
    (cherry picked from commit 95fbbfed75dd309b5d56032ece64996165572287)
---
 .../resources/gpu/GpuResourceHandlerImpl.java      |   5 +-
 .../gpu/GpuDeviceSpecificationException.java       |  82 ++++++++
 .../resourceplugin/gpu/GpuDiscoverer.java          | 124 ++++++++-----
 .../gpu/GpuNodeResourceUpdateHandler.java          |  10 +-
 .../resourceplugin/gpu/package-info.java           |  20 ++
 .../resourceplugin/gpu/TestGpuDiscoverer.java      | 206 +++++++++++++++++----
 6 files changed, 361 insertions(+), 86 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
index e466742..6d158be 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
@@ -65,11 +65,10 @@ public class GpuResourceHandlerImpl implements ResourceHandler {
       throws ResourceHandlerException {
     List<GpuDevice> usableGpus;
     try {
-      usableGpus = GpuDiscoverer.getInstance()
-          .getGpusUsableByYarn();
+      usableGpus = GpuDiscoverer.getInstance().getGpusUsableByYarn();
       if (usableGpus == null || usableGpus.isEmpty()) {
         String message = "GPU is enabled on the NodeManager, but couldn't find "
-            + "any usable GPU devices, please double check configuration.";
+            + "any usable GPU devices, please double check configuration!";
         LOG.error(message);
         throw new ResourceHandlerException(message);
       }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDeviceSpecificationException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDeviceSpecificationException.java
new file mode 100644
index 0000000..9d61b91
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDeviceSpecificationException.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+/**
+ * This exception is to be thrown when allowed GPU device specification
+ * is empty or invalid.
+ */
+public final class GpuDeviceSpecificationException extends YarnException {
+  private static final String VALID_FORMAT_MESSAGE = "The valid format " +
+      "should be: index:minor_number";
+
+  private GpuDeviceSpecificationException(String message) {
+    super(message);
+  }
+
+  private GpuDeviceSpecificationException(String message, Exception cause) {
+    super(message, cause);
+  }
+
+  public static GpuDeviceSpecificationException createWithEmptyValueSpecified() {
+    return new GpuDeviceSpecificationException(
+        YarnConfiguration.NM_GPU_ALLOWED_DEVICES +
+        " is set to an empty value! Please specify " +
+        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES +
+        " to enable auto-discovery or " +
+        "please enter the GPU device IDs manually! " +
+            VALID_FORMAT_MESSAGE);
+  }
+
+  public static GpuDeviceSpecificationException createWithWrongValueSpecified(
+      String device, String configValue, Exception cause) {
+    final String message = createIllegalFormatMessage(device, configValue);
+    return new GpuDeviceSpecificationException(message, cause);
+  }
+
+  public static GpuDeviceSpecificationException createWithWrongValueSpecified(
+      String device, String configValue) {
+    final String message = createIllegalFormatMessage(device, configValue);
+    return new GpuDeviceSpecificationException(message);
+  }
+
+  public static GpuDeviceSpecificationException createWithDuplicateValueSpecified(
+      String device, String configValue) {
+    final String message = createDuplicateFormatMessage(device, configValue);
+    return new GpuDeviceSpecificationException(message);
+  }
+
+  private static String createIllegalFormatMessage(String device,
+      String configValue) {
+    return String.format("Illegal format of individual GPU device: %s, " +
+            "the whole config value was: '%s'! " + VALID_FORMAT_MESSAGE,
+        device, configValue);
+  }
+
+  private static String createDuplicateFormatMessage(String device,
+      String configValue) {
+    return String.format("GPU device %s" +
+            " has a duplicate definition! " +
+            "Please double-check the configuration " +
+            YarnConfiguration.NM_GPU_ALLOWED_DEVICES +
+            "! Current value of the configuration is: %s",
+        device, configValue);
+  }
+}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 6e3cf13..92792b7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -19,8 +19,8 @@
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -47,7 +47,7 @@ public class GpuDiscoverer {
   public static final Logger LOG = LoggerFactory.getLogger(
       GpuDiscoverer.class);
   @VisibleForTesting
-  protected static final String DEFAULT_BINARY_NAME = "nvidia-smi";
+  static final String DEFAULT_BINARY_NAME = "nvidia-smi";
 
   // When executable path not set, try to search default dirs
   // By default search /usr/bin, /bin, and /usr/local/nvidia/bin (when
@@ -70,7 +70,7 @@ public class GpuDiscoverer {
   private GpuDeviceInformationParser parser = new GpuDeviceInformationParser();
 
   private int numOfErrorExecutionSinceLastSucceed = 0;
-  GpuDeviceInformation lastDiscoveredGpuInformation = null;
+  private GpuDeviceInformation lastDiscoveredGpuInformation = null;
 
   private void validateConfOrThrowException() throws YarnException {
     if (conf == null) {
@@ -89,7 +89,7 @@ public class GpuDiscoverer {
    * @return GpuDeviceInformation
    * @throws YarnException when any error happens
    */
-  public synchronized GpuDeviceInformation getGpuDeviceInformation()
+  synchronized GpuDeviceInformation getGpuDeviceInformation()
       throws YarnException {
     validateConfOrThrowException();
 
@@ -112,10 +112,9 @@ public class GpuDiscoverer {
     try {
       output = Shell.execCommand(environment,
           new String[] { pathOfGpuBinary, "-x", "-q" }, MAX_EXEC_TIMEOUT_MS);
-      GpuDeviceInformation info = parser.parseXml(output);
+      lastDiscoveredGpuInformation = parser.parseXml(output);
       numOfErrorExecutionSinceLastSucceed = 0;
-      lastDiscoveredGpuInformation = info;
-      return info;
+      return lastDiscoveredGpuInformation;
     } catch (IOException e) {
       numOfErrorExecutionSinceLastSucceed++;
       String msg =
@@ -149,52 +148,91 @@ public class GpuDiscoverer {
         YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
         YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
 
-    List<GpuDevice> gpuDevices = new ArrayList<>();
-
     if (allowedDevicesStr.equals(
         YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES)) {
-      // Get gpu device information from system.
-      if (null == lastDiscoveredGpuInformation) {
-        String msg = YarnConfiguration.NM_GPU_ALLOWED_DEVICES + " is set to "
-            + YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES
-            + ", however automatically discovering "
-            + "GPU information failed, please check NodeManager log for more"
-            + " details, as an alternative, admin can specify "
-            + YarnConfiguration.NM_GPU_ALLOWED_DEVICES
-            + " manually to enable GPU isolation.";
-        LOG.error(msg);
-        throw new YarnException(msg);
+      return parseGpuDevicesFromAutoDiscoveredGpuInfo();
+    } else {
+      return parseGpuDevicesFromUserDefinedValues(allowedDevicesStr);
+    }
+  }
+
+  private List<GpuDevice> parseGpuDevicesFromAutoDiscoveredGpuInfo()
+          throws YarnException {
+    if (lastDiscoveredGpuInformation == null) {
+      String msg = YarnConfiguration.NM_GPU_ALLOWED_DEVICES + " is set to "
+          + YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES
+          + ", however automatically discovering "
+          + "GPU information failed, please check NodeManager log for more"
+          + " details, as an alternative, admin can specify "
+          + YarnConfiguration.NM_GPU_ALLOWED_DEVICES
+          + " manually to enable GPU isolation.";
+      LOG.error(msg);
+      throw new YarnException(msg);
+    }
+
+    List<GpuDevice> gpuDevices = new ArrayList<>();
+    if (lastDiscoveredGpuInformation.getGpus() != null) {
+      int numberOfGpus = lastDiscoveredGpuInformation.getGpus().size();
+      LOG.debug("Found {} GPU devices", numberOfGpus);
+      for (int i = 0; i < numberOfGpus; i++) {
+        List<PerGpuDeviceInformation> gpuInfos =
+            lastDiscoveredGpuInformation.getGpus();
+        gpuDevices.add(new GpuDevice(i, gpuInfos.get(i).getMinorNumber()));
       }
+    }
+    return gpuDevices;
+  }
 
-      if (lastDiscoveredGpuInformation.getGpus() != null) {
-        for (int i = 0; i < lastDiscoveredGpuInformation.getGpus().size();
-             i++) {
-          List<PerGpuDeviceInformation> gpuInfos =
-              lastDiscoveredGpuInformation.getGpus();
-          gpuDevices.add(new GpuDevice(i, gpuInfos.get(i).getMinorNumber()));
+  /**
+   * @param devices allowed devices coming from the config.
+   *                          Individual devices should be separated by commas.
+   *                          <br>The format of individual devices should be:
+   *                           &lt;index:&gt;&lt;minorNumber&gt;
+   * @return List of GpuDevices
+   * @throws YarnException when a GPU device is defined as a duplicate.
+   * The first duplicate GPU device will be added to the exception message.
+   */
+  private List<GpuDevice> parseGpuDevicesFromUserDefinedValues(String devices)
+      throws YarnException {
+    if (devices.trim().isEmpty()) {
+      throw GpuDeviceSpecificationException.createWithEmptyValueSpecified();
+    }
+    List<GpuDevice> gpuDevices = Lists.newArrayList();
+    for (String device : devices.split(",")) {
+      if (device.trim().length() > 0) {
+        String[] splitByColon = device.trim().split(":");
+        if (splitByColon.length != 2) {
+          throw GpuDeviceSpecificationException.
+              createWithWrongValueSpecified(device, devices);
         }
-      }
-    } else{
-      for (String s : allowedDevicesStr.split(",")) {
-        if (s.trim().length() > 0) {
-          String[] kv = s.trim().split(":");
-          if (kv.length != 2) {
-            throw new YarnException(
-                "Illegal format, it should be index:minor_number format, now it="
-                    + s);
-          }
 
-          gpuDevices.add(
-              new GpuDevice(Integer.parseInt(kv[0]), Integer.parseInt(kv[1])));
+        GpuDevice gpuDevice = parseGpuDevice(device, splitByColon, devices);
+        if (!gpuDevices.contains(gpuDevice)) {
+          gpuDevices.add(gpuDevice);
+        } else {
+          throw GpuDeviceSpecificationException
+              .createWithDuplicateValueSpecified(device, devices);
         }
       }
-      LOG.info("Allowed GPU devices:" + gpuDevices);
     }
+    LOG.info("Allowed GPU devices:" + gpuDevices);
 
     return gpuDevices;
   }
 
-  public synchronized void initialize(Configuration conf) throws YarnException {
+  private GpuDevice parseGpuDevice(String device, String[] splitByColon,
+      String allowedDevicesStr) throws YarnException {
+    try {
+      int index = Integer.parseInt(splitByColon[0]);
+      int minorNumber = Integer.parseInt(splitByColon[1]);
+      return new GpuDevice(index, minorNumber);
+    } catch (NumberFormatException e) {
+      throw GpuDeviceSpecificationException.
+          createWithWrongValueSpecified(device, allowedDevicesStr, e);
+    }
+  }
+
+  public synchronized void initialize(Configuration conf) {
     this.conf = conf;
     numOfErrorExecutionSinceLastSucceed = 0;
     String pathToExecutable = conf.get(YarnConfiguration.NM_GPU_PATH_TO_EXEC,
@@ -203,9 +241,7 @@ public class GpuDiscoverer {
       pathToExecutable = DEFAULT_BINARY_NAME;
     }
 
-    // Validate file existence
     File binaryPath = new File(pathToExecutable);
-
     if (!binaryPath.exists()) {
       // When binary not exist, use default setting.
       boolean found = false;
@@ -249,12 +285,12 @@ public class GpuDiscoverer {
   }
 
   @VisibleForTesting
-  protected Map<String, String> getEnvironmentToRunCommand() {
+  Map<String, String> getEnvironmentToRunCommand() {
     return environment;
   }
 
   @VisibleForTesting
-  protected String getPathOfGpuBinary() {
+  String getPathOfGpuBinary() {
     return pathOfGpuBinary;
   }
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
index 796eb25..8b19048 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
@@ -40,11 +40,11 @@ public class GpuNodeResourceUpdateHandler extends NodeResourceUpdaterPlugin {
   public void updateConfiguredResource(Resource res) throws YarnException {
     LOG.info("Initializing configured GPU resources for the NodeManager.");
 
-    List<GpuDevice> usableGpus =
-        GpuDiscoverer.getInstance().getGpusUsableByYarn();
-    if (null == usableGpus || usableGpus.isEmpty()) {
-      String message = "GPU is enabled, but couldn't find any usable GPUs on the "
-          + "NodeManager.";
+    List<GpuDevice> usableGpus = GpuDiscoverer.getInstance()
+            .getGpusUsableByYarn();
+    if (usableGpus == null || usableGpus.isEmpty()) {
+      String message = "GPU is enabled, " +
+              "but couldn't find any usable GPUs on the NodeManager!";
       LOG.error(message);
       // No gpu can be used by YARN.
       throw new YarnException(message);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/package-info.java
new file mode 100644
index 0000000..81f155f
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Package for GPU support classes.
+ */
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
index 4abb633..cbbfded 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
@@ -23,17 +23,26 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
-import org.junit.Assert;
 import org.junit.Assume;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.util.List;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
 public class TestGpuDiscoverer {
+  @Rule
+  public ExpectedException exception = ExpectedException.none();
+
   private String getTestParentFolder() {
     File f = new File("target/temp/" + TestGpuDiscoverer.class.getName());
     return f.getAbsolutePath();
@@ -51,6 +60,12 @@ public class TestGpuDiscoverer {
     f.mkdirs();
   }
 
+  private Configuration createConfigWithAllowedDevices(String s) {
+    Configuration conf = new Configuration(false);
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, s);
+    return conf;
+  }
+
   @Test
   public void testLinuxGpuResourceDiscoverPluginConfig() throws Exception {
     // Only run this on demand.
@@ -61,10 +76,10 @@ public class TestGpuDiscoverer {
     Configuration conf = new Configuration(false);
     GpuDiscoverer plugin = new GpuDiscoverer();
     plugin.initialize(conf);
-    Assert.assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
+    assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
         plugin.getPathOfGpuBinary());
-    Assert.assertNotNull(plugin.getEnvironmentToRunCommand().get("PATH"));
-    Assert.assertTrue(
+    assertNotNull(plugin.getEnvironmentToRunCommand().get("PATH"));
+    assertTrue(
         plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
 
     // test case 2, check mandatory set path.
@@ -74,18 +89,18 @@ public class TestGpuDiscoverer {
     conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, getTestParentFolder());
     plugin = new GpuDiscoverer();
     plugin.initialize(conf);
-    Assert.assertEquals(fakeBinary.getAbsolutePath(),
+    assertEquals(fakeBinary.getAbsolutePath(),
         plugin.getPathOfGpuBinary());
-    Assert.assertNull(plugin.getEnvironmentToRunCommand().get("PATH"));
+    assertNull(plugin.getEnvironmentToRunCommand().get("PATH"));
 
     // test case 3, check mandatory set path, but binary doesn't exist so default
     // path will be used.
     fakeBinary.delete();
     plugin = new GpuDiscoverer();
     plugin.initialize(conf);
-    Assert.assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
+    assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
         plugin.getPathOfGpuBinary());
-    Assert.assertTrue(
+    assertTrue(
         plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
   }
 
@@ -100,42 +115,165 @@ public class TestGpuDiscoverer {
     plugin.initialize(conf);
     GpuDeviceInformation info = plugin.getGpuDeviceInformation();
 
-    Assert.assertTrue(info.getGpus().size() > 0);
-    Assert.assertEquals(plugin.getGpusUsableByYarn().size(),
+    assertTrue(info.getGpus().size() > 0);
+    assertEquals(plugin.getGpusUsableByYarn().size(),
         info.getGpus().size());
   }
 
   @Test
-  public void getNumberOfUsableGpusFromConfig() throws YarnException {
-    Configuration conf = new Configuration(false);
+  public void testGetNumberOfUsableGpusFromConfigSingleDevice()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("1:2");
 
-    // Illegal format
-    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:2,3");
     GpuDiscoverer plugin = new GpuDiscoverer();
-    try {
-      plugin.initialize(conf);
-      plugin.getGpusUsableByYarn();
-      Assert.fail("Illegal format, should fail.");
-    } catch (YarnException e) {
-      // Expected
-    }
-
-    // Valid format
-    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:2,3:4");
-    plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    List<GpuDevice> usableGpuDevices = plugin.getGpusUsableByYarn();
+    assertEquals(1, usableGpuDevices.size());
+
+    assertEquals(1, usableGpuDevices.get(0).getIndex());
+    assertEquals(2, usableGpuDevices.get(0).getMinorNumber());
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigIllegalFormat()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,3");
+
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfig() throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,3:4");
+    GpuDiscoverer plugin = new GpuDiscoverer();
     plugin.initialize(conf);
 
     List<GpuDevice> usableGpuDevices = plugin.getGpusUsableByYarn();
-    Assert.assertEquals(4, usableGpuDevices.size());
+    assertEquals(4, usableGpuDevices.size());
+
+    assertEquals(0, usableGpuDevices.get(0).getIndex());
+    assertEquals(0, usableGpuDevices.get(0).getMinorNumber());
+
+    assertEquals(1, usableGpuDevices.get(1).getIndex());
+    assertEquals(1, usableGpuDevices.get(1).getMinorNumber());
+
+    assertEquals(2, usableGpuDevices.get(2).getIndex());
+    assertEquals(2, usableGpuDevices.get(2).getMinorNumber());
+
+    assertEquals(3, usableGpuDevices.get(3).getIndex());
+    assertEquals(4, usableGpuDevices.get(3).getMinorNumber());
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigDuplicateValues()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,1:1");
+
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigDuplicateValues2()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,1:1,2:2");
+
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigIncludingSpaces()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("0 : 0,1 : 1");
+
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigIncludingGibberish()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("0:@$1,1:1");
 
-    Assert.assertTrue(0 == usableGpuDevices.get(0).getIndex());
-    Assert.assertTrue(1 == usableGpuDevices.get(1).getIndex());
-    Assert.assertTrue(2 == usableGpuDevices.get(2).getIndex());
-    Assert.assertTrue(3 == usableGpuDevices.get(3).getIndex());
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigIncludingLetters()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("x:0, 1:y");
+
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigWithoutIndexNumber()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices(":0, :1");
+
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigEmptyString()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("");
 
-    Assert.assertTrue(0 == usableGpuDevices.get(0).getMinorNumber());
-    Assert.assertTrue(1 == usableGpuDevices.get(1).getMinorNumber());
-    Assert.assertTrue(2 == usableGpuDevices.get(2).getMinorNumber());
-    Assert.assertTrue(4 == usableGpuDevices.get(3).getMinorNumber());
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigValueWithoutComma()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("0:0 0:1");
+
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigValueWithoutComma2()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("0.1 0.2");
+
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
+  }
+
+  @Test
+  public void testGetNumberOfUsableGpusFromConfigValueWithoutColonSeparator()
+      throws YarnException {
+    Configuration conf = createConfigWithAllowedDevices("0.1,0.2");
+
+    exception.expect(GpuDeviceSpecificationException.class);
+    GpuDiscoverer plugin = new GpuDiscoverer();
+    plugin.initialize(conf);
+    plugin.getGpusUsableByYarn();
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org