You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2019/02/22 14:53:32 UTC
[hadoop] branch branch-3.2 updated: YARN-9118. Handle exceptions
with parsing user defined GPU devices in GpuDiscoverer. Contributed by
Szilard Nemeth.
This is an automated email from the ASF dual-hosted git repository.
sunilg pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new d75aa33 YARN-9118. Handle exceptions with parsing user defined GPU devices in GpuDiscoverer. Contributed by Szilard Nemeth.
d75aa33 is described below
commit d75aa33612af3e7e408a8a35c8784bd825de2fed
Author: Sunil G <su...@apache.org>
AuthorDate: Fri Feb 22 20:22:17 2019 +0530
YARN-9118. Handle exceptions with parsing user defined GPU devices in GpuDiscoverer. Contributed by Szilard Nemeth.
(cherry picked from commit 95fbbfed75dd309b5d56032ece64996165572287)
---
.../resources/gpu/GpuResourceHandlerImpl.java | 5 +-
.../gpu/GpuDeviceSpecificationException.java | 82 ++++++++
.../resourceplugin/gpu/GpuDiscoverer.java | 124 ++++++++-----
.../gpu/GpuNodeResourceUpdateHandler.java | 10 +-
.../resourceplugin/gpu/package-info.java | 20 ++
.../resourceplugin/gpu/TestGpuDiscoverer.java | 206 +++++++++++++++++----
6 files changed, 361 insertions(+), 86 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
index e25a6b5..84ef55f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.java
@@ -67,11 +67,10 @@ public class GpuResourceHandlerImpl implements ResourceHandler {
throws ResourceHandlerException {
List<GpuDevice> usableGpus;
try {
- usableGpus = GpuDiscoverer.getInstance()
- .getGpusUsableByYarn();
+ usableGpus = GpuDiscoverer.getInstance().getGpusUsableByYarn();
if (usableGpus == null || usableGpus.isEmpty()) {
String message = "GPU is enabled on the NodeManager, but couldn't find "
- + "any usable GPU devices, please double check configuration.";
+ + "any usable GPU devices, please double check configuration!";
LOG.error(message);
throw new ResourceHandlerException(message);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDeviceSpecificationException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDeviceSpecificationException.java
new file mode 100644
index 0000000..9d61b91
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDeviceSpecificationException.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+/**
+ * This exception is to be thrown when allowed GPU device specification
+ * is empty or invalid.
+ */
+public final class GpuDeviceSpecificationException extends YarnException {
+ private static final String VALID_FORMAT_MESSAGE = "The valid format " +
+ "should be: index:minor_number";
+
+ private GpuDeviceSpecificationException(String message) {
+ super(message);
+ }
+
+ private GpuDeviceSpecificationException(String message, Exception cause) {
+ super(message, cause);
+ }
+
+ public static GpuDeviceSpecificationException createWithEmptyValueSpecified() {
+ return new GpuDeviceSpecificationException(
+ YarnConfiguration.NM_GPU_ALLOWED_DEVICES +
+ " is set to an empty value! Please specify " +
+ YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES +
+ " to enable auto-discovery or " +
+ "please enter the GPU device IDs manually! " +
+ VALID_FORMAT_MESSAGE);
+ }
+
+ public static GpuDeviceSpecificationException createWithWrongValueSpecified(
+ String device, String configValue, Exception cause) {
+ final String message = createIllegalFormatMessage(device, configValue);
+ return new GpuDeviceSpecificationException(message, cause);
+ }
+
+ public static GpuDeviceSpecificationException createWithWrongValueSpecified(
+ String device, String configValue) {
+ final String message = createIllegalFormatMessage(device, configValue);
+ return new GpuDeviceSpecificationException(message);
+ }
+
+ public static GpuDeviceSpecificationException createWithDuplicateValueSpecified(
+ String device, String configValue) {
+ final String message = createDuplicateFormatMessage(device, configValue);
+ return new GpuDeviceSpecificationException(message);
+ }
+
+ private static String createIllegalFormatMessage(String device,
+ String configValue) {
+ return String.format("Illegal format of individual GPU device: %s, " +
+ "the whole config value was: '%s'! " + VALID_FORMAT_MESSAGE,
+ device, configValue);
+ }
+
+ private static String createDuplicateFormatMessage(String device,
+ String configValue) {
+ return String.format("GPU device %s" +
+ " has a duplicate definition! " +
+ "Please double-check the configuration " +
+ YarnConfiguration.NM_GPU_ALLOWED_DEVICES +
+ "! Current value of the configuration is: %s",
+ device, configValue);
+ }
+}
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 6e3cf13..92792b7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -19,8 +19,8 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@@ -47,7 +47,7 @@ public class GpuDiscoverer {
public static final Logger LOG = LoggerFactory.getLogger(
GpuDiscoverer.class);
@VisibleForTesting
- protected static final String DEFAULT_BINARY_NAME = "nvidia-smi";
+ static final String DEFAULT_BINARY_NAME = "nvidia-smi";
// When executable path not set, try to search default dirs
// By default search /usr/bin, /bin, and /usr/local/nvidia/bin (when
@@ -70,7 +70,7 @@ public class GpuDiscoverer {
private GpuDeviceInformationParser parser = new GpuDeviceInformationParser();
private int numOfErrorExecutionSinceLastSucceed = 0;
- GpuDeviceInformation lastDiscoveredGpuInformation = null;
+ private GpuDeviceInformation lastDiscoveredGpuInformation = null;
private void validateConfOrThrowException() throws YarnException {
if (conf == null) {
@@ -89,7 +89,7 @@ public class GpuDiscoverer {
* @return GpuDeviceInformation
* @throws YarnException when any error happens
*/
- public synchronized GpuDeviceInformation getGpuDeviceInformation()
+ synchronized GpuDeviceInformation getGpuDeviceInformation()
throws YarnException {
validateConfOrThrowException();
@@ -112,10 +112,9 @@ public class GpuDiscoverer {
try {
output = Shell.execCommand(environment,
new String[] { pathOfGpuBinary, "-x", "-q" }, MAX_EXEC_TIMEOUT_MS);
- GpuDeviceInformation info = parser.parseXml(output);
+ lastDiscoveredGpuInformation = parser.parseXml(output);
numOfErrorExecutionSinceLastSucceed = 0;
- lastDiscoveredGpuInformation = info;
- return info;
+ return lastDiscoveredGpuInformation;
} catch (IOException e) {
numOfErrorExecutionSinceLastSucceed++;
String msg =
@@ -149,52 +148,91 @@ public class GpuDiscoverer {
YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
- List<GpuDevice> gpuDevices = new ArrayList<>();
-
if (allowedDevicesStr.equals(
YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES)) {
- // Get gpu device information from system.
- if (null == lastDiscoveredGpuInformation) {
- String msg = YarnConfiguration.NM_GPU_ALLOWED_DEVICES + " is set to "
- + YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES
- + ", however automatically discovering "
- + "GPU information failed, please check NodeManager log for more"
- + " details, as an alternative, admin can specify "
- + YarnConfiguration.NM_GPU_ALLOWED_DEVICES
- + " manually to enable GPU isolation.";
- LOG.error(msg);
- throw new YarnException(msg);
+ return parseGpuDevicesFromAutoDiscoveredGpuInfo();
+ } else {
+ return parseGpuDevicesFromUserDefinedValues(allowedDevicesStr);
+ }
+ }
+
+ private List<GpuDevice> parseGpuDevicesFromAutoDiscoveredGpuInfo()
+ throws YarnException {
+ if (lastDiscoveredGpuInformation == null) {
+ String msg = YarnConfiguration.NM_GPU_ALLOWED_DEVICES + " is set to "
+ + YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES
+ + ", however automatically discovering "
+ + "GPU information failed, please check NodeManager log for more"
+ + " details, as an alternative, admin can specify "
+ + YarnConfiguration.NM_GPU_ALLOWED_DEVICES
+ + " manually to enable GPU isolation.";
+ LOG.error(msg);
+ throw new YarnException(msg);
+ }
+
+ List<GpuDevice> gpuDevices = new ArrayList<>();
+ if (lastDiscoveredGpuInformation.getGpus() != null) {
+ int numberOfGpus = lastDiscoveredGpuInformation.getGpus().size();
+ LOG.debug("Found {} GPU devices", numberOfGpus);
+ for (int i = 0; i < numberOfGpus; i++) {
+ List<PerGpuDeviceInformation> gpuInfos =
+ lastDiscoveredGpuInformation.getGpus();
+ gpuDevices.add(new GpuDevice(i, gpuInfos.get(i).getMinorNumber()));
}
+ }
+ return gpuDevices;
+ }
- if (lastDiscoveredGpuInformation.getGpus() != null) {
- for (int i = 0; i < lastDiscoveredGpuInformation.getGpus().size();
- i++) {
- List<PerGpuDeviceInformation> gpuInfos =
- lastDiscoveredGpuInformation.getGpus();
- gpuDevices.add(new GpuDevice(i, gpuInfos.get(i).getMinorNumber()));
+ /**
+ * @param devices allowed devices coming from the config.
+ * Individual devices should be separated by commas.
+ * <br>The format of individual devices should be:
+ * <index:><minorNumber>
+ * @return List of GpuDevices
+ * @throws YarnException when a GPU device is defined as a duplicate.
+ * The first duplicate GPU device will be added to the exception message.
+ */
+ private List<GpuDevice> parseGpuDevicesFromUserDefinedValues(String devices)
+ throws YarnException {
+ if (devices.trim().isEmpty()) {
+ throw GpuDeviceSpecificationException.createWithEmptyValueSpecified();
+ }
+ List<GpuDevice> gpuDevices = Lists.newArrayList();
+ for (String device : devices.split(",")) {
+ if (device.trim().length() > 0) {
+ String[] splitByColon = device.trim().split(":");
+ if (splitByColon.length != 2) {
+ throw GpuDeviceSpecificationException.
+ createWithWrongValueSpecified(device, devices);
}
- }
- } else{
- for (String s : allowedDevicesStr.split(",")) {
- if (s.trim().length() > 0) {
- String[] kv = s.trim().split(":");
- if (kv.length != 2) {
- throw new YarnException(
- "Illegal format, it should be index:minor_number format, now it="
- + s);
- }
- gpuDevices.add(
- new GpuDevice(Integer.parseInt(kv[0]), Integer.parseInt(kv[1])));
+ GpuDevice gpuDevice = parseGpuDevice(device, splitByColon, devices);
+ if (!gpuDevices.contains(gpuDevice)) {
+ gpuDevices.add(gpuDevice);
+ } else {
+ throw GpuDeviceSpecificationException
+ .createWithDuplicateValueSpecified(device, devices);
}
}
- LOG.info("Allowed GPU devices:" + gpuDevices);
}
+ LOG.info("Allowed GPU devices:" + gpuDevices);
return gpuDevices;
}
- public synchronized void initialize(Configuration conf) throws YarnException {
+ private GpuDevice parseGpuDevice(String device, String[] splitByColon,
+ String allowedDevicesStr) throws YarnException {
+ try {
+ int index = Integer.parseInt(splitByColon[0]);
+ int minorNumber = Integer.parseInt(splitByColon[1]);
+ return new GpuDevice(index, minorNumber);
+ } catch (NumberFormatException e) {
+ throw GpuDeviceSpecificationException.
+ createWithWrongValueSpecified(device, allowedDevicesStr, e);
+ }
+ }
+
+ public synchronized void initialize(Configuration conf) {
this.conf = conf;
numOfErrorExecutionSinceLastSucceed = 0;
String pathToExecutable = conf.get(YarnConfiguration.NM_GPU_PATH_TO_EXEC,
@@ -203,9 +241,7 @@ public class GpuDiscoverer {
pathToExecutable = DEFAULT_BINARY_NAME;
}
- // Validate file existence
File binaryPath = new File(pathToExecutable);
-
if (!binaryPath.exists()) {
// When binary not exist, use default setting.
boolean found = false;
@@ -249,12 +285,12 @@ public class GpuDiscoverer {
}
@VisibleForTesting
- protected Map<String, String> getEnvironmentToRunCommand() {
+ Map<String, String> getEnvironmentToRunCommand() {
return environment;
}
@VisibleForTesting
- protected String getPathOfGpuBinary() {
+ String getPathOfGpuBinary() {
return pathOfGpuBinary;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
index 796eb25..8b19048 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuNodeResourceUpdateHandler.java
@@ -40,11 +40,11 @@ public class GpuNodeResourceUpdateHandler extends NodeResourceUpdaterPlugin {
public void updateConfiguredResource(Resource res) throws YarnException {
LOG.info("Initializing configured GPU resources for the NodeManager.");
- List<GpuDevice> usableGpus =
- GpuDiscoverer.getInstance().getGpusUsableByYarn();
- if (null == usableGpus || usableGpus.isEmpty()) {
- String message = "GPU is enabled, but couldn't find any usable GPUs on the "
- + "NodeManager.";
+ List<GpuDevice> usableGpus = GpuDiscoverer.getInstance()
+ .getGpusUsableByYarn();
+ if (usableGpus == null || usableGpus.isEmpty()) {
+ String message = "GPU is enabled, " +
+ "but couldn't find any usable GPUs on the NodeManager!";
LOG.error(message);
// No gpu can be used by YARN.
throw new YarnException(message);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/package-info.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/package-info.java
new file mode 100644
index 0000000..81f155f
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Package for GPU support classes.
+ */
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
index 4abb633..cbbfded 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
@@ -23,17 +23,26 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
-import org.junit.Assert;
import org.junit.Assume;
import org.junit.Before;
+import org.junit.Rule;
import org.junit.Test;
+import org.junit.rules.ExpectedException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
public class TestGpuDiscoverer {
+ @Rule
+ public ExpectedException exception = ExpectedException.none();
+
private String getTestParentFolder() {
File f = new File("target/temp/" + TestGpuDiscoverer.class.getName());
return f.getAbsolutePath();
@@ -51,6 +60,12 @@ public class TestGpuDiscoverer {
f.mkdirs();
}
+ private Configuration createConfigWithAllowedDevices(String s) {
+ Configuration conf = new Configuration(false);
+ conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, s);
+ return conf;
+ }
+
@Test
public void testLinuxGpuResourceDiscoverPluginConfig() throws Exception {
// Only run this on demand.
@@ -61,10 +76,10 @@ public class TestGpuDiscoverer {
Configuration conf = new Configuration(false);
GpuDiscoverer plugin = new GpuDiscoverer();
plugin.initialize(conf);
- Assert.assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
+ assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
plugin.getPathOfGpuBinary());
- Assert.assertNotNull(plugin.getEnvironmentToRunCommand().get("PATH"));
- Assert.assertTrue(
+ assertNotNull(plugin.getEnvironmentToRunCommand().get("PATH"));
+ assertTrue(
plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
// test case 2, check mandatory set path.
@@ -74,18 +89,18 @@ public class TestGpuDiscoverer {
conf.set(YarnConfiguration.NM_GPU_PATH_TO_EXEC, getTestParentFolder());
plugin = new GpuDiscoverer();
plugin.initialize(conf);
- Assert.assertEquals(fakeBinary.getAbsolutePath(),
+ assertEquals(fakeBinary.getAbsolutePath(),
plugin.getPathOfGpuBinary());
- Assert.assertNull(plugin.getEnvironmentToRunCommand().get("PATH"));
+ assertNull(plugin.getEnvironmentToRunCommand().get("PATH"));
// test case 3, check mandatory set path, but binary doesn't exist so default
// path will be used.
fakeBinary.delete();
plugin = new GpuDiscoverer();
plugin.initialize(conf);
- Assert.assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
+ assertEquals(GpuDiscoverer.DEFAULT_BINARY_NAME,
plugin.getPathOfGpuBinary());
- Assert.assertTrue(
+ assertTrue(
plugin.getEnvironmentToRunCommand().get("PATH").contains("nvidia"));
}
@@ -100,42 +115,165 @@ public class TestGpuDiscoverer {
plugin.initialize(conf);
GpuDeviceInformation info = plugin.getGpuDeviceInformation();
- Assert.assertTrue(info.getGpus().size() > 0);
- Assert.assertEquals(plugin.getGpusUsableByYarn().size(),
+ assertTrue(info.getGpus().size() > 0);
+ assertEquals(plugin.getGpusUsableByYarn().size(),
info.getGpus().size());
}
@Test
- public void getNumberOfUsableGpusFromConfig() throws YarnException {
- Configuration conf = new Configuration(false);
+ public void testGetNumberOfUsableGpusFromConfigSingleDevice()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("1:2");
- // Illegal format
- conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:2,3");
GpuDiscoverer plugin = new GpuDiscoverer();
- try {
- plugin.initialize(conf);
- plugin.getGpusUsableByYarn();
- Assert.fail("Illegal format, should fail.");
- } catch (YarnException e) {
- // Expected
- }
-
- // Valid format
- conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:0,1:1,2:2,3:4");
- plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ List<GpuDevice> usableGpuDevices = plugin.getGpusUsableByYarn();
+ assertEquals(1, usableGpuDevices.size());
+
+ assertEquals(1, usableGpuDevices.get(0).getIndex());
+ assertEquals(2, usableGpuDevices.get(0).getMinorNumber());
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigIllegalFormat()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,3");
+
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfig() throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,3:4");
+ GpuDiscoverer plugin = new GpuDiscoverer();
plugin.initialize(conf);
List<GpuDevice> usableGpuDevices = plugin.getGpusUsableByYarn();
- Assert.assertEquals(4, usableGpuDevices.size());
+ assertEquals(4, usableGpuDevices.size());
+
+ assertEquals(0, usableGpuDevices.get(0).getIndex());
+ assertEquals(0, usableGpuDevices.get(0).getMinorNumber());
+
+ assertEquals(1, usableGpuDevices.get(1).getIndex());
+ assertEquals(1, usableGpuDevices.get(1).getMinorNumber());
+
+ assertEquals(2, usableGpuDevices.get(2).getIndex());
+ assertEquals(2, usableGpuDevices.get(2).getMinorNumber());
+
+ assertEquals(3, usableGpuDevices.get(3).getIndex());
+ assertEquals(4, usableGpuDevices.get(3).getMinorNumber());
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigDuplicateValues()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,1:1");
+
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigDuplicateValues2()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("0:0,1:1,2:2,1:1,2:2");
+
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigIncludingSpaces()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("0 : 0,1 : 1");
+
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigIncludingGibberish()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("0:@$1,1:1");
- Assert.assertTrue(0 == usableGpuDevices.get(0).getIndex());
- Assert.assertTrue(1 == usableGpuDevices.get(1).getIndex());
- Assert.assertTrue(2 == usableGpuDevices.get(2).getIndex());
- Assert.assertTrue(3 == usableGpuDevices.get(3).getIndex());
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigIncludingLetters()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("x:0, 1:y");
+
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigWithoutIndexNumber()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices(":0, :1");
+
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigEmptyString()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("");
- Assert.assertTrue(0 == usableGpuDevices.get(0).getMinorNumber());
- Assert.assertTrue(1 == usableGpuDevices.get(1).getMinorNumber());
- Assert.assertTrue(2 == usableGpuDevices.get(2).getMinorNumber());
- Assert.assertTrue(4 == usableGpuDevices.get(3).getMinorNumber());
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigValueWithoutComma()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("0:0 0:1");
+
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigValueWithoutComma2()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("0.1 0.2");
+
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
+ }
+
+ @Test
+ public void testGetNumberOfUsableGpusFromConfigValueWithoutColonSeparator()
+ throws YarnException {
+ Configuration conf = createConfigWithAllowedDevices("0.1,0.2");
+
+ exception.expect(GpuDeviceSpecificationException.class);
+ GpuDiscoverer plugin = new GpuDiscoverer();
+ plugin.initialize(conf);
+ plugin.getGpusUsableByYarn();
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org