You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by na...@apache.org on 2017/07/21 20:54:31 UTC
systemml git commit: [SYSTEMML-1795] Specify a set of GPUs to use for
a given machine
Repository: systemml
Updated Branches:
refs/heads/master 1f5b14dda -> fec209306
[SYSTEMML-1795] Specify a set of GPUs to use for a given machine
Can specify:
a) -1 for all GPUs
b) a specific GPU number
c) a comma separated list of GPUs
d) a range of GPUs
Closes #587
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/fec20930
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/fec20930
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/fec20930
Branch: refs/heads/master
Commit: fec209306d3c7e55673872f431d43ceda53b7a6c
Parents: 1f5b14d
Author: Nakul Jindal <na...@gmail.com>
Authored: Fri Jul 21 13:55:03 2017 -0700
Committer: Nakul Jindal <na...@gmail.com>
Committed: Fri Jul 21 13:55:04 2017 -0700
----------------------------------------------------------------------
conf/SystemML-config.xml.template | 4 +-
.../apache/sysml/api/ScriptExecutorUtils.java | 5 +-
.../java/org/apache/sysml/conf/DMLConfig.java | 4 +-
.../gpu/context/GPUContextPool.java | 90 +++++++++++++++++---
.../org/apache/sysml/test/unit/UtilsTest.java | 78 +++++++++++++++++
5 files changed, 160 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/conf/SystemML-config.xml.template
----------------------------------------------------------------------
diff --git a/conf/SystemML-config.xml.template b/conf/SystemML-config.xml.template
index 11e86ed..8608a9c 100644
--- a/conf/SystemML-config.xml.template
+++ b/conf/SystemML-config.xml.template
@@ -78,6 +78,6 @@
<!-- prints extra statistics information for Deep Neural Networks done in CP mode -->
<systemml.stats.extraDNN>false</systemml.stats.extraDNN>
- <!-- sets the maximum number of GPUs per process, -1 for all GPUs -->
- <systemml.gpu.perProcessMax>-1</systemml.gpu.perProcessMax>
+ <!-- sets the GPUs to use per process, -1 for all GPUs, a specific GPU number (5), a range (eg: 0-2) or a comma separated list (eg: 0,2,4)-->
+ <systemml.gpu.availableGPUs>-1</systemml.gpu.availableGPUs>
</root>
http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
index 0e0950e..b094c91 100644
--- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
+++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
@@ -78,9 +78,8 @@ public class ScriptExecutorUtils {
LibMatrixDNN.DISPLAY_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_DNN_STATS);
DMLScript.FINEGRAINED_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_FINEGRAINED_STATS);
- // Sets the maximum number of GPUs per process, -1 for all available
- // GPUs
- GPUContextPool.PER_PROCESS_MAX_GPUS = dmlconf.getIntValue(DMLConfig.MAX_GPUS_PER_PROCESS);
+ // Sets the GPUs to use for this process (a range, all GPUs, comma separated list or a specific GPU)
+ GPUContextPool.AVAILABLE_GPUS = dmlconf.getTextValue(DMLConfig.AVAILABLE_GPUS);
Statistics.startRunTimer();
try {
// run execute (w/ exception handling to ensure proper shutdown)
http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/conf/DMLConfig.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java b/src/main/java/org/apache/sysml/conf/DMLConfig.java
index c248098..a6a4b5e 100644
--- a/src/main/java/org/apache/sysml/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java
@@ -78,7 +78,7 @@ public class DMLConfig
public static final String EXTRA_FINEGRAINED_STATS = "systemml.stats.finegrained"; //boolean
public static final String EXTRA_GPU_STATS = "systemml.stats.extraGPU"; //boolean
public static final String EXTRA_DNN_STATS = "systemml.stats.extraDNN"; //boolean
- public static final String MAX_GPUS_PER_PROCESS = "systemml.gpu.perProcessMax"; // boolean, maximum number of gpus to use, -1 for all
+ public static final String AVAILABLE_GPUS = "systemml.gpu.availableGPUs"; // String to specify which GPUs to use (a range, all GPUs, comma separated list or a specific GPU)
// Fraction of available memory to use. The available memory is computed when the GPUContext is created
// to handle the tradeoff on calling cudaMemGetInfo too often.
@@ -123,7 +123,7 @@ public class DMLConfig
_defaultVals.put(EXTRA_DNN_STATS, "false" );
_defaultVals.put(GPU_MEMORY_UTILIZATION_FACTOR, "0.9" );
- _defaultVals.put(MAX_GPUS_PER_PROCESS, "-1");
+ _defaultVals.put(AVAILABLE_GPUS, "-1");
}
public DMLConfig()
http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java
index ef38da8..88bf403 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java
@@ -22,6 +22,7 @@ import static jcuda.driver.JCudaDriver.cuDeviceGetCount;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.runtime.JCuda.cudaGetDeviceProperties;
+import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
@@ -42,9 +43,9 @@ public class GPUContextPool {
protected static final Log LOG = LogFactory.getLog(GPUContextPool.class.getName());
/**
- * Maximum number of gpus to use, -1 for all
+ * GPUs to use, can specify -1 to use all, comma separated list of GPU numbers, a specific GPU or a range
*/
- public static int PER_PROCESS_MAX_GPUS = -1;
+ public static String AVAILABLE_GPUS;
private static long INITIAL_GPU_MEMORY_BUDGET = -1;
@@ -98,22 +99,32 @@ public class GPUContextPool {
deviceCount = deviceCountArray[0];
deviceProperties = new cudaDeviceProp[deviceCount];
- if (PER_PROCESS_MAX_GPUS > 0)
- deviceCount = Math.min(PER_PROCESS_MAX_GPUS, deviceCount);
+ try {
+ ArrayList<Integer> listOfGPUs = parseListString(AVAILABLE_GPUS, deviceCount);
- // Initialize the list of devices
- for (int i = 0; i < deviceCount; i++) {
- cudaDeviceProp properties = new cudaDeviceProp();
- cudaGetDeviceProperties(properties, i);
- deviceProperties[i] = properties;
- }
+ // Initialize the list of devices & the pool of GPUContexts
+ for (int i : listOfGPUs) {
+ cudaDeviceProp properties = new cudaDeviceProp();
+ cudaGetDeviceProperties(properties, i);
+ deviceProperties[i] = properties;
+ GPUContext gCtx = new GPUContext(i);
+ pool.add(gCtx);
+ }
+
+ } catch (IllegalArgumentException e) {
+ LOG.warn("Invalid setting for setting systemml.gpu.availableGPUs, defaulting to use ALL GPUs");
- // Initialize the pool of GPUContexts
- for (int i = 0; i < deviceCount; i++) {
- GPUContext gCtx = new GPUContext(i);
- pool.add(gCtx);
+ // Initialize the list of devices & the pool of GPUContexts
+ for (int i = 0; i < deviceCount; i++) {
+ cudaDeviceProp properties = new cudaDeviceProp();
+ cudaGetDeviceProperties(properties, i);
+ deviceProperties[i] = properties;
+ GPUContext gCtx = new GPUContext(i);
+ pool.add(gCtx);
+ }
}
+
// Initialize the initial memory budget
// If there are heterogeneous GPUs on the machine (different memory sizes)
// initially available memory is set to the GPU with the lowest memory
@@ -128,6 +139,7 @@ public class GPUContextPool {
GPUContext.LOG.info("Total number of GPUs on the machine: " + deviceCount);
+ GPUContext.LOG.info("GPUs being used: " + AVAILABLE_GPUS);
GPUContext.LOG.info("Initial GPU memory: " + initialGPUMemBudget());
//int[] device = {-1};
@@ -142,6 +154,56 @@ public class GPUContextPool {
}
/**
+     * Parses a string describing a selection of indices into a list of integers.
+     * The string (after trimming surrounding whitespace) can be of these forms:
+     * 1. "-1"    : all integers from 0 up to, but not including, max - [0,1,2,...,max-1]
+     * 2. "2,3,0" : comma separated list of integers, kept in the order given - [2,3,0]
+     * 3. "4"     : a specific integer - [4]
+     * 4. "0-4"   : an inclusive, ascending range of integers - [0,1,2,3,4]
+     * Every resulting value must satisfy 0 <= value < max. Anything else is invalid.
+     *
+     * @param str input string, must not be null
+     * @param max exclusive upper bound for the parsed integers
+     * @return the list of integers described by the parsed string
+     * @throws IllegalArgumentException if the string is null or malformed (this includes the
+     *         NumberFormatException raised for non-numeric parts), if a range is descending,
+     *         or if any value lies outside of [0, max)
+     */
+    public static ArrayList<Integer> parseListString(String str, int max) {
+        if (str == null) {
+            throw new IllegalArgumentException("Invalid string to parse to a list of numbers : " + str);
+        }
+        ArrayList<Integer> result = new ArrayList<>();
+        str = str.trim();
+        // Compare by value, not by reference: callers may pass a String that is not the
+        // same instance as the "-1" literal (e.g. one built or read at runtime).
+        if ("-1".equals(str)) { // all
+            for (int i = 0; i < max; i++) {
+                result.add(i);
+            }
+        } else if (str.contains("-")) { // range
+            String[] numbersStr = str.split("-");
+            if (numbersStr.length != 2) {
+                throw new IllegalArgumentException("Invalid string to parse to a list of numbers : " + str);
+            }
+            int begin = Integer.parseInt(numbersStr[0].trim());
+            int end = Integer.parseInt(numbersStr[1].trim());
+            // Validate the bounds before expanding, so that a descending range is not silently
+            // turned into an empty list and an oversized upper bound does not build a huge list.
+            if (begin < 0 || end >= max || begin > end) {
+                throw new IllegalArgumentException("Invalid range (" + str + "), expected 0 <= begin <= end < " + max);
+            }
+            for (int i = begin; i <= end; i++) {
+                result.add(i);
+            }
+        } else if (str.contains(",")) { // comma separated list
+            for (String numberStr : str.split(",")) {
+                result.add(Integer.parseInt(numberStr.trim()));
+            }
+        } else { // single number
+            result.add(Integer.parseInt(str));
+        }
+        // Check that all numbers lie between 0 (inclusive) and max (exclusive)
+        for (int n : result) {
+            if (n < 0 || n >= max) {
+                throw new IllegalArgumentException("Invalid string (" + str + ") parsed to a list of numbers (" + result + ") which exceeds the maximum range : " + max);
+            }
+        }
+        return result;
+    }
+
+ /**
* Reserves and gets an initialized list of GPUContexts
*
* @return null if no GPUContexts in pool, otherwise a valid list of GPUContext
http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/test/java/org/apache/sysml/test/unit/UtilsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/unit/UtilsTest.java b/src/test/java/org/apache/sysml/test/unit/UtilsTest.java
new file mode 100644
index 0000000..b5dccd0
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/unit/UtilsTest.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.unit;
+
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit tests for utility functions that live in various places of the codebase.
+ * At the moment this covers {@link GPUContextPool#parseListString(String, int)}.
+ */
+public class UtilsTest {
+
+    /** A single in-range index ("0") parses to a one-element list. */
+    @Test
+    public void testParseListString0() {
+        final int max = 10;
+        Assert.assertEquals(Arrays.asList(0), GPUContextPool.parseListString("0", max));
+    }
+
+    /** A single in-range index other than zero ("7") parses to a one-element list. */
+    @Test
+    public void testParseListString1() {
+        final int max = 10;
+        Assert.assertEquals(Arrays.asList(7), GPUContextPool.parseListString("7", max));
+    }
+
+    /** "-1" selects every index below the maximum. */
+    @Test
+    public void testParseListString2() {
+        final int max = 4;
+        Assert.assertEquals(Arrays.asList(0,1,2,3), GPUContextPool.parseListString("-1", max));
+    }
+
+    /** A comma separated list parses to exactly the listed indices. */
+    @Test
+    public void testParseListString3() {
+        final int max = 6;
+        Assert.assertEquals(Arrays.asList(0,1,2,3), GPUContextPool.parseListString("0,1,2,3", max));
+    }
+
+    /** A range "begin-end" parses to all indices from begin to end, both inclusive. */
+    @Test
+    public void testParseListString4() {
+        final int max = 6;
+        Assert.assertEquals(Arrays.asList(0,1,2,3), GPUContextPool.parseListString("0-3", max));
+    }
+
+    /** A single index that is not below the maximum is rejected. */
+    @Test(expected=IllegalArgumentException.class)
+    public void testParseListStringFail0() {
+        final int max = 4;
+        GPUContextPool.parseListString("7", max);
+    }
+
+    /** A comma separated list containing indices that are not below the maximum is rejected. */
+    @Test(expected=IllegalArgumentException.class)
+    public void testParseListStringFail1() {
+        final int max = 2;
+        GPUContextPool.parseListString("0,1,2,3", max);
+    }
+
+    /** Mixing the comma separated list syntax with the range syntax is rejected. */
+    @Test(expected=IllegalArgumentException.class)
+    public void testParseListStringFail2() {
+        final int max = 2;
+        GPUContextPool.parseListString("0,1,2,3-4", max);
+    }
+
+    /** A string with more than one '-' separator is rejected. */
+    @Test(expected=IllegalArgumentException.class)
+    public void testParseListStringFail4() {
+        final int max = 6;
+        GPUContextPool.parseListString("-1-4", max);
+    }
+}