You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by na...@apache.org on 2017/07/21 20:54:31 UTC

systemml git commit: [SYSTEMML-1795] Specify a set of GPUs to use for a given machine

Repository: systemml
Updated Branches:
  refs/heads/master 1f5b14dda -> fec209306


[SYSTEMML-1795] Specify a set of GPUs to use for a given machine

Can specify:
a) -1 for all GPUs
b) a specific GPU, by its device number (e.g. 5)
c) a comma separated list of GPUs
d) a range of GPUs

Closes #587


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/fec20930
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/fec20930
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/fec20930

Branch: refs/heads/master
Commit: fec209306d3c7e55673872f431d43ceda53b7a6c
Parents: 1f5b14d
Author: Nakul Jindal <na...@gmail.com>
Authored: Fri Jul 21 13:55:03 2017 -0700
Committer: Nakul Jindal <na...@gmail.com>
Committed: Fri Jul 21 13:55:04 2017 -0700

----------------------------------------------------------------------
 conf/SystemML-config.xml.template               |  4 +-
 .../apache/sysml/api/ScriptExecutorUtils.java   |  5 +-
 .../java/org/apache/sysml/conf/DMLConfig.java   |  4 +-
 .../gpu/context/GPUContextPool.java             | 90 +++++++++++++++++---
 .../org/apache/sysml/test/unit/UtilsTest.java   | 78 +++++++++++++++++
 5 files changed, 160 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/conf/SystemML-config.xml.template
----------------------------------------------------------------------
diff --git a/conf/SystemML-config.xml.template b/conf/SystemML-config.xml.template
index 11e86ed..8608a9c 100644
--- a/conf/SystemML-config.xml.template
+++ b/conf/SystemML-config.xml.template
@@ -78,6 +78,6 @@
    <!-- prints extra statistics information for Deep Neural Networks done in CP mode -->
    <systemml.stats.extraDNN>false</systemml.stats.extraDNN>
 
-   <!-- sets the maximum number of GPUs per process, -1 for all GPUs -->
-   <systemml.gpu.perProcessMax>-1</systemml.gpu.perProcessMax>
+    <!-- sets the GPUs to use per process, -1 for all GPUs, a specific GPU number (5), a range (eg: 0-2) or a comma separated list (eg: 0,2,4)-->
+    <systemml.gpu.availableGPUs>-1</systemml.gpu.availableGPUs>
 </root>

http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
index 0e0950e..b094c91 100644
--- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
+++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java
@@ -78,9 +78,8 @@ public class ScriptExecutorUtils {
 		LibMatrixDNN.DISPLAY_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_DNN_STATS);
 		DMLScript.FINEGRAINED_STATISTICS = dmlconf.getBooleanValue(DMLConfig.EXTRA_FINEGRAINED_STATS);
 
-		// Sets the maximum number of GPUs per process, -1 for all available
-		// GPUs
-		GPUContextPool.PER_PROCESS_MAX_GPUS = dmlconf.getIntValue(DMLConfig.MAX_GPUS_PER_PROCESS);
+		// Sets the GPUs to use for this process (a range, all GPUs, comma separated list or a specific GPU)
+		GPUContextPool.AVAILABLE_GPUS = dmlconf.getTextValue(DMLConfig.AVAILABLE_GPUS);
 		Statistics.startRunTimer();
 		try {
 			// run execute (w/ exception handling to ensure proper shutdown)

http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/conf/DMLConfig.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java b/src/main/java/org/apache/sysml/conf/DMLConfig.java
index c248098..a6a4b5e 100644
--- a/src/main/java/org/apache/sysml/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java
@@ -78,7 +78,7 @@ public class DMLConfig
 	public static final String EXTRA_FINEGRAINED_STATS = "systemml.stats.finegrained"; //boolean
 	public static final String EXTRA_GPU_STATS      = "systemml.stats.extraGPU"; //boolean
 	public static final String EXTRA_DNN_STATS      = "systemml.stats.extraDNN"; //boolean
-	public static final String MAX_GPUS_PER_PROCESS = "systemml.gpu.perProcessMax"; // boolean, maximum number of gpus to use, -1 for all
+	public static final String AVAILABLE_GPUS = "systemml.gpu.availableGPUs"; // String to specify which GPUs to use (a range, all GPUs, comma separated list or a specific GPU)
 
 	// Fraction of available memory to use. The available memory is computer when the GPUContext is created
 	// to handle the tradeoff on calling cudaMemGetInfo too often.
@@ -123,7 +123,7 @@ public class DMLConfig
 		_defaultVals.put(EXTRA_DNN_STATS,        "false" );
 
 		_defaultVals.put(GPU_MEMORY_UTILIZATION_FACTOR,      "0.9" );
-		_defaultVals.put(MAX_GPUS_PER_PROCESS,   "-1");
+		_defaultVals.put(AVAILABLE_GPUS,   "-1");
 	}
 	
 	public DMLConfig()

http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java
index ef38da8..88bf403 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContextPool.java
@@ -22,6 +22,7 @@ import static jcuda.driver.JCudaDriver.cuDeviceGetCount;
 import static jcuda.driver.JCudaDriver.cuInit;
 import static jcuda.runtime.JCuda.cudaGetDeviceProperties;
 
+import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -42,9 +43,9 @@ public class GPUContextPool {
 	protected static final Log LOG = LogFactory.getLog(GPUContextPool.class.getName());
 
 	/**
-	 * Maximum number of gpus to use, -1 for all
+	 * GPUs to use, can specify -1 to use all, comma separated list of GPU numbers, a specific GPU or a range
 	 */
-	public static int PER_PROCESS_MAX_GPUS = -1;
+	public static String AVAILABLE_GPUS;
 
 
 	private static long INITIAL_GPU_MEMORY_BUDGET = -1;
@@ -98,22 +99,32 @@ public class GPUContextPool {
 		deviceCount = deviceCountArray[0];
 		deviceProperties = new cudaDeviceProp[deviceCount];
 
-		if (PER_PROCESS_MAX_GPUS > 0)
-			deviceCount = Math.min(PER_PROCESS_MAX_GPUS, deviceCount);
+		try {
+			ArrayList<Integer> listOfGPUs = parseListString(AVAILABLE_GPUS, deviceCount);
 
-		// Initialize the list of devices
-		for (int i = 0; i < deviceCount; i++) {
-			cudaDeviceProp properties = new cudaDeviceProp();
-			cudaGetDeviceProperties(properties, i);
-			deviceProperties[i] = properties;
-		}
+			// Initialize the list of devices & the pool of GPUContexts
+			for (int i : listOfGPUs) {
+				cudaDeviceProp properties = new cudaDeviceProp();
+				cudaGetDeviceProperties(properties, i);
+				deviceProperties[i] = properties;
+				GPUContext gCtx = new GPUContext(i);
+				pool.add(gCtx);
+			}
+
+		} catch (IllegalArgumentException e) {
+			LOG.warn("Invalid setting for setting systemml.gpu.availableGPUs, defaulting to use ALL GPUs");
 
-		// Initialize the pool of GPUContexts
-		for (int i = 0; i < deviceCount; i++) {
-			GPUContext gCtx = new GPUContext(i);
-			pool.add(gCtx);
+			// Initialize the list of devices & the pool of GPUContexts
+			for (int i = 0; i < deviceCount; i++) {
+				cudaDeviceProp properties = new cudaDeviceProp();
+				cudaGetDeviceProperties(properties, i);
+				deviceProperties[i] = properties;
+				GPUContext gCtx = new GPUContext(i);
+				pool.add(gCtx);
+			}
 		}
 
+
 		// Initialize the initial memory budget
 		// If there are heterogeneous GPUs on the machine (different memory sizes)
 		// initially available memory is set to the GPU with the lowest memory
@@ -128,6 +139,7 @@ public class GPUContextPool {
 
 
 		GPUContext.LOG.info("Total number of GPUs on the machine: " + deviceCount);
+		GPUContext.LOG.info("GPUs being used: " + AVAILABLE_GPUS);
 		GPUContext.LOG.info("Initial GPU memory: " + initialGPUMemBudget());
 
 		//int[] device = {-1};
@@ -142,6 +154,56 @@ public class GPUContextPool {
 	}
 
 	/**
+	 * Parses a string into a list. The string can be of these forms:
+	 * 1. "-1" : all integers from range 0 to max - [0,1,2,3....max]
+	 * 2. "2,3,0" : comma separated list of integers - [0,2,3]
+	 * 3. "4" : a specific integer - [4]
+	 * 4. "0-4" : a range of integers - [0,1,2,3,4]
+	 * In ranges and comma separated lists, all values must be positive. Anything else is invalid.
+	 * @param str input string
+	 * @param max maximum range of integers
+	 * @return the list of integers in the parsed string
+	 */
+	public static ArrayList<Integer> parseListString(String str, int max) {
+		ArrayList<Integer> result = new ArrayList<>();
+		str = str.trim();
+		if (str == "-1") {  // all
+			for (int i=0; i<max; i++){
+				result.add(i);
+			}
+		} else if (str.contains("-")){  // range
+			String[] numbersStr = str.split("-");
+			if (numbersStr.length != 2) {
+				throw new IllegalArgumentException("Invalid string to parse to a list of numbers : " + str);
+			}
+			String beginStr = numbersStr[0];
+			String endStr = numbersStr[1];
+			int begin = Integer.parseInt(beginStr);
+			int end = Integer.parseInt(endStr);
+
+			for (int i=begin; i<=end; i++){
+				result.add(i);
+			}
+		} else if (str.contains(",")) { // comma separated list
+			String[] numbers = str.split(",");
+			for (int i = 0; i < numbers.length; i++) {
+				int n = Integer.parseInt(numbers[i].trim());
+				result.add(n);
+			}
+		} else {  // single number
+			int number = Integer.parseInt(str);
+			result.add(number);
+		}
+		// Check if all numbers between 0 and max
+		for (int n : result){
+			if (n < 0 || n >= max) {
+				throw new IllegalArgumentException("Invalid string (" + str + ") parsed to a list of numbers (" + result + ") which exceeds the maximum range : ");
+			}
+		}
+		return result;
+	}
+
+	/**
 	 * Reserves and gets an initialized list of GPUContexts
 	 *
 	 * @return null if no GPUContexts in pool, otherwise a valid list of GPUContext

http://git-wip-us.apache.org/repos/asf/systemml/blob/fec20930/src/test/java/org/apache/sysml/test/unit/UtilsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/unit/UtilsTest.java b/src/test/java/org/apache/sysml/test/unit/UtilsTest.java
new file mode 100644
index 0000000..b5dccd0
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/unit/UtilsTest.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.unit;
+
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * To test utility functions scattered throughout the codebase
+ */
+public class UtilsTest {
+
+	/**
+	 * Asserts that parsing {@code spec} with the upper bound {@code max}
+	 * produces exactly the given values, in the given order.
+	 */
+	private static void assertParsesTo(String spec, int max, Integer... values) {
+		Assert.assertEquals(Arrays.asList(values), GPUContextPool.parseListString(spec, max));
+	}
+
+	/** A lone "0" is expected to parse to the single value 0. */
+	@Test
+	public void testParseListString0() {
+		assertParsesTo("0", 10, 0);
+	}
+
+	/** A lone "7" below the bound of 10 is expected to parse to the single value 7. */
+	@Test
+	public void testParseListString1() {
+		assertParsesTo("7", 10, 7);
+	}
+
+	/** "-1" with a bound of 4 is expected to expand to 0 through 3. */
+	@Test
+	public void testParseListString2() {
+		assertParsesTo("-1", 4, 0, 1, 2, 3);
+	}
+
+	/** A comma separated list is expected to parse to its values in order. */
+	@Test
+	public void testParseListString3() {
+		assertParsesTo("0,1,2,3", 6, 0, 1, 2, 3);
+	}
+
+	/** The range "0-3" is expected to include both of its end points. */
+	@Test
+	public void testParseListString4() {
+		assertParsesTo("0-3", 6, 0, 1, 2, 3);
+	}
+
+	/** A single value that is not below the bound must be rejected. */
+	@Test(expected=IllegalArgumentException.class)
+	public void testParseListStringFail0() {
+		GPUContextPool.parseListString("7", 4);
+	}
+
+	/** A list containing values that are not below the bound must be rejected. */
+	@Test(expected=IllegalArgumentException.class)
+	public void testParseListStringFail1() {
+		GPUContextPool.parseListString("0,1,2,3", 2);
+	}
+
+	/** A string mixing the comma and range notations must be rejected. */
+	@Test(expected=IllegalArgumentException.class)
+	public void testParseListStringFail2() {
+		GPUContextPool.parseListString("0,1,2,3-4", 2);
+	}
+
+	/** A string with more than one '-' separator must be rejected. */
+	@Test(expected=IllegalArgumentException.class)
+	public void testParseListStringFail4() {
+		GPUContextPool.parseListString("-1-4", 6);
+	}
+}