Posted to commits@mxnet.apache.org by jx...@apache.org on 2018/05/15 18:39:09 UTC

[incubator-mxnet] branch master updated: Expose the number of GPUs. (#10354)

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 1214205  Expose the number of GPUs. (#10354)
1214205 is described below

commit 12142056152d1331d4c519f77eb75bad89b4f3eb
Author: Tobias Domhan <td...@gmail.com>
AuthorDate: Tue May 15 20:39:02 2018 +0200

    Expose the number of GPUs. (#10354)
    
    * Expose the number of GPUs.
    
    * Added GPU test.
    
    * Removed trailing whitespace.
    
    * making the compiler happy
    
    * Reverted CPU only logic and added CPU test.
    
    * Updated python docs.
    
    * Removing break from test.
    
    * no longer assert on 0 gpus
---
 include/mxnet/base.h                   | 19 +++++++++++++++++++
 include/mxnet/c_api.h                  |  7 +++++++
 python/mxnet/context.py                | 21 +++++++++++++++++++++
 src/c_api/c_api.cc                     |  6 ++++++
 tests/python/gpu/test_operator_gpu.py  |  3 +++
 tests/python/unittest/test_operator.py | 12 ++++++++++++
 6 files changed, 68 insertions(+)
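
For context, the user-facing addition is mx.context.num_gpus(). A minimal
usage sketch (assuming an MXNet build that includes this commit; the
fall-back-to-CPU selection is illustrative, not part of the change):

    import mxnet as mx

    # Prefer a GPU context when at least one device is visible.
    ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()
    x = mx.nd.ones((2, 2), ctx=ctx)
    print(x.context)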

diff --git a/include/mxnet/base.h b/include/mxnet/base.h
index 7cabfe5..bff2ab4 100644
--- a/include/mxnet/base.h
+++ b/include/mxnet/base.h
@@ -218,6 +218,11 @@ struct Context {
    */
   inline static Context GPU(int32_t dev_id = -1);
   /*!
+   * Get the number of GPUs available.
+   * \return The number of GPUs that are available.
+   */
+  inline static int32_t GetGPUCount();
+  /*!
    * Create a pinned CPU context.
    * \param dev_id the device id for corresponding GPU.
    * \return Pinned CPU context. -1 for current GPU.
@@ -307,6 +312,20 @@ inline Context Context::GPU(int32_t dev_id) {
   return Create(kGPU, dev_id);
 }
 
+inline int32_t Context::GetGPUCount() {
+#if MXNET_USE_CUDA
+  int32_t count;
+  cudaError_t e = cudaGetDeviceCount(&count);
+  if (e == cudaErrorNoDevice) {
+    return 0;
+  }
+  CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);
+  return count;
+#else
+  return 0;
+#endif
+}
+
 inline Context Context::FromString(const std::string& str) {
   Context ret;
   try {
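
Note the cudaErrorNoDevice branch above: a CUDA-enabled build reports zero
GPUs instead of failing when no device is visible. A sketch of observing
this from Python (assumes a CUDA build; CUDA_VISIBLE_DEVICES must be set
before the CUDA runtime initializes, so results can vary in a process that
has already touched CUDA):

    import os
    # Hide all devices from the CUDA runtime before MXNet loads.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    import mxnet as mx
    # cudaGetDeviceCount returns cudaErrorNoDevice here, which GetGPUCount
    # maps to a count of 0 rather than an error.
    print(mx.context.num_gpus())  # expected: 0
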
diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
index 9ac90d6..06e39bf 100644
--- a/include/mxnet/c_api.h
+++ b/include/mxnet/c_api.h
@@ -384,6 +384,13 @@ MXNET_DLL int MXSetNumOMPThreads(int thread_num);
 MXNET_DLL int MXEngineSetBulkSize(int bulk_size, int* prev_bulk_size);
 
 /*!
+ * \brief Get the number of GPUs.
+ * \param out pointer to an int that will hold the number of GPUs available.
+ * \return 0 when success, -1 when failure happens.
+ */
+MXNET_DLL int MXGetGPUCount(int* out);
+
+/*!
  * \brief get the MXNet library version as an integer
  * \param pointer to the integer holding the version number
  * \return 0 when success, -1 when failure happens
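
The new entry point follows the existing C API convention visible above:
0 on success, -1 on failure, with the message available via MXGetLastError.
A sketch of calling it directly through ctypes, bypassing the Python wrapper
added below (assumes the _LIB handle and py_str helper from mxnet.base):

    import ctypes
    from mxnet.base import _LIB, py_str

    count = ctypes.c_int()
    ret = _LIB.MXGetGPUCount(ctypes.byref(count))
    if ret != 0:
        # A non-zero return signals failure; fetch the error text separately.
        raise RuntimeError(py_str(_LIB.MXGetLastError()))
    print(count.value)
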
diff --git a/python/mxnet/context.py b/python/mxnet/context.py
index 5861890..61b7053 100644
--- a/python/mxnet/context.py
+++ b/python/mxnet/context.py
@@ -20,7 +20,11 @@
 from __future__ import absolute_import
 import threading
 import warnings
+import ctypes
 from .base import classproperty, with_metaclass, _MXClassPropertyMetaClass
+from .base import _LIB
+from .base import check_call
+
 
 class Context(with_metaclass(_MXClassPropertyMetaClass, object)):
     """Constructs a context.
@@ -237,6 +241,23 @@ def gpu(device_id=0):
     return Context('gpu', device_id)
 
 
+def num_gpus():
+    """Query CUDA for the number of GPUs present.
+
+    Raises
+    ------
+    Will raise an MXNetError if the underlying CUDA runtime call fails.
+
+    Returns
+    -------
+    count : int
+        The number of GPUs.
+
+    """
+    count = ctypes.c_int()
+    check_call(_LIB.MXGetGPUCount(ctypes.byref(count)))
+    return count.value
+
 def current_context():
     """Returns the current context.
 
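Because num_gpus() propagates CUDA errors as exceptions, callers that run on
mixed CPU/GPU fleets may want a guarded variant; the unit test at the end of
this patch uses the same pattern. A small sketch (the helper name
safe_num_gpus is hypothetical, not part of this commit):

    import mxnet as mx

    def safe_num_gpus():
        """GPU count, treating CUDA probe failures as zero GPUs."""
        try:
            return mx.context.num_gpus()
        except mx.MXNetError as e:
            # Swallow CUDA-related failures only, as the unit test does.
            if "CUDA" not in str(e):
                raise
            return 0

    print(safe_num_gpus())
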
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index b3dcd6a..467118b 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -116,6 +116,12 @@ int MXEngineSetBulkSize(int bulk_size, int* prev_bulk_size) {
   API_END();
 }
 
+int MXGetGPUCount(int* out) {
+  API_BEGIN();
+  *out = Context::GetGPUCount();
+  API_END();
+}
+
 int MXGetVersion(int *out) {
   API_BEGIN();
   *out = static_cast<int>(MXNET_VERSION);
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index 090773c..b9f2b67 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -1853,6 +1853,9 @@ def test_softmax_activation():
         assert_almost_equal(cpu_a.grad.asnumpy(), gpu_a.grad.asnumpy(),
                 atol = 1e-3, rtol = 1e-3)
 
+def test_context_num_gpus():
+    # Test that num_gpus reports at least one GPU, as the test is run on a GPU host.
+    assert mx.context.num_gpus() > 0
 
 if __name__ == '__main__':
     import nose
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 0a6de8e..e7976e0 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -6007,6 +6007,18 @@ def test_activation():
             name, op[0], shape, op[3], op[4], rtol_fd, atol_fd, num_eps)
 
 
+def test_context_num_gpus():
+    try:
+        # Note: the test runs on both GPU and CPU hosts, so we cannot
+        # assert a specific count here.
+        assert mx.context.num_gpus() >= 0
+    except mx.MXNetError as e:
+        # Note: on a CPU-only host, CUDA may be unable to determine the
+        # number of GPUs.
+        if "CUDA" not in str(e):
+            raise
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.