Posted to commits@mxnet.apache.org by jx...@apache.org on 2017/12/01 18:47:32 UTC
[incubator-mxnet] branch master updated: fix race when temp space is used in copy & fix instance overwrite in g2c (#8867)
This is an automated email from the ASF dual-hosted git repository.
jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new d7da05b fix race when temp space is used in copy & fix instance overwrite in g2c (#8867)
d7da05b is described below
commit d7da05b61adc9e4aba3e9995809b0d06965ae3bb
Author: Ziyue Huang <zy...@gmail.com>
AuthorDate: Sat Dec 2 02:47:22 2017 +0800
fix race when temp space is used in copy & fix instance overwrite in g2c (#8867)
* fix race when temp space is used in copy
* fix instance overwrite in g2c
* example of g2c
* address comments
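
The "instance overwrite in g2c" is the group2ctxs handling fixed in executor_group.py below: building the per-device list with [{}] * ctx_len fills every slot with a reference to the same dict, so contexts assigned for one device silently show up on all of them. A minimal, self-contained sketch of that Python aliasing behavior (plain Python, not MXNet API):

    # [{}] * n repeats one dict object n times, it does not create n dicts
    shared = [{}] * 3
    shared[0]['dev1'] = 'ctx-for-device-0'
    print(shared)    # all three slots now show {'dev1': 'ctx-for-device-0'}

    # a comprehension builds an independent dict per device, as in the fix
    separate = [{} for _ in range(3)]
    separate[0]['dev1'] = 'ctx-for-device-0'
    print(separate)  # only the first slot is populated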
---
.../{matrix_fact_parallel_model.py => model.py} | 0
...ix_factorization_model_parallel.py => train.py} | 2 +-
python/mxnet/module/executor_group.py | 2 +-
src/ndarray/ndarray.cc | 59 +++++++++++++---------
tests/python/gpu/test_operator_gpu.py | 2 +-
tests/python/unittest/test_module.py | 11 ++--
6 files changed, 45 insertions(+), 31 deletions(-)
diff --git a/example/model-parallel/matrix_factorization/matrix_fact_parallel_model.py b/example/model-parallel/matrix_factorization/model.py
similarity index 100%
rename from example/model-parallel/matrix_factorization/matrix_fact_parallel_model.py
rename to example/model-parallel/matrix_factorization/model.py
diff --git a/example/model-parallel/matrix_factorization/matrix_factorization_model_parallel.py b/example/model-parallel/matrix_factorization/train.py
similarity index 97%
rename from example/model-parallel/matrix_factorization/matrix_factorization_model_parallel.py
rename to example/model-parallel/matrix_factorization/train.py
index ab607f1..7a2073b 100644
--- a/example/model-parallel/matrix_factorization/matrix_factorization_model_parallel.py
+++ b/example/model-parallel/matrix_factorization/train.py
@@ -76,7 +76,7 @@ if __name__ == '__main__':
# construct the module
# map the ctx_group attribute to the context assignment
- group2ctxs={'dev1':mx.cpu(), 'dev2':[mx.gpu(i) for i in range(num_gpus)]}
+ group2ctxs={'dev1':[mx.cpu()]*num_gpus, 'dev2':[mx.gpu(i) for i in range(num_gpus)]}
mod = mx.module.Module(symbol=net, context=[mx.cpu()]*num_gpus, data_names=['user', 'item'],
label_names=['score'], group2ctxs=group2ctxs)
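
With this change the example follows the group2ctxs convention enforced below: when the module's context argument is a list of N devices, each group in group2ctxs supplies one context (or list of contexts) per device. A toy-sized sketch of that usage, modeled on the updated train.py and on test_module_ctx_group further down (the symbol and device ids here are illustrative only, not part of the commit):

    import mxnet as mx

    # toy symbol with two ctx groups
    with mx.AttrScope(ctx_group='dev1'):
        a = mx.symbol.Variable('a') * 2
    with mx.AttrScope(ctx_group='dev2'):
        b = mx.symbol.Variable('b')
    net = a + b

    num_devices = 2
    # one entry per device in the context list, for each group
    group2ctxs = {'dev1': [mx.cpu(1)] * num_devices,
                  'dev2': [mx.cpu(2)] * num_devices}
    mod = mx.module.Module(symbol=net, context=[mx.cpu(0)] * num_devices,
                           data_names=['a', 'b'], label_names=None,
                           group2ctxs=group2ctxs)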
diff --git a/python/mxnet/module/executor_group.py b/python/mxnet/module/executor_group.py
index 2d680d4..6d24919 100755
--- a/python/mxnet/module/executor_group.py
+++ b/python/mxnet/module/executor_group.py
@@ -106,7 +106,7 @@ def _prepare_group2ctxs(group2ctxs, ctx_len):
should be %d" % ctx_len
return group2ctxs
elif isinstance(group2ctxs, dict):
- ret = [{}] * ctx_len
+ ret = [{} for i in range(ctx_len)]
for k, v in group2ctxs.items():
ctxs = None
if isinstance(v, ctx.Context):
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index f06196d..4a1963a 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -454,25 +454,22 @@ inline void CopyFromToDnsImpl(const NDArray& from, const NDArray& to, RunContext
// Make a copy of an NDArray based on storage type
template<typename from_xpu, typename to_xpu>
-void CopyFromToImpl(const NDArray& from, const NDArray& to, RunContext rctx) {
+void CopyFromToImpl(const NDArray& from, const NDArray& to,
+ RunContext rctx, const std::vector<Resource>& requested) {
using namespace std;
using namespace mshadow;
// if storage type doesn't match, cast the storage first
- auto from_stype = from.storage_type();
- auto to_stype = to.storage_type();
+ const NDArrayStorageType from_stype = from.storage_type();
+ const NDArrayStorageType to_stype = to.storage_type();
CHECK(from_stype == kDefaultStorage
|| to_stype == kDefaultStorage
|| from_stype == to_stype)
<< "Copying ndarray of stype = " << from_stype
<< " to stype = " << to_stype << " is not supported";
- const auto from_ctx = from.ctx();
- const auto to_ctx = to.ctx();
+ const Context from_ctx = from.ctx();
+ const Context to_ctx = to.ctx();
bool is_train = Imperative::Get()->is_training();
- std::vector<Resource> requested;
- if (is_same<from_xpu, mshadow::gpu>::value && from_stype != to_stype) {
- requested.push_back(ResourceManager::Get()->Request(from_ctx,
- ResourceRequest(ResourceRequest::kTempSpace)));
- }
+
OpContext opctx{is_train,
rctx,
engine::CallbackOnComplete(),
@@ -518,43 +515,57 @@ void CopyFromTo(const NDArray& from, const NDArray& to, int priority) {
CHECK(from.shape().ndim() != 0)
<< "source operands have zero dimension shape";
// important: callback must always capture by value
- int a = from.ctx().dev_mask();
- int b = to.ctx().dev_mask();
+ const Context from_ctx = from.ctx();
+ const int a = from_ctx.dev_mask();
+ const int b = to.ctx().dev_mask();
std::vector<Engine::VarHandle> const_vars;
if (from.var() != to.var()) const_vars.push_back(from.var());
+ const NDArrayStorageType from_stype = from.storage_type();
+ const NDArrayStorageType to_stype = to.storage_type();
+
+ std::vector<Engine::VarHandle> mutable_vars(1, to.var());
+
+ std::vector<Resource> requested;
+ if (a == gpu::kDevMask && from_stype != to_stype) {
+ Resource rsc = ResourceManager::Get()->Request(from_ctx,
+ ResourceRequest(ResourceRequest::kTempSpace));
+ requested.push_back(rsc);
+ mutable_vars.push_back(rsc.var);
+ }
+
if (a == cpu::kDevMask && b == cpu::kDevMask) {
Engine::Get()->PushAsync(
- [from, to](RunContext ctx, Engine::CallbackOnComplete on_complete) {
- CopyFromToImpl<cpu, cpu>(from, to, ctx);
+ [from, to, requested](RunContext ctx, Engine::CallbackOnComplete on_complete) {
+ CopyFromToImpl<cpu, cpu>(from, to, ctx, requested);
on_complete();
- }, from.ctx(), const_vars, {to.var()},
+ }, from.ctx(), const_vars, mutable_vars,
FnProperty::kNormal, priority, PROFILER_MESSAGE("CopyCPU2CPU"));
} else {
#if MXNET_USE_CUDA
if (a == cpu::kDevMask && b == gpu::kDevMask) {
Engine::Get()->PushAsync(
- [from, to](RunContext ctx, Engine::CallbackOnComplete on_complete) {
- CopyFromToImpl<cpu, gpu>(from, to, ctx);
+ [from, to, requested](RunContext ctx, Engine::CallbackOnComplete on_complete) {
+ CopyFromToImpl<cpu, gpu>(from, to, ctx, requested);
ctx.get_stream<gpu>()->Wait();
on_complete();
- }, to.ctx(), const_vars, {to.var()},
+ }, to.ctx(), const_vars, mutable_vars,
FnProperty::kCopyToGPU, priority, PROFILER_MESSAGE("CopyCPU2GPU"));
} else if (a == gpu::kDevMask && b == cpu::kDevMask) {
Engine::Get()->PushAsync(
- [from, to](RunContext ctx, Engine::CallbackOnComplete on_complete) {
- CopyFromToImpl<gpu, cpu>(from, to, ctx);
+ [from, to, requested](RunContext ctx, Engine::CallbackOnComplete on_complete) {
+ CopyFromToImpl<gpu, cpu>(from, to, ctx, requested);
ctx.get_stream<gpu>()->Wait();
on_complete();
- }, from.ctx(), const_vars, {to.var()},
+ }, from.ctx(), const_vars, mutable_vars,
FnProperty::kCopyFromGPU, priority, PROFILER_MESSAGE("CopyGPU2CPU"));
} else if (a == gpu::kDevMask && b == gpu::kDevMask) {
Engine::Get()->PushAsync(
- [from, to](RunContext ctx, Engine::CallbackOnComplete on_complete) {
- CopyFromToImpl<gpu, gpu>(from, to, ctx);
+ [from, to, requested](RunContext ctx, Engine::CallbackOnComplete on_complete) {
+ CopyFromToImpl<gpu, gpu>(from, to, ctx, requested);
ctx.get_stream<gpu>()->Wait();
on_complete();
- }, from.ctx(), const_vars, {to.var()},
+ }, from.ctx(), const_vars, mutable_vars,
from.dtype() != to.dtype() ? FnProperty::kNormal : FnProperty::kCopyFromGPU,
priority, PROFILER_MESSAGE("CopyGPU2GPU"));
} else {
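
The race fixed above: when the source of a copy is on GPU and the storage types differ, a kTempSpace resource is needed for the storage cast. It used to be requested inside CopyFromToImpl, after the operation had already been scheduled, so the engine had no variable to serialize on and two such copies on the same device could clobber the shared temp space. The change requests the resource up front in CopyFromTo and adds rsc.var to mutable_vars, letting the engine order the copies. A hedged Python sketch of the kind of workload that exercises this path (shapes and contexts are illustrative only; the commit's actual regression coverage is running test_sparse_nd_copy on GPU, see below):

    import mxnet as mx

    # two row_sparse sources on the same GPU, dense destinations on CPU;
    # each copy needs a storage cast, which uses temp space on the source GPU
    src1 = mx.nd.zeros((1000, 1000), ctx=mx.gpu(0), stype='row_sparse')
    src2 = mx.nd.zeros((1000, 1000), ctx=mx.gpu(0), stype='row_sparse')
    dst1 = mx.nd.zeros((1000, 1000), ctx=mx.cpu(0))
    dst2 = mx.nd.zeros((1000, 1000), ctx=mx.cpu(0))

    # both copies are queued asynchronously; before this fix the temp-space
    # variable was not tracked, so the engine could overlap them on that buffer
    src1.copyto(dst1)
    src2.copyto(dst2)
    mx.nd.waitall()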
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index fad5940..cecda21 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -37,7 +37,7 @@ from test_gluon_rnn import *
from test_sparse_ndarray import test_create_csr, test_create_row_sparse, test_sparse_nd_slice
from test_sparse_ndarray import test_create_sparse_nd_empty, test_create_sparse_nd_from_sparse
from test_sparse_ndarray import test_create_sparse_nd_from_dense, test_create_sparse_nd_infer_shape
-from test_sparse_ndarray import test_sparse_nd_check_format
+from test_sparse_ndarray import test_sparse_nd_check_format, test_sparse_nd_copy
from test_sparse_operator import *
from test_ndarray import *
diff --git a/tests/python/unittest/test_module.py b/tests/python/unittest/test_module.py
index ed5fe26..08302b8 100644
--- a/tests/python/unittest/test_module.py
+++ b/tests/python/unittest/test_module.py
@@ -71,7 +71,7 @@ def test_module_input_grads():
def test_module_ctx_group():
- def check_module_ctx_group(ctxs, group2ctxs):
+ def check_module_ctx_group(ctxs, group2ctxs, grad_ctxs=None):
with mx.AttrScope(ctx_group='dev1'):
a = mx.symbol.Variable('a')
a = a * 2
@@ -94,10 +94,13 @@ def test_module_ctx_group():
mod2.backward([mx.nd.ones(shape)])
mod2_input_grads = mod2.get_input_grads()
- assert np.all(mod1_input_grads[0].asnumpy() == mod2_input_grads[0].asnumpy())
- assert np.all(mod1_input_grads[1].asnumpy() == mod2_input_grads[1].asnumpy())
+ if grad_ctxs is not None:
+ assert(mod1_input_grads[0].context == grad_ctxs[0])
+ assert(mod1_input_grads[1].context == grad_ctxs[1])
+ assert(np.all(mod1_input_grads[0].asnumpy() == mod2_input_grads[0].asnumpy()))
+ assert(np.all(mod1_input_grads[1].asnumpy() == mod2_input_grads[1].asnumpy()))
- check_module_ctx_group([mx.cpu(0)], {'dev1': mx.cpu(1), 'dev2': mx.cpu(2)})
+ check_module_ctx_group([mx.cpu(0)], {'dev1': mx.cpu(1), 'dev2': mx.cpu(2)}, grad_ctxs=[mx.cpu(1), mx.cpu(2)])
check_module_ctx_group([mx.cpu(0), mx.cpu(1)],
[{'dev1': mx.cpu(2), 'dev2': mx.cpu(3)}, {'dev1': mx.cpu(4), 'dev2': mx.cpu(5)}])
check_module_ctx_group([mx.cpu(0), mx.cpu(1)], {'dev1': mx.cpu(2), 'dev2': mx.cpu(3)})
--
To stop receiving notification emails like this one, please contact "commits@mxnet.apache.org" <co...@mxnet.apache.org>.