You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/06/18 17:21:41 UTC
[GitHub] szha closed pull request #11237: Bring back MXNET_GPU_COPY_NTHREADS env variable

szha closed pull request #11237: Bring back MXNET_GPU_COPY_NTHREADS env variable
URL: https://github.com/apache/incubator-mxnet/pull/11237
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this pull request comes from a fork, GitHub will not display
its diff once it has been merged, so the diff is supplied below:

diff --git a/docs/faq/env_var.md b/docs/faq/env_var.md
index f29301dec7a..12a898aadc2 100644
--- a/docs/faq/env_var.md
+++ b/docs/faq/env_var.md
@@ -14,7 +14,7 @@ export MXNET_GPU_WORKER_NTHREADS=3
   - Values: Int ```(default=2)```
   - The maximum number of threads to use on each GPU. This parameter is used to parallelize the computation within a single GPU card.
 * MXNET_GPU_COPY_NTHREADS
-  - Values: Int ```(default=1)```
+  - Values: Int ```(default=2)```
   - The maximum number of concurrent threads that do the memory copy job on each GPU.
 * MXNET_CPU_WORKER_NTHREADS
   - Values: Int ```(default=1)```
diff --git a/src/engine/threaded_engine_perdevice.cc b/src/engine/threaded_engine_perdevice.cc
index 2f77380baf8..97f258c1061 100644
--- a/src/engine/threaded_engine_perdevice.cc
+++ b/src/engine/threaded_engine_perdevice.cc
@@ -76,6 +76,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
     if (is_worker_) return;
     gpu_worker_nthreads_ = common::GetNumThreadsPerGPU();
     cpu_worker_nthreads_ = dmlc::GetEnv("MXNET_CPU_WORKER_NTHREADS", 1);
+    gpu_copy_nthreads_ = dmlc::GetEnv("MXNET_GPU_COPY_NTHREADS", 2);
     // create CPU task
     int cpu_priority_nthreads = dmlc::GetEnv("MXNET_CPU_PRIORITY_NTHREADS", 4);
     cpu_priority_worker_.reset(new ThreadWorkerBlock<kPriorityQueue>());
@@ -128,8 +129,8 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
         const FnProperty prop = opr_block->opr->prop;
         const bool is_copy = (prop == FnProperty::kCopyFromGPU ||
                               prop == FnProperty::kCopyToGPU);
-        const size_t nthread = gpu_worker_nthreads_;
         if (is_copy) {
+          const size_t nthread = gpu_copy_nthreads_;
           auto ptr = gpu_copy_workers_.Get(ctx.dev_id, [this, ctx, is_copy, nthread]() {
             // Signify to kernel that GPU is being used, so reserve cores as necessary
             OpenMP::Get()->set_reserve_cores(GetReserveCoreCount(true));
@@ -150,6 +151,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
             }
           }
         } else {
+          const size_t nthread = gpu_worker_nthreads_;
           auto ptr = gpu_normal_workers_.Get(ctx.dev_id, [this, ctx, is_copy, nthread]() {
             // Signify to kernel that GPU is being used, so reserve cores as necessary
             OpenMP::Get()->set_reserve_cores(GetReserveCoreCount(true));
@@ -194,6 +196,8 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
   size_t cpu_worker_nthreads_;
   /*! \brief number of concurrent thread each gpu worker uses */
   size_t gpu_worker_nthreads_;
+  /*! \brief number of concurrent thread each gpu copy worker uses */
+  size_t gpu_copy_nthreads_;
   // cpu worker
   common::LazyAllocArray<ThreadWorkerBlock<kWorkerQueue> > cpu_normal_workers_;
   // cpu priority worker


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services