You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ha...@apache.org on 2017/12/08 19:27:33 UTC

[incubator-mxnet] branch v1.0.0 updated: Fix race condition in engine start/stop (#8995)

This is an automated email from the ASF dual-hosted git repository.

haibin pushed a commit to branch v1.0.0
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/v1.0.0 by this push:
     new 0f05c65  Fix race condition in engine start/stop (#8995)
0f05c65 is described below

commit 0f05c65492e38f89012e5595c6d60bb67e2d418f
Author: Eric Junyuan Xie <pi...@users.noreply.github.com>
AuthorDate: Fri Dec 8 10:14:57 2017 -0800

    Fix race condition in engine start/stop (#8995)
---
 src/engine/threaded_engine_perdevice.cc | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/engine/threaded_engine_perdevice.cc b/src/engine/threaded_engine_perdevice.cc
index 28bc92f..e7e222f 100644
--- a/src/engine/threaded_engine_perdevice.cc
+++ b/src/engine/threaded_engine_perdevice.cc
@@ -95,9 +95,10 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
     int cpu_priority_nthreads = dmlc::GetEnv("MXNET_CPU_PRIORITY_NTHREADS", 4);
     cpu_priority_worker_.reset(new ThreadWorkerBlock<kPriorityQueue>());
     cpu_priority_worker_->pool.reset(new ThreadPool(
-        cpu_priority_nthreads, [this]() {
-          this->CPUWorker(Context(), cpu_priority_worker_.get());
-        }));
+        cpu_priority_nthreads,
+        [this](std::shared_ptr<ThreadPool::SimpleEvent> ready_event) {
+          this->CPUWorker(Context(), cpu_priority_worker_.get(), ready_event);
+        }, true));
     // GPU tasks will be created lazily
   }
 
@@ -122,9 +123,10 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
           auto ptr =
           cpu_normal_workers_.Get(dev_id, [this, ctx, nthread]() {
               auto blk = new ThreadWorkerBlock<kWorkerQueue>();
-              blk->pool.reset(new ThreadPool(nthread, [this, ctx, blk] () {
-                    this->CPUWorker(ctx, blk);
-                  }));
+              blk->pool.reset(new ThreadPool(nthread,
+                  [this, ctx, blk](std::shared_ptr<ThreadPool::SimpleEvent> ready_event) {
+                    this->CPUWorker(ctx, blk, ready_event);
+                  }, true));
               return blk;
             });
           if (ptr) {
@@ -259,12 +261,14 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
    */
   template<dmlc::ConcurrentQueueType type>
   inline void CPUWorker(Context ctx,
-                        ThreadWorkerBlock<type> *block) {
+                        ThreadWorkerBlock<type> *block,
+                        std::shared_ptr<ThreadPool::SimpleEvent> ready_event) {
     this->is_worker_ = true;
     auto* task_queue = &(block->task_queue);
     RunContext run_ctx{ctx, nullptr};
     // execute task
     OprBlock* opr_block;
+    ready_event->signal();
     while (task_queue->Pop(&opr_block)) {
       this->ExecuteOprBlock(run_ctx, opr_block);
     }

-- 
To stop receiving notification emails like this one, please contact
['"commits@mxnet.apache.org" <co...@mxnet.apache.org>'].