You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/01/31 04:33:46 UTC

[GitHub] cjolivier01 closed pull request #9389: [SUBMODULE] Upgrade ps-lite to the latest version

cjolivier01 closed pull request #9389: [SUBMODULE] Upgrade ps-lite to the latest version
URL: https://github.com/apache/incubator-mxnet/pull/9389
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this pull request comes from a fork, GitHub does not display
its diff once the request is merged, so the full diff is reproduced below:

diff --git a/ps-lite b/ps-lite
index 2ce8b9a256..aee325276b 160000
--- a/ps-lite
+++ b/ps-lite
@@ -1 +1 @@
-Subproject commit 2ce8b9a256207947acfa2cb9b09ab74b8de74547
+Subproject commit aee325276bccb092f516df0bce30d3a8333f4038
diff --git a/src/kvstore/kvstore.cc b/src/kvstore/kvstore.cc
index 8f749f3f78..4d17fffa1a 100644
--- a/src/kvstore/kvstore.cc
+++ b/src/kvstore/kvstore.cc
@@ -26,8 +26,10 @@
 #include <stdlib.h>
 #include <dmlc/logging.h>
 #include "./kvstore_local.h"
+
 #if MXNET_USE_DIST_KVSTORE
 #include "./kvstore_dist.h"
+std::atomic<int> mxnet::kvstore::KVStoreDist::customer_id_{0};
 #endif  // MXNET_USE_DIST_KVSTORE
 #if MXNET_USE_NCCL
 #include "./kvstore_nccl.h"
diff --git a/src/kvstore/kvstore_dist.h b/src/kvstore/kvstore_dist.h
index b00d0de935..e98102b6b0 100644
--- a/src/kvstore/kvstore_dist.h
+++ b/src/kvstore/kvstore_dist.h
@@ -51,10 +51,12 @@ class KVStoreDist : public KVStoreLocal {
   explicit KVStoreDist(bool use_device_comm)
       : KVStoreLocal(use_device_comm), ps_worker_(nullptr), server_(nullptr) {
     if (IsWorkerNode()) {
-      ps_worker_ = new ps::KVWorker<real_t>(0);
-      ps::StartAsync("mxnet\0");
+      int new_customer_id = GetNewCustomerId();
+      ps_worker_ = new ps::KVWorker<real_t>(0, new_customer_id);
+      ps::StartAsync(new_customer_id, "mxnet\0");
       if (!ps::Postoffice::Get()->is_recovery()) {
         ps::Postoffice::Get()->Barrier(
+          new_customer_id,
           ps::kWorkerGroup + ps::kServerGroup + ps::kScheduler);
       }
     }
@@ -67,12 +69,12 @@ class KVStoreDist : public KVStoreLocal {
     if (IsWorkerNode()) {
       if (barrier_before_exit_) {
         Barrier();
-        if (get_rank() == 0) {
+        if (get_rank() == 0 && ps_worker_->get_customer()->customer_id() == 0) {
           // stop the executor at servers
           SendCommandToServers(static_cast<int>(CommandType::kStopServer), "");
         }
       }
-      ps::Finalize(barrier_before_exit_);
+      ps::Finalize(ps_worker_->get_customer()->customer_id(), barrier_before_exit_);
       delete ps_worker_;
     }
   }
@@ -96,7 +98,7 @@ class KVStoreDist : public KVStoreLocal {
   }
 
   void Barrier() override {
-    ps::Postoffice::Get()->Barrier(ps::kWorkerGroup);
+    ps::Postoffice::Get()->Barrier(ps_worker_->get_customer()->customer_id(), ps::kWorkerGroup);
   }
 
   void SendCommandToServers(int cmd_id,
@@ -127,13 +129,13 @@ class KVStoreDist : public KVStoreLocal {
       server_->set_controller(controller);
     }
 
-    ps::StartAsync("mxnet_server\0");
+    ps::StartAsync(0, "mxnet_server\0");
     if (!ps::Postoffice::Get()->is_recovery()) {
-      ps::Postoffice::Get()->Barrier(
+      ps::Postoffice::Get()->Barrier(0,
         ps::kWorkerGroup + ps::kServerGroup + ps::kScheduler);
     }
     if (server_) server_->Run();
-    ps::Finalize();
+    ps::Finalize(0, true);
     if (server_) {
       delete server_;
     }
@@ -141,6 +143,13 @@ class KVStoreDist : public KVStoreLocal {
   }
 
  private:
+  static std::atomic<int> customer_id_;
+
+  static int GetNewCustomerId() {
+    return customer_id_++;
+  }
+
+
   /**
    * \brief struct for ps keys and lens
    */


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services