You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2015/05/27 16:39:12 UTC
[11/22] incubator-singa git commit: try to fix bugs from
zsock_connect by binding the router before connecting to it;
todo test the changed code.
Try to fix bugs from zsock_connect by binding the router before connecting to it. TODO: test the changed code.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/cd9fc797
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/cd9fc797
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/cd9fc797
Branch: refs/heads/master
Commit: cd9fc7971369e5447b563abefeaefc31d988ac9b
Parents: a617e6c
Author: wang wei <wa...@comp.nus.edu.sg>
Authored: Tue May 26 16:18:01 2015 +0800
Committer: wang wei <wa...@comp.nus.edu.sg>
Committed: Tue May 26 16:18:01 2015 +0800
----------------------------------------------------------------------
include/trainer/trainer.h | 2 ++
src/trainer/trainer.cc | 14 +++++++-------
2 files changed, 9 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cd9fc797/include/trainer/trainer.h
----------------------------------------------------------------------
diff --git a/include/trainer/trainer.h b/include/trainer/trainer.h
index 57fee8c..37d7106 100644
--- a/include/trainer/trainer.h
+++ b/include/trainer/trainer.h
@@ -9,6 +9,7 @@
#include "neuralnet/neuralnet.h"
#include "trainer/worker.h"
#include "trainer/server.h"
+#include "communication/socket.h"
namespace singa {
/**
@@ -131,6 +132,7 @@ class Trainer{
protected:
int procs_id_;
+ shared_ptr<Router> router_;
};
} /* singa */
#endif // INCLUDE_TRAINER_TRAINER_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cd9fc797/src/trainer/trainer.cc
----------------------------------------------------------------------
diff --git a/src/trainer/trainer.cc b/src/trainer/trainer.cc
index 02e60a4..3f343af 100644
--- a/src/trainer/trainer.cc
+++ b/src/trainer/trainer.cc
@@ -56,6 +56,11 @@ void Trainer::Start(const ModelProto& mproto, const ClusterProto& cproto,
RegisterDefaultClasses(mproto);
auto cluster=Cluster::Get(cproto, procs_id);
+ router_=make_shared<Router>();
+ router_->Bind(kInprocRouterEndpoint);
+ if(cluster->nprocs()>1)
+ router_->Bind(cluster->endpoint());
+
// create servers
vector<shared_ptr<Server>> servers;
vector<HandleContext> ctx;
@@ -184,18 +189,13 @@ void Trainer::Run(int nworkers, int nservers,
const std::map<int, shared_ptr<Trainer::ParamShard>>& shards){
auto cluster=Cluster::Get();
procs_id_=cluster->procs_id();
- auto router=make_shared<Router>();
- router->Bind(kInprocRouterEndpoint);
- if(cluster->nprocs()>1)
- router->Bind(cluster->endpoint());
-
map<int, shared_ptr<Dealer>> interprocs_dealers;
Metric perf;
int perf_step=-1;
string perf_prefix;
bool stop=false;
while(!stop){
- Msg* msg=router->Receive();
+ Msg* msg=router_->Receive();
if(msg==nullptr){
LOG(ERROR)<<"Connection broken!";
exit(0);
@@ -277,7 +277,7 @@ void Trainer::Run(int nworkers, int nservers,
interprocs_dealers[procs_id]->Send(&msg);
*/
}else{
- router->Send(&msg);
+ router_->Send(&msg);
}
}
}