You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2015/08/14 05:26:51 UTC
incubator-singa git commit: SINGA-56 No automatic launching of
zookeeper service
Repository: incubator-singa
Updated Branches:
refs/heads/master f0071a5c8 -> 539fcee56
SINGA-56 No automatic launching of zookeeper service
Disabled launching the zk service in singa-run.sh;
users now need to start zookeeper explicitly.
If using the zk in /thirdparty, bin/zk-service.sh can be used to start/stop it.
The functionality of some scripts has changed slightly:
- singa-cleanup.sh no longer stops zookeeper
- singa-stop.sh now only removes the /singa/app path in zk,
so future job ids will not be reset to start from 0
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/539fcee5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/539fcee5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/539fcee5
Branch: refs/heads/master
Commit: 539fcee567cf597d8dae5e9c41a1b3b682db791f
Parents: f0071a5
Author: wang sheng <wa...@gmail.com>
Authored: Thu Aug 13 18:26:21 2015 +0800
Committer: wang sheng <wa...@gmail.com>
Committed: Thu Aug 13 19:09:30 2015 +0800
----------------------------------------------------------------------
bin/singa-cleanup.sh | 11 +++++------
bin/singa-console.sh | 2 +-
bin/singa-env.sh | 2 +-
bin/singa-run.sh | 15 +++++----------
bin/singa-stop.sh | 7 +++----
bin/zk-service.sh | 4 ++--
include/utils/cluster_rt.h | 5 +++--
src/utils/cluster_rt.cc | 18 ++++++++++++++++--
src/utils/tool.cc | 27 +++++++++++++++++++--------
9 files changed, 55 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/singa-cleanup.sh
----------------------------------------------------------------------
diff --git a/bin/singa-cleanup.sh b/bin/singa-cleanup.sh
index c987ca4..2c78dfb 100755
--- a/bin/singa-cleanup.sh
+++ b/bin/singa-cleanup.sh
@@ -20,16 +20,15 @@
# * limitations under the License.
# */
#
-# clean up singa processes and zookeeper metadata
+# kill all singa jobs and clean up zookeeper
#
# get environment variables
. `dirname "${BASH_SOURCE-$0}"`/singa-env.sh
+cd $SINGA_HOME
-# clean singa jobs and data
+# kill singa jobs
$SINGA_BIN/singa-stop.sh || exit 1
-# close zookeeper
-if [ $SINGA_MANAGES_ZK = true ]; then
- $SINGA_BIN/zk-service.sh stop || exit 1
-fi
+# cleanup whole zookeeper
+./singatool cleanup || exit 1
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/singa-console.sh
----------------------------------------------------------------------
diff --git a/bin/singa-console.sh b/bin/singa-console.sh
index 36913ce..b367911 100755
--- a/bin/singa-console.sh
+++ b/bin/singa-console.sh
@@ -78,7 +78,7 @@ case $1 in
$singa_kill
fi
done
- ./singatool clean $2 || exit 1
+ ./singatool remove $2 || exit 1
;;
*)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/singa-env.sh
----------------------------------------------------------------------
diff --git a/bin/singa-env.sh b/bin/singa-env.sh
index bfefc5e..98a0abc 100755
--- a/bin/singa-env.sh
+++ b/bin/singa-env.sh
@@ -20,7 +20,7 @@
# * limitations under the License.
# */
#
-# set Singa environment variables, includes:
+# set singa environment variables, includes:
# * SINGA_HOME
# * SINGA_BIN
# * SINGA_CONF
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/singa-run.sh
----------------------------------------------------------------------
diff --git a/bin/singa-run.sh b/bin/singa-run.sh
index 4c7bf04..da63d91 100755
--- a/bin/singa-run.sh
+++ b/bin/singa-run.sh
@@ -20,15 +20,15 @@
# * limitations under the License.
# */
#
-# run a Singa job
+# run a singa job
#
usage="Usage: singa-run.sh -conf=JOB_CONF [ --resume ]\n
- # set --resume if want to recover a job\n
+ set --resume if want to recover a job\n
### NOTICE ###\n
- # if you are using model.conf + cluster.conf,\n
- # please see how to combine them to a job.conf:\n
- # http://singa.incubator.apache.org/quick-start.html"
+ if you are using model.conf + cluster.conf,\n
+ please see how to combine them to a job.conf:\n
+ http://singa.incubator.apache.org/quick-start.html"
# check arguments
while [ $# != 0 ]; do
@@ -60,11 +60,6 @@ if [ ! -f $job_conf ]; then
fi
cd $SINGA_HOME
-# start zookeeper
-if [ $SINGA_MANAGES_ZK = true ]; then
- $SINGA_BIN/zk-service.sh start || exit 1
-fi
-
# generate unique job id
job_id=`./singatool create`
[ $? == 0 ] || exit 1
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/singa-stop.sh
----------------------------------------------------------------------
diff --git a/bin/singa-stop.sh b/bin/singa-stop.sh
index 115f3fb..5dce790 100755
--- a/bin/singa-stop.sh
+++ b/bin/singa-stop.sh
@@ -20,7 +20,7 @@
# * limitations under the License.
# */
#
-# clean up singa processes and zookeeper metadata
+# kill all singa jobs
#
# get environment variables
@@ -45,6 +45,5 @@ done
# wait for killall command
sleep 2
-# remove zk data
-echo Cleanning metadata in zookeeper ...
-./singatool cleanup || exit 1
+# remove job paths in zookeeper
+./singatool removeall || exit 1
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/zk-service.sh
----------------------------------------------------------------------
diff --git a/bin/zk-service.sh b/bin/zk-service.sh
index f9d3823..9999913 100755
--- a/bin/zk-service.sh
+++ b/bin/zk-service.sh
@@ -59,12 +59,12 @@ case $1 in
fi
# cd to SINGA_HOME as zookeeper.out will be here
cd $SINGA_HOME
- $ZK_HOME/bin/zkServer.sh start 2>/dev/null
+ $ZK_HOME/bin/zkServer.sh start
;;
stop)
# stop zk service
- $ZK_HOME/bin/zkServer.sh stop 2>/dev/null
+ $ZK_HOME/bin/zkServer.sh stop
;;
*)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/include/utils/cluster_rt.h
----------------------------------------------------------------------
diff --git a/include/utils/cluster_rt.h b/include/utils/cluster_rt.h
index 90f60cd..4c93c25 100644
--- a/include/utils/cluster_rt.h
+++ b/include/utils/cluster_rt.h
@@ -142,8 +142,9 @@ class JobManager {
bool GenerateJobID(int* id);
bool ListJobs(std::vector<JobInfo>* jobs);
bool ListJobProcs(int job, std::vector<std::string>* procs);
- bool Clean(int job);
- bool Cleanup();
+ bool Remove(int job);
+ bool RemoveAllJobs();
+ bool CleanUp();
private:
const int kJobsNotRemoved = 10;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/src/utils/cluster_rt.cc
----------------------------------------------------------------------
diff --git a/src/utils/cluster_rt.cc b/src/utils/cluster_rt.cc
index cd11bbd..454ecfc 100644
--- a/src/utils/cluster_rt.cc
+++ b/src/utils/cluster_rt.cc
@@ -84,6 +84,13 @@ bool ZKService::CreateNode(const char* path, const char* val, int flag,
} else if (ret == ZNODEEXISTS) {
LOG(WARNING) << "zookeeper node " << path << " already exists";
return true;
+ } else if (ret == ZCONNECTIONLOSS) {
+ LOG(ERROR) << "Cannot connect to zookeeper, "
+ << "please ensure it is running properly...\n"
+ << "If want to use zookeeper in our thirdparty folder, "
+ << "you can start it by:\n"
+ << "$ ./bin/zk-service start";
+ return false;
}
LOG(FATAL) << "Unhandled ZK error code: " << ret
<< " (zoo_create " << path << ")";
@@ -366,7 +373,7 @@ bool JobManager::ListJobs(vector<JobInfo>* jobs) {
return true;
}
-bool JobManager::Clean(int job) {
+bool JobManager::Remove(int job) {
string path = GetZKJobWorkspace(job) + kZKPathJobProc;
if (zk_.Exist(path.c_str())) {
return CleanPath(path.c_str(), false);
@@ -374,7 +381,14 @@ bool JobManager::Clean(int job) {
return true;
}
-bool JobManager::Cleanup() {
+bool JobManager::RemoveAllJobs() {
+ if (zk_.Exist(kZKPathApp.c_str())) {
+ return CleanPath(kZKPathApp.c_str(), false);
+ }
+ return true;
+}
+
+bool JobManager::CleanUp() {
if (zk_.Exist(kZKPathSinga.c_str())) {
return CleanPath(kZKPathSinga.c_str(), true);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/src/utils/tool.cc
----------------------------------------------------------------------
diff --git a/src/utils/tool.cc b/src/utils/tool.cc
index 85d74be..ebc0022 100644
--- a/src/utils/tool.cc
+++ b/src/utils/tool.cc
@@ -103,11 +103,19 @@ int view(int id) {
return SUCCESS;
}
-// clean a job path in zookeeper
-int clean(int id) {
+// remove a job path in zookeeper
+int remove(int id) {
singa::JobManager mngr(global.zookeeper_host());
if (!mngr.Init()) return RUN_ERR;
- if (!mngr.Clean(id)) return RUN_ERR;
+ if (!mngr.Remove(id)) return RUN_ERR;
+ return SUCCESS;
+}
+
+// remove all job paths in zookeeper
+int removeall() {
+ singa::JobManager mngr(global.zookeeper_host());
+ if (!mngr.Init()) return RUN_ERR;
+ if (!mngr.RemoveAllJobs()) return RUN_ERR;
return SUCCESS;
}
@@ -115,19 +123,20 @@ int clean(int id) {
int cleanup() {
singa::JobManager mngr(global.zookeeper_host());
if (!mngr.Init()) return RUN_ERR;
- if (!mngr.Cleanup()) return RUN_ERR;
+ if (!mngr.CleanUp()) return RUN_ERR;
return SUCCESS;
}
int main(int argc, char **argv) {
- std::string usage = "usage: singatool <command> <args>\n"
+ std::string usage = "Usage: singatool <command> <args>\n"
" getlogdir : show log dir in global config\n"
" create : generate a unique job id\n"
" genhost JOB_CONF : generate a host list\n"
" list : list running singa jobs\n"
" listall : list all singa jobs\n"
" view JOB_ID : view procs of a singa job\n"
- " clean JOB_ID : clean a job path in zookeeper\n"
+ " remove JOB_ID : remove a job path in zookeeper\n"
+ " removeall : remova all job paths in zookeeper\n"
" cleanup : clean all singa data in zookeeper\n";
// set logging level to ERROR and log to STDERR
FLAGS_logtostderr = 1;
@@ -151,8 +160,10 @@ int main(int argc, char **argv) {
stat = list(true);
else if (!strcmp(argv[1], "view"))
stat = (argc > 2) ? view(atoi(argv[2])) : ARG_ERR;
- else if (!strcmp(argv[1], "clean"))
- stat = (argc > 2) ? clean(atoi(argv[2])) : ARG_ERR;
+ else if (!strcmp(argv[1], "remove"))
+ stat = (argc > 2) ? remove(atoi(argv[2])) : ARG_ERR;
+ else if (!strcmp(argv[1], "removeall"))
+ stat = removeall();
else if (!strcmp(argv[1], "cleanup"))
stat = cleanup();
else