You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2015/08/14 05:26:51 UTC

incubator-singa git commit: SINGA-56 No automatic launching of zookeeper service

Repository: incubator-singa
Updated Branches:
  refs/heads/master f0071a5c8 -> 539fcee56


SINGA-56 No automatic launching of zookeeper service

Disabled the automatic launching of the zk service in singa-run.sh.
Users now need to start zookeeper explicitly.
If you use the zk in /thirdparty, bin/zk-service.sh can be used to start/stop it.

The functionality of some scripts has changed slightly:
  - singa-cleanup.sh no longer stops zookeeper
  - singa-stop.sh now only removes the /singa/app path in zk,
    so it no longer resets future job ids to start from 0


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/539fcee5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/539fcee5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/539fcee5

Branch: refs/heads/master
Commit: 539fcee567cf597d8dae5e9c41a1b3b682db791f
Parents: f0071a5
Author: wang sheng <wa...@gmail.com>
Authored: Thu Aug 13 18:26:21 2015 +0800
Committer: wang sheng <wa...@gmail.com>
Committed: Thu Aug 13 19:09:30 2015 +0800

----------------------------------------------------------------------
 bin/singa-cleanup.sh       | 11 +++++------
 bin/singa-console.sh       |  2 +-
 bin/singa-env.sh           |  2 +-
 bin/singa-run.sh           | 15 +++++----------
 bin/singa-stop.sh          |  7 +++----
 bin/zk-service.sh          |  4 ++--
 include/utils/cluster_rt.h |  5 +++--
 src/utils/cluster_rt.cc    | 18 ++++++++++++++++--
 src/utils/tool.cc          | 27 +++++++++++++++++++--------
 9 files changed, 55 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/singa-cleanup.sh
----------------------------------------------------------------------
diff --git a/bin/singa-cleanup.sh b/bin/singa-cleanup.sh
index c987ca4..2c78dfb 100755
--- a/bin/singa-cleanup.sh
+++ b/bin/singa-cleanup.sh
@@ -20,16 +20,15 @@
 # * limitations under the License.
 # */
 # 
-# clean up singa processes and zookeeper metadata
+# kill all singa jobs and clean up zookeeper
 #
 
 # get environment variables
 . `dirname "${BASH_SOURCE-$0}"`/singa-env.sh
+cd $SINGA_HOME
 
-# clean singa jobs and data
+# kill singa jobs
 $SINGA_BIN/singa-stop.sh || exit 1
 
-# close zookeeper
-if [ $SINGA_MANAGES_ZK = true ]; then
-  $SINGA_BIN/zk-service.sh stop || exit 1
-fi
+# cleanup whole zookeeper
+./singatool cleanup || exit 1

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/singa-console.sh
----------------------------------------------------------------------
diff --git a/bin/singa-console.sh b/bin/singa-console.sh
index 36913ce..b367911 100755
--- a/bin/singa-console.sh
+++ b/bin/singa-console.sh
@@ -78,7 +78,7 @@ case $1 in
         $singa_kill
       fi
     done
-    ./singatool clean $2 || exit 1
+    ./singatool remove $2 || exit 1
     ;;
   
   *)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/singa-env.sh
----------------------------------------------------------------------
diff --git a/bin/singa-env.sh b/bin/singa-env.sh
index bfefc5e..98a0abc 100755
--- a/bin/singa-env.sh
+++ b/bin/singa-env.sh
@@ -20,7 +20,7 @@
 # * limitations under the License.
 # */
 #
-# set Singa environment variables, includes:
+# set singa environment variables, includes:
 #   * SINGA_HOME
 #   * SINGA_BIN
 #   * SINGA_CONF

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/singa-run.sh
----------------------------------------------------------------------
diff --git a/bin/singa-run.sh b/bin/singa-run.sh
index 4c7bf04..da63d91 100755
--- a/bin/singa-run.sh
+++ b/bin/singa-run.sh
@@ -20,15 +20,15 @@
 # * limitations under the License.
 # */
 #
-# run a Singa job
+# run a singa job
 #
 
 usage="Usage: singa-run.sh -conf=JOB_CONF [ --resume ]\n
-       # set --resume if want to recover a job\n
+        set --resume if want to recover a job\n
        ### NOTICE ###\n
-       # if you are using model.conf + cluster.conf,\n
-       # please see how to combine them to a job.conf:\n
-       # http://singa.incubator.apache.org/quick-start.html"
+        if you are using model.conf + cluster.conf,\n
+        please see how to combine them to a job.conf:\n
+        http://singa.incubator.apache.org/quick-start.html"
 
 # check arguments
 while [ $# != 0 ]; do
@@ -60,11 +60,6 @@ if [ ! -f $job_conf ]; then
 fi
 cd $SINGA_HOME
 
-# start zookeeper
-if [ $SINGA_MANAGES_ZK = true ]; then
-  $SINGA_BIN/zk-service.sh start || exit 1
-fi
-
 # generate unique job id
 job_id=`./singatool create`
 [ $? == 0 ] || exit 1

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/singa-stop.sh
----------------------------------------------------------------------
diff --git a/bin/singa-stop.sh b/bin/singa-stop.sh
index 115f3fb..5dce790 100755
--- a/bin/singa-stop.sh
+++ b/bin/singa-stop.sh
@@ -20,7 +20,7 @@
 # * limitations under the License.
 # */
 # 
-# clean up singa processes and zookeeper metadata
+# kill all singa jobs
 #
 
 # get environment variables
@@ -45,6 +45,5 @@ done
 # wait for killall command
 sleep 2
 
-# remove zk data
-echo Cleanning metadata in zookeeper ...
-./singatool cleanup || exit 1
+# remove job paths in zookeeper
+./singatool removeall || exit 1

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/bin/zk-service.sh
----------------------------------------------------------------------
diff --git a/bin/zk-service.sh b/bin/zk-service.sh
index f9d3823..9999913 100755
--- a/bin/zk-service.sh
+++ b/bin/zk-service.sh
@@ -59,12 +59,12 @@ case $1 in
     fi
     # cd to SINGA_HOME as zookeeper.out will be here
     cd $SINGA_HOME
-    $ZK_HOME/bin/zkServer.sh start 2>/dev/null
+    $ZK_HOME/bin/zkServer.sh start
     ;;
 
   stop)
     # stop zk service
-    $ZK_HOME/bin/zkServer.sh stop 2>/dev/null
+    $ZK_HOME/bin/zkServer.sh stop
     ;;
   
   *)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/include/utils/cluster_rt.h
----------------------------------------------------------------------
diff --git a/include/utils/cluster_rt.h b/include/utils/cluster_rt.h
index 90f60cd..4c93c25 100644
--- a/include/utils/cluster_rt.h
+++ b/include/utils/cluster_rt.h
@@ -142,8 +142,9 @@ class JobManager {
   bool GenerateJobID(int* id);
   bool ListJobs(std::vector<JobInfo>* jobs);
   bool ListJobProcs(int job, std::vector<std::string>* procs);
-  bool Clean(int job);
-  bool Cleanup();
+  bool Remove(int job);
+  bool RemoveAllJobs();
+  bool CleanUp();
 
  private:
   const int kJobsNotRemoved = 10;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/src/utils/cluster_rt.cc
----------------------------------------------------------------------
diff --git a/src/utils/cluster_rt.cc b/src/utils/cluster_rt.cc
index cd11bbd..454ecfc 100644
--- a/src/utils/cluster_rt.cc
+++ b/src/utils/cluster_rt.cc
@@ -84,6 +84,13 @@ bool ZKService::CreateNode(const char* path, const char* val, int flag,
   } else if (ret == ZNODEEXISTS) {
     LOG(WARNING) << "zookeeper node " << path << " already exists";
     return true;
+  } else if (ret == ZCONNECTIONLOSS) {
+    LOG(ERROR) << "Cannot connect to zookeeper, "
+               << "please ensure it is running properly...\n"
+               << "If want to use zookeeper in our thirdparty folder, "
+               << "you can start it by:\n"
+               << "$ ./bin/zk-service start";
+    return false;
   }
   LOG(FATAL) << "Unhandled ZK error code: " << ret
              << " (zoo_create " << path << ")";
@@ -366,7 +373,7 @@ bool JobManager::ListJobs(vector<JobInfo>* jobs) {
   return true;
 }
 
-bool JobManager::Clean(int job) {
+bool JobManager::Remove(int job) {
   string path = GetZKJobWorkspace(job) + kZKPathJobProc;
   if (zk_.Exist(path.c_str())) {
     return CleanPath(path.c_str(), false);
@@ -374,7 +381,14 @@ bool JobManager::Clean(int job) {
   return true;
 }
 
-bool JobManager::Cleanup() {
+bool JobManager::RemoveAllJobs() {
+  if (zk_.Exist(kZKPathApp.c_str())) {
+    return CleanPath(kZKPathApp.c_str(), false);
+  }
+  return true;
+}
+
+bool JobManager::CleanUp() {
   if (zk_.Exist(kZKPathSinga.c_str())) {
     return CleanPath(kZKPathSinga.c_str(), true);
   }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/539fcee5/src/utils/tool.cc
----------------------------------------------------------------------
diff --git a/src/utils/tool.cc b/src/utils/tool.cc
index 85d74be..ebc0022 100644
--- a/src/utils/tool.cc
+++ b/src/utils/tool.cc
@@ -103,11 +103,19 @@ int view(int id) {
   return SUCCESS;
 }
 
-// clean a job path in zookeeper
-int clean(int id) {
+// remove a job path in zookeeper
+int remove(int id) {
   singa::JobManager mngr(global.zookeeper_host());
   if (!mngr.Init()) return RUN_ERR;
-  if (!mngr.Clean(id)) return RUN_ERR;
+  if (!mngr.Remove(id)) return RUN_ERR;
+  return SUCCESS;
+}
+
+// remove all job paths in zookeeper
+int removeall() {
+  singa::JobManager mngr(global.zookeeper_host());
+  if (!mngr.Init()) return RUN_ERR;
+  if (!mngr.RemoveAllJobs()) return RUN_ERR;
   return SUCCESS;
 }
 
@@ -115,19 +123,20 @@ int clean(int id) {
 int cleanup() {
   singa::JobManager mngr(global.zookeeper_host());
   if (!mngr.Init()) return RUN_ERR;
-  if (!mngr.Cleanup()) return RUN_ERR;
+  if (!mngr.CleanUp()) return RUN_ERR;
   return SUCCESS;
 }
 
 int main(int argc, char **argv) {
-  std::string usage = "usage: singatool <command> <args>\n"
+  std::string usage = "Usage: singatool <command> <args>\n"
       " getlogdir        :  show log dir in global config\n"
       " create           :  generate a unique job id\n"
       " genhost JOB_CONF :  generate a host list\n"
       " list             :  list running singa jobs\n"
       " listall          :  list all singa jobs\n"
       " view JOB_ID      :  view procs of a singa job\n"
-      " clean JOB_ID     :  clean a job path in zookeeper\n"
+      " remove JOB_ID    :  remove a job path in zookeeper\n"
+      " removeall        :  remova all job paths in zookeeper\n"
       " cleanup          :  clean all singa data in zookeeper\n";
   // set logging level to ERROR and log to STDERR
   FLAGS_logtostderr = 1;
@@ -151,8 +160,10 @@ int main(int argc, char **argv) {
       stat = list(true);
     else if (!strcmp(argv[1], "view"))
       stat = (argc > 2) ? view(atoi(argv[2])) : ARG_ERR;
-    else if (!strcmp(argv[1], "clean"))
-      stat = (argc > 2) ? clean(atoi(argv[2])) : ARG_ERR;
+    else if (!strcmp(argv[1], "remove"))
+      stat = (argc > 2) ? remove(atoi(argv[2])) : ARG_ERR;
+    else if (!strcmp(argv[1], "removeall"))
+      stat = removeall();
     else if (!strcmp(argv[1], "cleanup"))
       stat = cleanup();
     else