You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2015/07/14 13:26:17 UTC
[2/2] incubator-singa git commit: SINGA-33 Automatically launch a
number of processes in the cluster
SINGA-33 Automatically launch a number of processes in the cluster
1. add conf/hostfile
This is the global list of hosts that singa can use.
2. add tool/gen_hosts.py
This script generates a host list for a specific singa job.
3. all hostfiles in examples are removed
When the singa-run.sh script is run, gen_hosts.py reads
cluster.conf and generates a job.hosts file in the same directory.
The job.hosts file lists the hosts that will run this job.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/acb96d4c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/acb96d4c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/acb96d4c
Branch: refs/heads/master
Commit: acb96d4cd9682c4a9f2b85d529ca5d4ce97a0605
Parents: 9a6e09f
Author: wang sheng <wa...@gmail.com>
Authored: Wed Jul 15 00:56:16 2015 +0800
Committer: wang sheng <wa...@gmail.com>
Committed: Wed Jul 15 02:55:01 2015 +0800
----------------------------------------------------------------------
.gitignore | 9 +++++---
bin/singa-run.sh | 22 ++++++++----------
bin/singa-stop.sh | 4 ++--
conf/hostfile | 1 +
examples/cifar10/hostfile | 1 -
examples/mnist/hostfile | 8 -------
tool/gen_hosts.py | 52 ++++++++++++++++++++++++++++++++++++++++++
tool/plot/__init__.py | 0
8 files changed, 71 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 962f6b9..a419725 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,14 +14,17 @@
*.cproject
*.log
*.nfs*
+*_pb2.py
+*.pyc
+*.pb.h
+*.pb.cc
+*.hosts
+*.out
src/test/data/*
tmp
log*
build/
tmp/
-include/proto/*.h
-src/proto/*.cc
-src/proto/*.pb.h
.sync
*lmdb
*.binaryproto
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/bin/singa-run.sh
----------------------------------------------------------------------
diff --git a/bin/singa-run.sh b/bin/singa-run.sh
index 46ed715..45be0a1 100755
--- a/bin/singa-run.sh
+++ b/bin/singa-run.sh
@@ -39,7 +39,7 @@ if [ $# = 1 ] ; then
if [[ $1 = "-conf="* ]] ; then
valid_args=true
conf_path=${1:6}
- host_path=$conf_path/hostfile
+ host_path=$conf_path/job.hosts
fi
elif [ $# = 2 ] ; then
if [[ $1 = "-cluster="* ]] && [[ $2 = "-model="* ]] ; then
@@ -62,11 +62,7 @@ BASE=`cd "$BIN/..">/dev/null; pwd`
cd $BASE
# clenup singa data
-if [ -z $host_path ] ; then
- $BIN/singa-stop.sh
-else
- $BIN/singa-stop.sh $host_path
-fi
+$BIN/singa-stop.sh conf/hostfile
# start zookeeper
$BIN/zk-service.sh start 2>/dev/null
@@ -76,12 +72,18 @@ sleep 3
# check mode
if [ $# = 2 ] ; then
- # start singa process
+ # start single singa process
cmd="./singa "$@
echo starting singa ...
echo executing : $cmd
$cmd
elif [ $# = 1 ] ; then
+ # start multiple singa processes
+ # generate host file
+ cmd=" python tool/gen_hosts.py -conf=$conf_path/cluster.conf \
+ -src=conf/hostfile -dst=$host_path"
+ echo $cmd
+ $cmd
# ssh and start singa processes
ssh_options="-oStrictHostKeyChecking=no \
-oUserKnownHostsFile=/dev/null \
@@ -102,8 +104,4 @@ elif [ $# = 1 ] ; then
fi
# cleanup singa data
-if [ -z $host_path ] ; then
- $BIN/singa-stop.sh
-else
- $BIN/singa-stop.sh $host_path
-fi
+$BIN/singa-stop.sh conf/hostfile
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/bin/singa-stop.sh
----------------------------------------------------------------------
diff --git a/bin/singa-stop.sh b/bin/singa-stop.sh
index ebd74e8..1b36675 100755
--- a/bin/singa-stop.sh
+++ b/bin/singa-stop.sh
@@ -37,7 +37,7 @@ BIN=`cd "$BIN">/dev/null; pwd`
BASE=`cd "$BIN/..">/dev/null; pwd`
ZKDATA_DIR="/tmp/zookeeper"
-PROC_NAME="lt-singa"
+PROC_NAME="*singa"
HOST_FILE=$1
@@ -52,7 +52,7 @@ elif [ $# = 1 ] ; then
-oLogLevel=quiet"
hosts=(`cat $HOST_FILE |cut -d ' ' -f 1`)
for i in ${hosts[@]} ; do
- cmd="killall -s SIGKILL "$PROC_NAME
+ cmd="killall -s SIGKILL -r "$PROC_NAME
echo kill singa @ $i ...
if [ $i == localhost ] ; then
$cmd
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/conf/hostfile
----------------------------------------------------------------------
diff --git a/conf/hostfile b/conf/hostfile
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/conf/hostfile
@@ -0,0 +1 @@
+localhost
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/examples/cifar10/hostfile
----------------------------------------------------------------------
diff --git a/examples/cifar10/hostfile b/examples/cifar10/hostfile
deleted file mode 100644
index 2fbb50c..0000000
--- a/examples/cifar10/hostfile
+++ /dev/null
@@ -1 +0,0 @@
-localhost
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/examples/mnist/hostfile
----------------------------------------------------------------------
diff --git a/examples/mnist/hostfile b/examples/mnist/hostfile
deleted file mode 100644
index 1781444..0000000
--- a/examples/mnist/hostfile
+++ /dev/null
@@ -1,8 +0,0 @@
-192.168.26.10
-192.168.26.11
-192.168.26.12
-192.168.26.13
-192.168.26.15
-192.168.26.16
-192.168.26.17
-192.168.26.18
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/tool/gen_hosts.py
----------------------------------------------------------------------
diff --git a/tool/gen_hosts.py b/tool/gen_hosts.py
new file mode 100755
index 0000000..e2ed29d
--- /dev/null
+++ b/tool/gen_hosts.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+import argparse
+import os
+import sys
+from google.protobuf import text_format
+from plot.cluster_pb2 import ClusterProto
+
+# parse command line
+parser = argparse.ArgumentParser(description='Generate host list from host file for a SINGA job')
+parser.add_argument('-conf', dest='conf', metavar='CONF_FILE', required=True, help='cluster.conf file')
+parser.add_argument('-src', dest='src', metavar='SRC_FILE', required=True, help='global host file')
+parser.add_argument('-dst', dest='dst', metavar='DST_FILE', required=True, help='generated list')
+args = parser.parse_args();
+
+# change to SINGA_HOME
+abspath = os.path.abspath(__file__)
+dname = os.path.dirname(abspath)
+os.chdir(dname+'/..')
+
+# read from .conf file
+fd_conf = open(args.conf, 'r')
+cluster = ClusterProto()
+text_format.Merge(str(fd_conf.read()), cluster)
+nworker_procs = cluster.nworker_groups * cluster.nworkers_per_group / cluster.nworkers_per_procs
+nserver_procs = cluster.nserver_groups * cluster.nservers_per_group / cluster.nservers_per_procs
+nprocs = 0
+if (cluster.server_worker_separate) :
+ nprocs = nworker_procs+nserver_procs
+else:
+ nprocs = max(nworker_procs, nserver_procs)
+fd_conf.close()
+
+# read from source host file
+fd_src = open(args.src, 'r')
+hosts = []
+for line in fd_src:
+ line = line.strip()
+ if len(line) == 0 or line[0] == '#':
+ continue
+ hosts.append(line)
+fd_src.close()
+
+# write to dst file
+num_hosts = len(hosts)
+if (num_hosts == 0):
+ print 'source host file is empty'
+ sys.exit()
+fd_dst = open(args.dst, 'w')
+for i in range(nprocs):
+ fd_dst.write(hosts[i % num_hosts] + '\n')
+fd_dst.close()
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acb96d4c/tool/plot/__init__.py
----------------------------------------------------------------------
diff --git a/tool/plot/__init__.py b/tool/plot/__init__.py
new file mode 100644
index 0000000..e69de29