You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 05:32:25 UTC
svn commit: r1131632 - in /incubator/mesos/trunk/src/ec2: ./
deploy.karmic64/root/nexus-ec2/ deploy.solaris/root/nexus-ec2/
Author: benh
Date: Sun Jun 5 03:32:24 2011
New Revision: 1131632
URL: http://svn.apache.org/viewvc?rev=1131632&view=rev
Log:
EC2 scripts modified with a --ft parameter that will create a ZooKeeper group and instance; it will also launch multiple master instances and start them. Slaves and masters are connected to ZooKeeper. The nexus-ec2 directory now contains a zoo file with the id of the ZK instance, and the master file can now contain multiple master addresses.
Added:
incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/zoo
incubator/mesos/trunk/src/ec2/deploy.solaris/root/nexus-ec2/zoo
Modified:
incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/redeploy-nexus
incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/setup
incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/start-nexus
incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/stop-nexus
incubator/mesos/trunk/src/ec2/nexus_ec2.py
Modified: incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/redeploy-nexus
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/redeploy-nexus?rev=1131632&r1=1131631&r2=1131632&view=diff
==============================================================================
--- incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/redeploy-nexus (original)
+++ incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/redeploy-nexus Sun Jun 5 03:32:24 2011
@@ -2,11 +2,19 @@
SLAVES=/root/nexus-ec2/slaves
MASTER=/root/nexus-ec2/master
+ZOO=/root/nexus-ec2/zoo
SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=2"
-echo "RSYNC'ing /root/nexus to slaves..."
-for slave in `cat $SLAVES`; do
- echo $slave
- rsync -e "ssh $SSH_OPTS" -az --exclude '*.d' --exclude '*.o' --exclude '*.cpp' --exclude '*.hpp' --exclude '*.pyc' --exclude 'nexus/frameworks/hadoop-0.20.0/logs/*' /root/nexus $slave:/root
+SERVERS=`cat $SLAVES`
+SERVERS+=" "
+SERVERS+=`cat $MASTER | sed '1d'`
+SERVERS+=" "
+SERVERS+=`cat $ZOO`
+
+
+echo "RSYNC'ing /root/nexus to other servers..."
+for server in $SERVERS; do
+ echo $server
+ rsync -e "ssh $SSH_OPTS" -az --exclude '*.d' --exclude '*.o' --exclude '*.cpp' --exclude '*.hpp' --exclude '*.pyc' --exclude 'nexus/frameworks/hadoop-0.20.0/logs/*' /root/nexus $server:/root
done
Modified: incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/setup
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/setup?rev=1131632&r1=1131631&r2=1131632&view=diff
==============================================================================
--- incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/setup (original)
+++ incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/setup Sun Jun 5 03:32:24 2011
@@ -11,6 +11,9 @@ export PATH=$PATH:/root/scala-2.7.7.fina
MASTER=/root/nexus-ec2/master
SLAVES=/root/nexus-ec2/slaves
+ZOO=/root/nexus-ec2/zoo
+
+ISFT=`cat $ZOO | wc -l`
# Scripts that get used for/while running Nexus.
SCRIPTS="copy-dir
@@ -45,9 +48,24 @@ echo "Setting executable permissions on
for s in $SCRIPTS; do chmod u+x $s; done
echo "SSH'ing to local machine to approve key..."
-ssh -q $SSH_OPTS `cat $MASTER` echo -n
+for master in `cat $MASTER`; do
+ echo $master
+ ssh $SSH_OPTS $master echo -n &
+ sleep 0.3
+done
+
ssh -q $SSH_OPTS localhost echo -n
+if [[ $ISFT != 0 ]] ; then
+ echo "SSH'ing to Zoo server(s) to approve keys..."
+ for zoo in `cat $ZOO`; do
+ echo $zoo
+ ssh $SSH_OPTS $zoo echo -n &
+ ssh $SSH_OPTS $zoo mkdir -p /tmp/zookeeper &
+ sleep 0.3
+ done
+fi
+
echo "SSH'ing to slaves to approve keys..."
for slave in `cat $SLAVES`; do
echo $slave
@@ -57,6 +75,22 @@ done
echo "Waiting for commands to finish..."
wait
+if [[ `cat $MASTER | wc -l` > 1 ]] ; then
+ echo "RSYNC'ing /root/nexus-ec2 to other master servers..."
+ for master in `cat $MASTER | sed '1d'`; do
+ echo $master
+ rsync -e "ssh $SSH_OPTS" -az /root/nexus-ec2 $master:/root
+ done
+fi
+
+if [[ $ISFT != 0 ]] ; then
+ echo "RSYNC'ing /root/nexus-ec2 to other Zoo servers..."
+ for zoo in `cat $ZOO`; do
+ echo $zoo
+ rsync -e "ssh $SSH_OPTS" -az /root/nexus-ec2 $zoo:/root
+ done
+fi
+
echo "RSYNC'ing /root/nexus-ec2 to slaves..."
for slave in `cat $SLAVES`; do
echo $slave
Modified: incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/start-nexus
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/start-nexus?rev=1131632&r1=1131631&r2=1131632&view=diff
==============================================================================
--- incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/start-nexus (original)
+++ incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/start-nexus Sun Jun 5 03:32:24 2011
@@ -2,12 +2,38 @@
cd /root/nexus-ec2
MASTER="`cat master`"
+MASTER1="`cat master | head -1`"
SLAVES="`cat slaves`"
+ZOO1="`cat zoo | head -1`"
+ZOO="`cat zoo`"
+
+ISFT="`cat zoo | wc -l`"
SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=2"
-echo "Starting master on $MASTER"
-ssh $SSH_OPTS $MASTER "/root/nexus-ec2/nexus-daemon nexus-master -p 1111 </dev/null >/dev/null"
+if [[ $ISFT == 0 ]]; then
+ master_arg="1@${MASTER}:1111"
+else
+ master_arg="zoo://${ZOO1}:2181"
+fi
+
+
+if [[ $ISFT != 0 ]]; then
+ echo "Starting ZooKeeper on $ZOO1"
+ ssh $SSH_OPTS $ZOO1 "/root/nexus/src/third_party/zookeeper-*/bin/zkServer.sh start </dev/null >/dev/null"
+
+ sleep 2
+
+ masterid=1
+ for master in $MASTER; do
+ echo "Starting master $masterid on $MASTER"
+ ssh $SSH_OPTS $master "/root/nexus-ec2/nexus-daemon nexus-master -p 1111 -f $master_arg </dev/null >/dev/null"
+ masterid=$(($masterid+1))
+ done
+else
+ echo "Starting master on $MASTER1"
+ ssh $SSH_OPTS $MASTER1 "/root/nexus-ec2/nexus-daemon nexus-master -p 1111 </dev/null >/dev/null"
+fi
COUNT=''
CPUS=''
@@ -38,10 +64,19 @@ fi
for slave in $SLAVES; do
echo "Starting $COUNT slave(s) on $slave"
- ssh $SSH_OPTS $slave "for ((i = 0; i < $COUNT; i++)); do /root/nexus-ec2/nexus-daemon nexus-slave 1@$MASTER:1111 $CPUS $MEM; done </dev/null >/dev/null" &
+ ssh $SSH_OPTS $slave "for ((i = 0; i < $COUNT; i++)); do /root/nexus-ec2/nexus-daemon nexus-slave ${master_arg} $CPUS $MEM; done </dev/null >/dev/null" &
sleep 0.1
done
wait
+if [[ $ISFT != 0 ]]; then
+ echo "ZooKeeper is running at"
+ for zoo in $ZOO; do
+ echo " $zoo:2181"
+ done
+fi
+
echo "Everything's started! You can view the master Web UI at"
-echo " http://$MASTER:8080"
+for master in $MASTER; do
+ echo " http://$master:8080"
+done
Modified: incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/stop-nexus
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/stop-nexus?rev=1131632&r1=1131631&r2=1131632&view=diff
==============================================================================
--- incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/stop-nexus (original)
+++ incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/stop-nexus Sun Jun 5 03:32:24 2011
@@ -3,6 +3,9 @@ cd /root/nexus-ec2
MASTER="`cat master`"
SLAVES="`cat slaves`"
+ZOO="`cat zoo`"
+
+ISFT="`cat zoo | wc -l`"
SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=2"
@@ -14,4 +17,18 @@ done
wait
echo "Stopping master on $MASTER"
-ssh $SSH_OPTS $MASTER pkill nexus-master
+for master in $MASTER; do
+ echo "Stopping slave(s) on $master"
+ ssh $SSH_OPTS $master pkill nexus-master &
+ sleep 0.1
+done
+wait
+
+if [[ $ISFT != 0 ]] ; then
+ for zoo in $ZOO; do
+ echo "Stopping ZK on $zoo"
+ ssh $SSH_OPTS $zoo "/root/nexus/src/third_party/zookeeper-*/bin/zkServer.sh stop </dev/null >/dev/null"
+ sleep 0.1
+ done
+ wait
+fi
Added: incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/zoo
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/zoo?rev=1131632&view=auto
==============================================================================
--- incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/zoo (added)
+++ incubator/mesos/trunk/src/ec2/deploy.karmic64/root/nexus-ec2/zoo Sun Jun 5 03:32:24 2011
@@ -0,0 +1 @@
+{{zoo}}
Added: incubator/mesos/trunk/src/ec2/deploy.solaris/root/nexus-ec2/zoo
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/ec2/deploy.solaris/root/nexus-ec2/zoo?rev=1131632&view=auto
==============================================================================
--- incubator/mesos/trunk/src/ec2/deploy.solaris/root/nexus-ec2/zoo (added)
+++ incubator/mesos/trunk/src/ec2/deploy.solaris/root/nexus-ec2/zoo Sun Jun 5 03:32:24 2011
@@ -0,0 +1 @@
+{{zoo}}
Modified: incubator/mesos/trunk/src/ec2/nexus_ec2.py
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/ec2/nexus_ec2.py?rev=1131632&r1=1131631&r2=1131632&view=diff
==============================================================================
--- incubator/mesos/trunk/src/ec2/nexus_ec2.py (original)
+++ incubator/mesos/trunk/src/ec2/nexus_ec2.py Sun Jun 5 03:32:24 2011
@@ -48,7 +48,11 @@ def parse_args():
parser.add_option("--resume", action="store_true", default=False,
help="Resume installation on a previously launched cluster " +
"(for debugging)")
+ parser.add_option("-f", "--ft", default="1",
+ help="Number of masters to run. Default is 1. " +
+ "Greater values cause Nexus to run in FT mode with ZooKeeper")
(opts, args) = parser.parse_args()
+ opts.ft = int(opts.ft)
if len(args) != 2:
parser.print_help()
sys.exit(1)
@@ -96,12 +100,15 @@ def wait_for_instances(conn, reservation
def launch_cluster(conn, opts, cluster_name):
+ zoo_res = None
print "Setting up security groups..."
master_group = get_or_make_group(conn, cluster_name + "-master")
slave_group = get_or_make_group(conn, cluster_name + "-slaves")
+ zoo_group = get_or_make_group(conn, cluster_name + "-zoo")
if master_group.rules == []: # Group was just now created
master_group.authorize(src_group=master_group)
master_group.authorize(src_group=slave_group)
+ master_group.authorize(src_group=zoo_group)
master_group.authorize('tcp', 22, 22, '0.0.0.0/0')
master_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
master_group.authorize('tcp', 50030, 50030, '0.0.0.0/0')
@@ -109,15 +116,22 @@ def launch_cluster(conn, opts, cluster_n
if slave_group.rules == []: # Group was just now created
slave_group.authorize(src_group=master_group)
slave_group.authorize(src_group=slave_group)
+ slave_group.authorize(src_group=zoo_group)
slave_group.authorize('tcp', 22, 22, '0.0.0.0/0')
slave_group.authorize('tcp', 8080, 8081, '0.0.0.0/0')
slave_group.authorize('tcp', 50060, 50060, '0.0.0.0/0')
slave_group.authorize('tcp', 50075, 50075, '0.0.0.0/0')
+ if zoo_group.rules == []: # Group was just now created
+ zoo_group.authorize(src_group=master_group)
+ zoo_group.authorize(src_group=slave_group)
+ zoo_group.authorize(src_group=zoo_group)
+ zoo_group.authorize('tcp', 22, 22, '0.0.0.0/0')
+ zoo_group.authorize('tcp', 2181, 2181, '0.0.0.0/0')
print "Checking for running cluster..."
reservations = conn.get_all_instances()
for res in reservations:
group_names = [g.id for g in res.groups]
- if master_group.name in group_names or slave_group.name in group_names:
+ if master_group.name in group_names or slave_group.name in group_names or zoo_group.name in group_names:
active = [i for i in res.instances if i.state in ['pending', 'running']]
if len(active) > 0:
print >> stderr, ("ERROR: There are already instances running in " +
@@ -142,9 +156,17 @@ def launch_cluster(conn, opts, cluster_n
master_res = image.run(key_name = opts.key_pair,
security_groups = [master_group],
instance_type = master_type,
- placement = opts.zone)
+ placement = opts.zone,
+ min_count = opts.ft,
+ max_count = opts.ft)
print "Launched master, regid = " + master_res.id
- return (master_res, slave_res)
+ if opts.ft > 1:
+ zoo_res = image.run(key_name = opts.key_pair,
+ security_groups = [zoo_group],
+ instance_type = opts.instance_type,
+ placement = opts.zone)
+ print "Launched zoo, regid = " + zoo_res.id
+ return (master_res, slave_res, zoo_res)
def get_existing_cluster(conn, opts, cluster_name):
@@ -152,6 +174,7 @@ def get_existing_cluster(conn, opts, clu
reservations = conn.get_all_instances()
master_res = None
slave_res = None
+ zoo_res = None
for res in reservations:
active = [i for i in res.instances if i.state in ['pending', 'running']]
if len(active) > 0:
@@ -160,10 +183,14 @@ def get_existing_cluster(conn, opts, clu
master_res = res
elif group_names == [cluster_name + "-slaves"]:
slave_res = res
+ elif group_names == [cluster_name + "-zoo"]:
+ zoo_res = res
if master_res != None and slave_res != None:
print "Found master regid: " + master_res.id
print "Found slave regid: " + slave_res.id
- return (master_res, slave_res)
+ if zoo_res != None:
+ print "Found slave regid: " + zoo_res.id
+ return (master_res, slave_res, zoo_res)
else:
if master_res == None and slave_res != None:
print "ERROR: Could not find master in group " + cluster_name + "-master"
@@ -174,14 +201,21 @@ def get_existing_cluster(conn, opts, clu
sys.exit(1)
-def deploy_files(conn, root_dir, instance, opts, master_res, slave_res):
+def deploy_files(conn, root_dir, instance, opts, master_res, slave_res, zoo_res):
# TODO: Speed up deployment by creating a temp directory with the
# template-transformed files and then rsyncing it
+
master = master_res.instances[0].public_dns_name
+
template_vars = {
- "master" : master,
+ "master" : '\n'.join([i.public_dns_name for i in master_res.instances]),
"slave_list" : '\n'.join([i.public_dns_name for i in slave_res.instances])
}
+
+ if opts.ft > 1:
+ zoo = zoo_res.instances[0].public_dns_name
+ template_vars[ "zoo" ] = '\n'.join([i.public_dns_name for i in zoo_res.instances])
+
for path, dirs, files in os.walk(root_dir):
dest_dir = os.path.join('/', path[len(root_dir):])
if len(files) > 0: # Only mkdir for low-level directories since we use -p
@@ -219,18 +253,20 @@ def main():
conn = boto.connect_ec2()
if action == "launch":
if opts.resume:
- (master_res, slave_res) = get_existing_cluster(conn, opts, cluster_name)
+ (master_res, slave_res, zoo_res) = get_existing_cluster(conn, opts, cluster_name)
else:
- (master_res, slave_res) = launch_cluster(conn, opts, cluster_name)
+ (master_res, slave_res, zoo_res) = launch_cluster(conn, opts, cluster_name)
print "Waiting for instances to start up..."
time.sleep(5)
wait_for_instances(conn, master_res)
wait_for_instances(conn, slave_res)
+ if opts.ft > 1:
+ wait_for_instances(conn, zoo_res)
print "Waiting 20 more seconds..."
time.sleep(20)
print "Deploying files to master..."
deploy_files(conn, "deploy." + opts.os, master_res.instances[0],
- opts, master_res, slave_res)
+ opts, master_res, slave_res, zoo_res)
print "Copying SSH key %s to master..." % opts.identity_file
master = master_res.instances[0].public_dns_name
ssh(master, opts, 'mkdir -p /root/.ssh')
@@ -243,13 +279,16 @@ def main():
response = raw_input("Are you sure you want to shut down the cluster " +
cluster_name + "? (y/N) ")
if response == "y":
- (master_res, slave_res) = get_existing_cluster(conn, opts, cluster_name)
+ (master_res, slave_res, zoo_res) = get_existing_cluster(conn, opts, cluster_name)
print "Shutting down master..."
master_res.stop_all()
print "Shutting down slaves..."
slave_res.stop_all()
+ if opts.ft > 1:
+ print "Shutting down zoo..."
+ zoo_res.stop_all()
elif action == "login":
- (master_res, slave_res) = get_existing_cluster(conn, opts, cluster_name)
+ (master_res, slave_res, zoo_res) = get_existing_cluster(conn, opts, cluster_name)
master = master_res.instances[0].public_dns_name
print "Logging into master " + master + "..."
proxy_opt = ""