You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 11:03:50 UTC
svn commit: r1132225 - in /incubator/mesos/trunk/ec2:
deploy.centos64/root/mesos-ec2/setup deploy.lucid64/root/mesos-ec2/setup
Author: benh
Date: Sun Jun 5 09:03:50 2011
New Revision: 1132225
URL: http://svn.apache.org/viewvc?rev=1132225&view=rev
Log:
Fixed broken behavior of Lucid setup script with --ft flag (by using
cleaner code from CentOS script) and increased time given to ZooKeeper
to start up.
Modified:
incubator/mesos/trunk/ec2/deploy.centos64/root/mesos-ec2/setup
incubator/mesos/trunk/ec2/deploy.lucid64/root/mesos-ec2/setup
Modified: incubator/mesos/trunk/ec2/deploy.centos64/root/mesos-ec2/setup
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/ec2/deploy.centos64/root/mesos-ec2/setup?rev=1132225&r1=1132224&r2=1132225&view=diff
==============================================================================
--- incubator/mesos/trunk/ec2/deploy.centos64/root/mesos-ec2/setup (original)
+++ incubator/mesos/trunk/ec2/deploy.centos64/root/mesos-ec2/setup Sun Jun 5 09:03:50 2011
@@ -236,7 +236,7 @@ if [[ $NUM_ZOOS != 0 ]]; then
ssh $SSH_OPTS $zoo "/root/mesos/third_party/zookeeper-*/bin/zkServer.sh start </dev/null >/dev/null" & sleep 0.1
done
wait
- sleep 2
+ sleep 5
fi
echo "Stopping any existing Mesos cluster..."
Modified: incubator/mesos/trunk/ec2/deploy.lucid64/root/mesos-ec2/setup
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/ec2/deploy.lucid64/root/mesos-ec2/setup?rev=1132225&r1=1132224&r2=1132225&view=diff
==============================================================================
--- incubator/mesos/trunk/ec2/deploy.lucid64/root/mesos-ec2/setup (original)
+++ incubator/mesos/trunk/ec2/deploy.lucid64/root/mesos-ec2/setup Sun Jun 5 09:03:50 2011
@@ -20,6 +20,7 @@ BRANCH=$3
MASTERS_FILE="masters"
MASTERS=`cat $MASTERS_FILE`
NUM_MASTERS=`cat $MASTERS_FILE | wc -l`
+OTHER_MASTERS=`cat $MASTERS_FILE | sed '1d'`
SLAVES=`cat slaves`
ZOOS=`cat zoo`
@@ -80,69 +81,51 @@ if [[ $NUM_ZOOS != 0 ]] ; then
done
fi
-echo "SSH'ing to slaves to approve keys..."
-for slave in $SLAVES; do
- echo $slave
- ssh $SSH_OPTS $slave echo -n &
- sleep 0.3
+# Try to SSH to each cluster node to approve their key. Since some nodes may
+# be slow in starting, we retry failed slaves up to 3 times.
+TODO="$SLAVES $ZOO $OTHER_MASTERS" # List of nodes to try (initially all)
+TRIES="0" # Number of times we've tried so far
+echo "SSH'ing to other cluster nodes to approve keys..."
+while [ "e$TODO" != "e" ] && [ $TRIES -lt 4 ] ; do
+ NEW_TODO=
+ for slave in $TODO; do
+ echo $slave
+ ssh $SSH_OPTS $slave echo -n
+ if [ $? != 0 ] ; then
+ NEW_TODO="$NEW_TODO $slave"
+ fi
+ done
+ TRIES=$[$TRIES + 1]
+ if [ "e$NEW_TODO" != "e" ] && [ $TRIES -lt 4 ] ; then
+ sleep 15
+ TODO="$NEW_TODO"
+ echo "Re-attempting SSH to cluster nodes to approve keys..."
+ else
+ break;
+ fi
done
-echo "Waiting for ssh commands to finish..."
-wait
-
-if [[ $NUM_MASTERS -gt 1 ]] ; then
- echo "RSYNC'ing /root/mesos-ec2 to other master servers..."
- for master in `cat $MASTERS_FILE | sed '1d'`; do
- echo $master
- rsync -e "ssh $SSH_OPTS" -az /root/mesos-ec2 $master:/root & sleep 0.3
- done
- wait
-fi
-
-if [[ $NUM_ZOOS != 0 ]] ; then
- echo "RSYNC'ing /root/mesos-ec2 to ZooKeeper servers..."
- for zoo in $ZOOS; do
- echo $zoo
- rsync -e "ssh $SSH_OPTS" -az /root/mesos-ec2 $zoo:/root & sleep 0.3
- done
- wait
-fi
-
-echo "RSYNC'ing /root/mesos-ec2 to slaves..."
-for slave in $SLAVES; do
- echo $slave
- rsync -e "ssh $SSH_OPTS" -az /root/mesos-ec2 $slave:/root &
- scp $SSH_OPTS ~/.ssh/id_rsa $slave:.ssh &
+echo "RSYNC'ing /root/mesos-ec2 to other cluster nodes..."
+for node in $SLAVES $ZOO $OTHER_MASTERS; do
+ echo $node
+ rsync -e "ssh $SSH_OPTS" -az /root/mesos-ec2 $node:/root &
+ scp $SSH_OPTS ~/.ssh/id_rsa $node:.ssh &
sleep 0.3
done
wait
-echo "Running slave setup script on slave and zookeeper nodes..."
-for node in $SLAVES $ZOO; do
+echo "Running slave setup script on other cluster nodes..."
+for node in $SLAVES $ZOO $OTHER_MASTERS; do
echo $node
ssh -t $SSH_OPTS root@$node "mesos-ec2/setup-slave" & sleep 0.3
done
wait
-if [[ $NUM_MASTERS -gt 1 ]] ; then
- echo "Running slave setup script on other masters..."
- for master in `cat $MASTERS_FILE | sed '1d'`; do
- echo $master
- rsync -e "ssh $SSH_OPTS" mesos-ec2/setup-slave & sleep 0.3
- done
- wait
- echo "RSYNC'ing HDFS config files to other masters..."
- for master in `cat $MASTERS_FILE | sed '1d'`; do
- echo $master
- rsync -e "ssh $SSH_OPTS" -az $HADOOP_HOME/conf $master:$HADOOP_HOME & sleep 0.3
- done
- wait
-fi
-
-echo "RSYNC'ing HDFS config files to slaves..."
-for slave in $SLAVES; do
- echo $slave
- rsync -e "ssh $SSH_OPTS" -az $HADOOP_HOME/conf $slave:$HADOOP_HOME & sleep 0.3
+echo "RSYNC'ing HDFS config files to other cluster nodes..."
+for node in $SLAVES $ZOO $OTHER_MASTERS; do
+ echo $node
+ rsync -e "ssh $SSH_OPTS" -az $HADOOP_HOME/conf $node:$HADOOP_HOME &
+ sleep 0.3
done
wait
@@ -238,7 +221,7 @@ if [[ $NUM_ZOOS != 0 ]]; then
ssh $SSH_OPTS $zoo "/root/mesos/third_party/zookeeper-*/bin/zkServer.sh start </dev/null >/dev/null" & sleep 0.1
done
wait
- sleep 2
+ sleep 5
fi
echo "Stopping any existing Mesos cluster..."