You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 11:03:50 UTC

svn commit: r1132225 - in /incubator/mesos/trunk/ec2: deploy.centos64/root/mesos-ec2/setup deploy.lucid64/root/mesos-ec2/setup

Author: benh
Date: Sun Jun  5 09:03:50 2011
New Revision: 1132225

URL: http://svn.apache.org/viewvc?rev=1132225&view=rev
Log:
Fixed broken behavior of Lucid setup script with --ft flag (by using
cleaner code from CentOS script) and increased time given to ZooKeeper
to start up.

Modified:
    incubator/mesos/trunk/ec2/deploy.centos64/root/mesos-ec2/setup
    incubator/mesos/trunk/ec2/deploy.lucid64/root/mesos-ec2/setup

Modified: incubator/mesos/trunk/ec2/deploy.centos64/root/mesos-ec2/setup
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/ec2/deploy.centos64/root/mesos-ec2/setup?rev=1132225&r1=1132224&r2=1132225&view=diff
==============================================================================
--- incubator/mesos/trunk/ec2/deploy.centos64/root/mesos-ec2/setup (original)
+++ incubator/mesos/trunk/ec2/deploy.centos64/root/mesos-ec2/setup Sun Jun  5 09:03:50 2011
@@ -236,7 +236,7 @@ if [[ $NUM_ZOOS != 0 ]]; then
     ssh $SSH_OPTS $zoo "/root/mesos/third_party/zookeeper-*/bin/zkServer.sh start </dev/null >/dev/null" & sleep 0.1
   done
   wait
-  sleep 2
+  sleep 5
 fi
 
 echo "Stopping any existing Mesos cluster..."

Modified: incubator/mesos/trunk/ec2/deploy.lucid64/root/mesos-ec2/setup
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/ec2/deploy.lucid64/root/mesos-ec2/setup?rev=1132225&r1=1132224&r2=1132225&view=diff
==============================================================================
--- incubator/mesos/trunk/ec2/deploy.lucid64/root/mesos-ec2/setup (original)
+++ incubator/mesos/trunk/ec2/deploy.lucid64/root/mesos-ec2/setup Sun Jun  5 09:03:50 2011
@@ -20,6 +20,7 @@ BRANCH=$3
 MASTERS_FILE="masters"
 MASTERS=`cat $MASTERS_FILE`
 NUM_MASTERS=`cat $MASTERS_FILE | wc -l`
+OTHER_MASTERS=`cat $MASTERS_FILE | sed '1d'`
 SLAVES=`cat slaves`
 ZOOS=`cat zoo`
 
@@ -80,69 +81,51 @@ if [[ $NUM_ZOOS != 0 ]] ; then
   done
 fi
 
-echo "SSH'ing to slaves to approve keys..."
-for slave in $SLAVES; do
-  echo $slave
-  ssh $SSH_OPTS $slave echo -n &
-  sleep 0.3
+# Try to SSH to each cluster node to approve their key. Since some nodes may
+# be slow in starting, we retry failed slaves up to 3 times.
+TODO="$SLAVES $ZOO $OTHER_MASTERS" # List of nodes to try (initially all)
+TRIES="0"                          # Number of times we've tried so far
+echo "SSH'ing to other cluster nodes to approve keys..."
+while [ "e$TODO" != "e" ] && [ $TRIES -lt 4 ] ; do
+  NEW_TODO=
+  for slave in $TODO; do
+    echo $slave
+    ssh $SSH_OPTS $slave echo -n
+    if [ $? != 0 ] ; then
+        NEW_TODO="$NEW_TODO $slave"
+    fi
+  done
+  TRIES=$[$TRIES + 1]
+  if [ "e$NEW_TODO" != "e" ] && [ $TRIES -lt 4 ] ; then
+      sleep 15
+      TODO="$NEW_TODO"
+      echo "Re-attempting SSH to cluster nodes to approve keys..."
+  else
+      break;
+  fi
 done
 
-echo "Waiting for ssh commands to finish..."
-wait
-
-if [[ $NUM_MASTERS -gt 1 ]] ; then
-  echo "RSYNC'ing /root/mesos-ec2 to other master servers..."
-  for master in `cat $MASTERS_FILE | sed '1d'`; do
-      echo $master
-      rsync -e "ssh $SSH_OPTS" -az /root/mesos-ec2 $master:/root & sleep 0.3
-  done
-  wait
-fi
-
-if [[ $NUM_ZOOS != 0 ]] ; then
-  echo "RSYNC'ing /root/mesos-ec2 to ZooKeeper servers..."
-  for zoo in $ZOOS; do
-      echo $zoo
-      rsync -e "ssh $SSH_OPTS" -az /root/mesos-ec2 $zoo:/root & sleep 0.3
-  done
-  wait
-fi
-
-echo "RSYNC'ing /root/mesos-ec2 to slaves..."
-for slave in $SLAVES; do
-  echo $slave
-  rsync -e "ssh $SSH_OPTS" -az /root/mesos-ec2 $slave:/root &
-  scp $SSH_OPTS ~/.ssh/id_rsa $slave:.ssh &
+echo "RSYNC'ing /root/mesos-ec2 to other cluster nodes..."
+for node in $SLAVES $ZOO $OTHER_MASTERS; do
+  echo $node
+  rsync -e "ssh $SSH_OPTS" -az /root/mesos-ec2 $node:/root &
+  scp $SSH_OPTS ~/.ssh/id_rsa $node:.ssh &
   sleep 0.3
 done
 wait
 
-echo "Running slave setup script on slave and zookeeper nodes..."
-for node in $SLAVES $ZOO; do
+echo "Running slave setup script on other cluster nodes..."
+for node in $SLAVES $ZOO $OTHER_MASTERS; do
   echo $node
   ssh -t $SSH_OPTS root@$node "mesos-ec2/setup-slave" & sleep 0.3
 done
 wait
 
-if [[ $NUM_MASTERS -gt 1 ]] ; then
-  echo "Running slave setup script on other masters..."
-  for master in `cat $MASTERS_FILE | sed '1d'`; do
-    echo $master
-    rsync -e "ssh $SSH_OPTS" mesos-ec2/setup-slave & sleep 0.3
-  done
-  wait
-  echo "RSYNC'ing HDFS config files to other masters..."
-  for master in `cat $MASTERS_FILE | sed '1d'`; do
-    echo $master
-    rsync -e "ssh $SSH_OPTS" -az $HADOOP_HOME/conf $master:$HADOOP_HOME & sleep 0.3
-  done
-  wait
-fi
-
-echo "RSYNC'ing HDFS config files to slaves..."
-for slave in $SLAVES; do
-  echo $slave
-  rsync -e "ssh $SSH_OPTS" -az $HADOOP_HOME/conf $slave:$HADOOP_HOME & sleep 0.3
+echo "RSYNC'ing HDFS config files to other cluster nodes..."
+for node in $SLAVES $ZOO $OTHER_MASTERS; do
+  echo $node
+  rsync -e "ssh $SSH_OPTS" -az $HADOOP_HOME/conf $node:$HADOOP_HOME &
+  sleep 0.3
 done
 wait
 
@@ -238,7 +221,7 @@ if [[ $NUM_ZOOS != 0 ]]; then
     ssh $SSH_OPTS $zoo "/root/mesos/third_party/zookeeper-*/bin/zkServer.sh start </dev/null >/dev/null" & sleep 0.1
   done
   wait
-  sleep 2
+  sleep 5
 fi
 
 echo "Stopping any existing Mesos cluster..."