You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2014/05/16 21:47:19 UTC

git commit: Added backoff to slave's initial (re-)registration attempt.

Repository: mesos
Updated Branches:
  refs/heads/master 299fe28dc -> 09b1dc3e9


Added backoff to slave's initial (re-)registration attempt.

Review: https://reviews.apache.org/r/21464


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/09b1dc3e
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/09b1dc3e
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/09b1dc3e

Branch: refs/heads/master
Commit: 09b1dc3e95955aa187458fcb61e1d66b04ec3af2
Parents: 299fe28
Author: Vinod Kone <vi...@twitter.com>
Authored: Fri May 2 17:14:14 2014 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Fri May 16 12:46:33 2014 -0700

----------------------------------------------------------------------
 src/slave/constants.cpp             |  2 +-
 src/slave/constants.hpp             |  5 ++-
 src/slave/flags.hpp                 | 11 +++++
 src/slave/slave.cpp                 | 38 +++++++++++++---
 src/slave/slave.hpp                 |  3 +-
 src/tests/fault_tolerance_tests.cpp |  7 ++-
 src/tests/master_tests.cpp          | 10 +++--
 src/tests/mesos.cpp                 |  3 ++
 src/tests/slave_recovery_tests.cpp  | 77 +++++++++++++++++---------------
 src/tests/slave_tests.cpp           |  2 +-
 10 files changed, 103 insertions(+), 55 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/09b1dc3e/src/slave/constants.cpp
----------------------------------------------------------------------
diff --git a/src/slave/constants.cpp b/src/slave/constants.cpp
index 1854b16..51f65bb 100644
--- a/src/slave/constants.cpp
+++ b/src/slave/constants.cpp
@@ -30,7 +30,7 @@ const Duration EXECUTOR_REREGISTER_TIMEOUT = Seconds(2);
 const Duration EXECUTOR_SIGNAL_ESCALATION_TIMEOUT = Seconds(3);
 const Duration STATUS_UPDATE_RETRY_INTERVAL_MIN = Seconds(10);
 const Duration STATUS_UPDATE_RETRY_INTERVAL_MAX = Minutes(10);
-const Duration REGISTER_RETRY_INTERVAL_MIN = Seconds(5);
+const Duration REGISTRATION_BACKOFF_FACTOR = Seconds(1);
 const Duration REGISTER_RETRY_INTERVAL_MAX = Minutes(1);
 const Duration GC_DELAY = Weeks(1);
 const double GC_DISK_HEADROOM = 0.1;

http://git-wip-us.apache.org/repos/asf/mesos/blob/09b1dc3e/src/slave/constants.hpp
----------------------------------------------------------------------
diff --git a/src/slave/constants.hpp b/src/slave/constants.hpp
index c097525..ace4590 100644
--- a/src/slave/constants.hpp
+++ b/src/slave/constants.hpp
@@ -45,8 +45,9 @@ extern const Duration GC_DELAY;
 extern const Duration DISK_WATCH_INTERVAL;
 extern const Duration RESOURCE_MONITORING_INTERVAL;
 
-// The minimum interval the slave waits before retrying registration.
-extern const Duration REGISTER_RETRY_INTERVAL_MIN;
+
+// Default backoff interval used by the slave to wait before registration.
+extern const Duration REGISTRATION_BACKOFF_FACTOR;
 
 // The maximum interval the slave waits before retrying registration.
 // Note that this value has to be << 'MIN_SLAVE_REREGISTER_TIMEOUT'

http://git-wip-us.apache.org/repos/asf/mesos/blob/09b1dc3e/src/slave/flags.hpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 8616817..15e5b64 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -103,6 +103,16 @@ public:
         "Directory prepended to relative executor URIs",
         "");
 
+    add(&Flags::registration_backoff_factor,
+        "registration_backoff_factor",
+        "Slave initially picks a random amount of time between [0, b], where\n"
+        "b = register_backoff_factor, to (re-)register with a new master.\n"
+        "Subsequent retries are exponentially backed off based on this\n"
+        "interval (e.g., 1st retry uses a random value between [0, b * 2^1],\n"
+        "2nd retry between [0, b * 2^2], 3rd retry between [0, b * 2^3] etc)\n"
+        "up to a maximum of " + stringify(REGISTER_RETRY_INTERVAL_MAX),
+        REGISTRATION_BACKOFF_FACTOR);
+
     add(&Flags::executor_registration_timeout,
         "executor_registration_timeout",
         "Amount of time to wait for an executor\n"
@@ -233,6 +243,7 @@ public:
   std::string hadoop_home; // TODO(benh): Make an Option.
   bool switch_user;
   std::string frameworks_home;  // TODO(benh): Make an Option.
+  Duration registration_backoff_factor;
   Duration executor_registration_timeout;
   Duration executor_shutdown_grace_period;
   Duration gc_delay;

http://git-wip-us.apache.org/repos/asf/mesos/blob/09b1dc3e/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index ce04afb..8c35590 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -219,6 +219,12 @@ void Slave::initialize()
   }
 #endif // __linux__
 
+  if (flags.registration_backoff_factor > REGISTER_RETRY_INTERVAL_MAX) {
+    EXIT(1) << "Invalid value '" << flags.registration_backoff_factor << "' "
+            << "for --registration_backoff_factor: "
+            << "Must be less than " << REGISTER_RETRY_INTERVAL_MAX;
+  }
+
   if (flags.credential.isSome()) {
     const string& path =
       strings::remove(flags.credential.get(), "file://", strings::PREFIX);
@@ -546,14 +552,30 @@ void Slave::detected(const Future<Option<MasterInfo> >& _master)
       return;
     }
 
+    // Pick a random delay in [0, registration_backoff_factor]. It is
+    // currently applied only when registering without authentication.
+    Duration duration =
+      flags.registration_backoff_factor * ((double) ::random() / RAND_MAX);
+
     if (credential.isSome()) {
       // Authenticate with the master.
+      // TODO(vinod): Do a backoff for authentication similar to what
+      // we do for registration. This is a little tricky because, if
+      // we delay 'Slave::authenticate' and a new master is detected
+      // before 'authenticate' event is processed the slave tries to
+      // authenticate with the new master twice.
+      // TODO(vinod): Consider adding an "AUTHENTICATED" state to the
+      // slave instead of the "authenticated" variable.
       authenticate();
     } else {
       // Proceed with registration without authentication.
       LOG(INFO) << "No credentials provided."
                 << " Attempting to register without authentication";
-      doReliableRegistration();
+
+      delay(duration,
+            self(),
+            &Slave::doReliableRegistration,
+            flags.registration_backoff_factor * 2); // Backoff
     }
   } else {
     LOG(INFO) << "Lost leading master";
@@ -649,7 +671,8 @@ void Slave::_authenticate()
   authenticated = true;
   authenticating = None();
 
-  doReliableRegistration(); // Proceed with registration.
+  // Proceed with registration.
+  doReliableRegistration(flags.registration_backoff_factor * 2);
 }
 
 
@@ -870,15 +893,16 @@ void Slave::doReliableRegistration(const Duration& duration)
 
   // Retry registration if necessary.
   Duration next = std::min(
-      REGISTER_RETRY_INTERVAL_MIN + duration * ((double) ::random() / RAND_MAX),
+      duration * ((double) ::random() / RAND_MAX),
       REGISTER_RETRY_INTERVAL_MAX);
 
-  VLOG(1) << "Will retry registration in " << next << " if necessary";
+  Duration duration_ = std::min(
+      duration * 2,
+      REGISTER_RETRY_INTERVAL_MAX);
 
-  // Increase next backoff duration exponentially until the maximum
-  // is reached.
-  Duration duration_ = std::min(duration * 2, REGISTER_RETRY_INTERVAL_MAX);
+  VLOG(1) << "Will retry registration in " << next << " if necessary";
 
+  // Backoff.
   delay(next, self(), &Slave::doReliableRegistration, duration_);
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/09b1dc3e/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index 29d2a16..769bd00 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -98,8 +98,7 @@ public:
 
   void registered(const process::UPID& from, const SlaveID& slaveId);
   void reregistered(const process::UPID& from, const SlaveID& slaveId);
-  void doReliableRegistration(
-      const Duration& duration = REGISTER_RETRY_INTERVAL_MIN);
+  void doReliableRegistration(const Duration& duration);
 
   void runTask(
       const process::UPID& from,

http://git-wip-us.apache.org/repos/asf/mesos/blob/09b1dc3e/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index 4796149..3f26393 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -323,6 +323,8 @@ TEST_F(FaultToleranceTest, PartitionedSlaveReregistration)
   // The master will notify the framework that the slave was lost.
   AWAIT_READY(slaveLost);
 
+  Clock::resume();
+
   // We now complete the partition on the slave side as well. This
   // is done by simulating a master loss event which would normally
   // occur during a network partition.
@@ -342,8 +344,6 @@ TEST_F(FaultToleranceTest, PartitionedSlaveReregistration)
   AWAIT_READY(shutdownMessage);
   AWAIT_READY(shutdown);
 
-  Clock::resume();
-
   driver.stop();
   driver.join();
 
@@ -2057,8 +2057,6 @@ TEST_F(FaultToleranceTest, ReconcileIncompleteTasks)
     .WillOnce(FutureArg<1>(&status))
     .WillRepeatedly(Return()); // Ignore retried update due to update framework.
 
-  Clock::pause();
-
   driver.launchTasks(offers.get()[0].id(), tasks);
 
   AWAIT_READY(_statusUpdate);
@@ -2078,6 +2076,7 @@ TEST_F(FaultToleranceTest, ReconcileIncompleteTasks)
   // TASK_FINISHED update.
   // NOTE: The status update manager resends the status update when
   // it detects a new master.
+  Clock::pause();
   Clock::settle();
 
   AWAIT_READY(status);

http://git-wip-us.apache.org/repos/asf/mesos/blob/09b1dc3e/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index d74fc94..87427d1 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -449,6 +449,9 @@ TEST_F(MasterTest, KillUnknownTaskSlaveInTransition)
   AWAIT_READY(status);
   EXPECT_EQ(TASK_RUNNING, status.get().state());
 
+  EXPECT_CALL(exec, shutdown(_))
+    .Times(AtMost(1));
+
   Future<Nothing> _reregisterSlave =
     DROP_DISPATCH(_, &Master::_reregisterSlave);
 
@@ -506,9 +509,6 @@ TEST_F(MasterTest, KillUnknownTaskSlaveInTransition)
 
   Clock::resume();
 
-  EXPECT_CALL(exec, shutdown(_))
-    .Times(AtMost(1));
-
   driver.stop();
   driver.join();
 
@@ -1548,6 +1548,8 @@ TEST_F(MasterTest, RecoveredSlaveDoesNotReregister)
 
   AWAIT_READY(slaveLost);
 
+  Clock::resume();
+
   // Step 7: Ensure the slave cannot re-register!
   Future<ShutdownMessage> shutdownMessage =
     FUTURE_PROTOBUF(ShutdownMessage(), master.get(), _);
@@ -1630,6 +1632,8 @@ TEST_F(MasterTest, NonStrictRegistryWriteOnly)
 
   ASSERT_TRUE(slaveLost.isPending());
 
+  Clock::resume();
+
   // Step 7: Now expect the slave to be able to re-register,
   // according to the non-strict semantics.
   Future<SlaveReregisteredMessage> slaveReregisteredMessage =

http://git-wip-us.apache.org/repos/asf/mesos/blob/09b1dc3e/src/tests/mesos.cpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.cpp b/src/tests/mesos.cpp
index 7f59b72..317b5a2 100644
--- a/src/tests/mesos.cpp
+++ b/src/tests/mesos.cpp
@@ -23,6 +23,7 @@
 #include <stout/os.hpp>
 #include <stout/path.hpp>
 #include <stout/result.hpp>
+#include <stout/stringify.hpp>
 #include <stout/uuid.hpp>
 
 #ifdef __linux__
@@ -142,6 +143,8 @@ slave::Flags MesosTest::CreateSlaveFlags()
 
   flags.resources = "cpus:2;mem:1024;disk:1024;ports:[31000-32000]";
 
+  flags.registration_backoff_factor = Milliseconds(10);
+
 #ifdef __linux__
   // Enable putting the slave into memory and cpuacct cgroups.
   if (os::exists("/proc/cgroups") && os::user() == "root") {

http://git-wip-us.apache.org/repos/asf/mesos/blob/09b1dc3e/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 85c57b2..a6262e8 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -801,8 +801,6 @@ TYPED_TEST(SlaveRecoveryTest, DISABLED_RecoveryTimeout)
   AWAIT_READY(_recover);
 
   Clock::advance(EXECUTOR_REREGISTER_TIMEOUT);
-  Clock::settle();
-
   Clock::resume();
 
   // Scheduler should receive the TASK_FAILED update.
@@ -1207,7 +1205,6 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingSlave)
   slave::Flags flags = this->CreateSlaveFlags();
   flags.checkpoint = false;
 
-  Clock::pause();
 
   Future<RegisterSlaveMessage> registerSlaveMessage =
     FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
@@ -1237,6 +1234,8 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingSlave)
   EXPECT_CALL(sched, resourceOffers(_, _))
     .Times(0); // No offers should be received!
 
+  Clock::pause();
+
   driver.start();
 
   // Wait for scheduler to register. We do a Clock::settle() here
@@ -1309,8 +1308,8 @@ TYPED_TEST(SlaveRecoveryTest, KillTask)
 
   Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
 
-  Future<ReregisterSlaveMessage> reregisterSlave =
-    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
 
   // Restart the slave (use same flags) with a new isolator.
   Try<TypeParam*> containerizer2 = TypeParam::create(flags, true);
@@ -1326,9 +1325,12 @@ TYPED_TEST(SlaveRecoveryTest, KillTask)
   Clock::settle(); // Wait for slave to schedule reregister timeout.
 
   Clock::advance(EXECUTOR_REREGISTER_TIMEOUT);
+  Clock::resume();
 
   // Wait for the slave to re-register.
-  AWAIT_READY(reregisterSlave);
+  AWAIT_READY(slaveReregisteredMessage);
+
+  Clock::pause();
 
   Future<TaskStatus> status;
   EXPECT_CALL(sched, statusUpdate(_, _))
@@ -1560,8 +1562,8 @@ TYPED_TEST(SlaveRecoveryTest, GCExecutor)
 
   Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
 
-  Future<ReregisterSlaveMessage> reregisterSlave =
-    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
 
   // Restart the slave (use same flags) with a new isolator.
   Try<TypeParam*> containerizer2 = TypeParam::create(flags, true);
@@ -1585,7 +1587,7 @@ TYPED_TEST(SlaveRecoveryTest, GCExecutor)
 
   Clock::settle();
 
-  AWAIT_READY(reregisterSlave);
+  AWAIT_READY(slaveReregisteredMessage);
 
   Clock::advance(flags.gc_delay);
 
@@ -2158,8 +2160,8 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
 
   Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
 
-  Future<ReregisterSlaveMessage> reregisterSlave =
-    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
 
   EXPECT_CALL(allocator, slaveReconnected(_));
   EXPECT_CALL(allocator, resourcesRecovered(_, _, _));
@@ -2181,17 +2183,17 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
   slave = this->StartSlave(containerizer2.get(), flags);
   ASSERT_SOME(slave);
 
-  Clock::pause();
-
   AWAIT_READY(_recover);
 
   // Wait for the slave to re-register.
-  AWAIT_READY(reregisterSlave);
+  AWAIT_READY(slaveReregisteredMessage);
 
   // Wait for TASK_LOST update.
   AWAIT_READY(status);
   ASSERT_EQ(TASK_LOST, status.get().state());
 
+  Clock::pause();
+
   // Advance the clock until the allocator allocates
   // the recovered resources.
   while (offers2.isPending()) {
@@ -2310,8 +2312,8 @@ TYPED_TEST(SlaveRecoveryTest, SchedulerFailover)
 
   Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
 
-  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
-      FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+      FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
 
   // Restart the slave (use same flags) with a new containerizer.
   Try<TypeParam*> containerizer2 = TypeParam::create(flags, true);
@@ -2327,9 +2329,10 @@ TYPED_TEST(SlaveRecoveryTest, SchedulerFailover)
   Clock::settle(); // Wait for slave to schedule reregister timeout.
 
   Clock::advance(EXECUTOR_REREGISTER_TIMEOUT);
+  Clock::resume();
 
   // Wait for the slave to re-register.
-  AWAIT_READY(reregisterSlaveMessage);
+  AWAIT_READY(slaveReregisteredMessage);
 
   Future<TaskStatus> status;
   EXPECT_CALL(sched2, statusUpdate(_, _))
@@ -2341,6 +2344,8 @@ TYPED_TEST(SlaveRecoveryTest, SchedulerFailover)
     .WillOnce(FutureArg<1>(&offers2))
     .WillRepeatedly(Return());        // Ignore subsequent offers.
 
+  Clock::pause();
+
   // Kill the task.
   driver2.killTask(task.task_id());
 
@@ -2487,6 +2492,8 @@ TYPED_TEST(SlaveRecoveryTest, PartitionedSlave)
   AWAIT_READY(executorTerminated);
   Clock::settle();
 
+  Clock::resume();
+
   this->Stop(slave.get());
   delete containerizer1.get();
 
@@ -2502,8 +2509,6 @@ TYPED_TEST(SlaveRecoveryTest, PartitionedSlave)
 
   AWAIT_READY(registerSlaveMessage);
 
-  Clock::resume();
-
   driver.stop();
   driver.join();
 
@@ -2593,8 +2598,8 @@ TYPED_TEST(SlaveRecoveryTest, MasterFailover)
   // Step 3. Restart the slave and kill the task.
   Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
 
-  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
-    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
 
   // Restart the slave (use same flags) with a new isolator.
   Try<TypeParam*> containerizer2 = TypeParam::create(flags, true);
@@ -2611,11 +2616,11 @@ TYPED_TEST(SlaveRecoveryTest, MasterFailover)
 
   Clock::advance(EXECUTOR_REREGISTER_TIMEOUT);
 
-  // Wait for the slave to re-register.
-  AWAIT_READY(reregisterSlaveMessage);
-
   Clock::resume();
 
+  // Wait for the slave to re-register.
+  AWAIT_READY(slaveReregisteredMessage);
+
   Future<TaskStatus> status;
   EXPECT_CALL(sched, statusUpdate(_, _))
     .WillOnce(FutureArg<1>(&status))
@@ -2757,8 +2762,8 @@ TYPED_TEST(SlaveRecoveryTest, MultipleFrameworks)
 
   Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
 
-  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
-    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
 
   // Restart the slave (use same flags) with a new containerizer.
   Try<TypeParam*> containerizer2 = TypeParam::create(flags, true);
@@ -2775,11 +2780,11 @@ TYPED_TEST(SlaveRecoveryTest, MultipleFrameworks)
 
   Clock::advance(EXECUTOR_REREGISTER_TIMEOUT);
 
-  // Wait for the slave to re-register.
-  AWAIT_READY(reregisterSlaveMessage);
-
   Clock::resume();
 
+  // Wait for the slave to re-register.
+  AWAIT_READY(slaveReregisteredMessage);
+
   // Expectations for the status changes as a result of killing the
   // tasks.
   Future<TaskStatus> status1;
@@ -2924,10 +2929,10 @@ TYPED_TEST(SlaveRecoveryTest, MultipleSlaves)
   Future<Nothing> _recover1 = FUTURE_DISPATCH(_, &Slave::_recover);
   Future<Nothing> _recover2 = FUTURE_DISPATCH(_, &Slave::_recover);
 
-  Future<ReregisterSlaveMessage> reregisterSlave1 =
-    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
-  Future<ReregisterSlaveMessage> reregisterSlave2 =
-    FUTURE_PROTOBUF(ReregisterSlaveMessage(), _, _);
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage1 =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage2 =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
 
   // Restart both slaves using the same flags with new containerizers.
   Try<TypeParam*> containerizer3 = TypeParam::create(flags1, true);
@@ -2958,8 +2963,8 @@ TYPED_TEST(SlaveRecoveryTest, MultipleSlaves)
   Clock::resume();
 
   // Wait for the slaves to re-register.
-  AWAIT_READY(reregisterSlave1);
-  AWAIT_READY(reregisterSlave2);
+  AWAIT_READY(slaveReregisteredMessage1);
+  AWAIT_READY(slaveReregisteredMessage2);
 
   Future<TaskStatus> status1;
   Future<TaskStatus> status2;
@@ -3074,6 +3079,8 @@ TYPED_TEST(SlaveRecoveryTest, RestartBeforeContainerizerLaunch)
 
   Clock::advance(EXECUTOR_REREGISTER_TIMEOUT);
 
+  Clock::resume();
+
   // Scheduler should receive the TASK_FAILED update.
   AWAIT_READY(status);
   ASSERT_EQ(TASK_FAILED, status.get().state());

http://git-wip-us.apache.org/repos/asf/mesos/blob/09b1dc3e/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index 29dc7d4..85ca5c4 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -456,7 +456,7 @@ TEST_F(SlaveTest, ROOT_RunTaskWithCommandInfoWithoutUser)
 
 
 // This test runs a command _with_ the command user field set. The
-// command will very the assumption that the command is run as the
+// command will verify the assumption that the command is run as the
 // specified user. We use (and assume the precense) of the
 // unprivileged 'nobody' user which should be available on both Linux
 // and Mac OS X.