You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2014/05/08 20:54:06 UTC

[2/4] git commit: Added a test to ensure a recovered slave that does not re-register is removed.

Added a test to ensure a recovered slave that does not re-register is removed.

Review: https://reviews.apache.org/r/21176


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f038c080
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f038c080
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f038c080

Branch: refs/heads/master
Commit: f038c080b3a51730086ac319a0e75c76cdd0d077
Parents: c5bd6c2
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Wed Apr 30 08:27:51 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Thu May 8 11:41:02 2014 -0700

----------------------------------------------------------------------
 src/master/master.cpp      |  3 ++
 src/tests/master_tests.cpp | 76 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/f038c080/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 87ea77a..bfe48bc 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -3524,6 +3524,9 @@ void Master::_removeSlave(
 
   // Notify all frameworks of the lost slave.
   foreachvalue (Framework* framework, frameworks.activated) {
+    LOG(INFO) << "Notifying framework " << framework->id << " of lost slave "
+              << slaveInfo.id() << " (" << slaveInfo.hostname() << ") "
+              << "after recovering";
     LostSlaveMessage message;
     message.mutable_slave_id()->MergeFrom(slaveInfo.id());
     send(framework->pid, message);

http://git-wip-us.apache.org/repos/asf/mesos/blob/f038c080/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 939a08d..183a9b8 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -1304,6 +1304,82 @@ TEST_F(MasterTest, MetricsInStatsEndpoint)
 }
 
 
+// This test ensures that when a slave is recovered from the registry
+// but does not re-register within the re-registration timeout, it is
+// removed from the registry, the framework is informed that the slave
+// is lost, and the slave is refused re-registration.
+TEST_F(MasterTest, RecoveredSlaveDoesNotReregister)
+{
+  // Step 1: Start a master.
+  master::Flags masterFlags = CreateMasterFlags();
+  Try<PID<Master> > master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  // Step 2: Start a slave and wait for it to register.
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), master.get(), _);
+
+  slave::Flags slaveFlags = this->CreateSlaveFlags();
+
+  // Set up the slave's checkpointing / recovery flags.
+  slaveFlags.checkpoint = true;
+  slaveFlags.recover = "reconnect";
+  slaveFlags.strict = true;
+
+  Try<PID<Slave> > slave = StartSlave(slaveFlags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(slaveRegisteredMessage);
+
+  // Step 3: Stop the master, then the slave while the master is down.
+  this->Stop(master.get());
+
+  this->Stop(slave.get());
+
+  // Step 4: Restart the master with the same flags.
+  master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  // Step 5: Start a scheduler and wait for it to register.
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+    &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);
+
+  Future<Nothing> registered;
+  EXPECT_CALL(sched, registered(&driver, _, _))
+    .WillOnce(FutureSatisfy(&registered));
+
+  driver.start();
+
+  AWAIT_READY(registered);
+
+  // Step 6: Pause the clock and advance it by the re-registration
+  // timeout, and expect the scheduler to be told the slave is lost!
+  Future<Nothing> slaveLost;
+  EXPECT_CALL(sched, slaveLost(&driver, _))
+    .WillOnce(FutureSatisfy(&slaveLost));
+
+  Clock::pause();
+  Clock::advance(masterFlags.slave_reregister_timeout);
+
+  AWAIT_READY(slaveLost);
+
+  // Step 7: Ensure the slave cannot re-register: expect a ShutdownMessage.
+  Future<ShutdownMessage> shutdownMessage =
+    FUTURE_PROTOBUF(ShutdownMessage(), master.get(), _);
+
+  slave = StartSlave(slaveFlags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(shutdownMessage);
+
+  driver.stop();
+  driver.join();
+
+  Shutdown();
+}
+
+
 #ifdef MESOS_HAS_JAVA
 
 class MasterZooKeeperTest : public MesosZooKeeperTest {};