You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2015/05/14 01:59:26 UTC
[2/4] mesos git commit: Moved a partition test into partition_tests.cpp.
Moved a partition test into partition_tests.cpp.
Review: https://reviews.apache.org/r/33153
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/23a51be1
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/23a51be1
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/23a51be1
Branch: refs/heads/master
Commit: 23a51be160d57bccda31ce4e47c8a79d0c363d55
Parents: 085c75c
Author: Benjamin Mahler <be...@gmail.com>
Authored: Mon Apr 13 17:34:30 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Wed May 13 16:45:30 2015 -0700
----------------------------------------------------------------------
src/tests/fault_tolerance_tests.cpp | 115 ------------------------------
src/tests/partition_tests.cpp | 117 +++++++++++++++++++++++++++++++
2 files changed, 117 insertions(+), 115 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/23a51be1/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index cd2594b..fb28e2a 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -86,121 +86,6 @@ namespace tests {
class FaultToleranceTest : public MesosTest {};
-// The purpose of this test is to ensure that when slaves are removed
-// from the master, and then attempt to send status updates, we send
-// a ShutdownMessage to the slave. Why? Because during a network
-// partition, the master will remove a partitioned slave, thus sending
-// its tasks to LOST. At this point, when the partition is removed,
-// the slave may attempt to send updates if it was unaware that the
-// master removed it. We've already notified frameworks that these
-// tasks were LOST, so we have to have the slave shut down.
-TEST_F(FaultToleranceTest, PartitionedSlaveStatusUpdates)
-{
- Try<PID<Master> > master = StartMaster();
- ASSERT_SOME(master);
-
- // Allow the master to PING the slave, but drop all PONG messages
- // from the slave. Note that we don't match on the master / slave
- // PIDs because it's actually the SlaveObserver Process that sends
- // the pings.
- Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);
- DROP_MESSAGES(Eq("PONG"), _, _);
-
- Future<SlaveRegisteredMessage> slaveRegisteredMessage =
- FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
-
- MockExecutor exec(DEFAULT_EXECUTOR_ID);
-
- Try<PID<Slave> > slave = StartSlave(&exec);
- ASSERT_SOME(slave);
-
- AWAIT_READY(slaveRegisteredMessage);
- SlaveID slaveId = slaveRegisteredMessage.get().slave_id();
-
- MockScheduler sched;
- MesosSchedulerDriver driver(
- &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);
-
- Future<FrameworkID> frameworkId;
- EXPECT_CALL(sched, registered(&driver, _, _))
- .WillOnce(FutureArg<1>(&frameworkId));
-
- EXPECT_CALL(sched, resourceOffers(&driver, _))
- .WillRepeatedly(Return());
-
- driver.start();
-
- AWAIT_READY(frameworkId);
-
- // Drop the first shutdown message from the master (simulated
- // partition), allow the second shutdown message to pass when
- // the slave sends an update.
- Future<ShutdownMessage> shutdownMessage =
- DROP_PROTOBUF(ShutdownMessage(), _, slave.get());
-
- EXPECT_CALL(sched, offerRescinded(&driver, _))
- .WillRepeatedly(Return());
-
- Future<Nothing> slaveLost;
- EXPECT_CALL(sched, slaveLost(&driver, _))
- .WillOnce(FutureSatisfy(&slaveLost));
-
- Clock::pause();
-
- // Now, induce a partition of the slave by having the master
- // timeout the slave.
- uint32_t pings = 0;
- while (true) {
- AWAIT_READY(ping);
- pings++;
- if (pings == master::MAX_SLAVE_PING_TIMEOUTS) {
- break;
- }
- ping = FUTURE_MESSAGE(Eq("PING"), _, _);
- Clock::advance(master::SLAVE_PING_TIMEOUT);
- Clock::settle();
- }
-
- Clock::advance(master::SLAVE_PING_TIMEOUT);
- Clock::settle();
-
- // Wait for the master to attempt to shut down the slave.
- AWAIT_READY(shutdownMessage);
-
- // The master will notify the framework that the slave was lost.
- AWAIT_READY(slaveLost);
-
- shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get());
-
- // At this point, the slave still thinks it's registered, so we
- // simulate a status update coming from the slave.
- TaskID taskId;
- taskId.set_value("task_id");
- const StatusUpdate& update = createStatusUpdate(
- frameworkId.get(),
- slaveId,
- taskId,
- TASK_RUNNING,
- TaskStatus::SOURCE_SLAVE);
-
- StatusUpdateMessage message;
- message.mutable_update()->CopyFrom(update);
- message.set_pid(stringify(slave.get()));
-
- process::post(master.get(), message);
-
- // The master should shutdown the slave upon receiving the update.
- AWAIT_READY(shutdownMessage);
-
- Clock::resume();
-
- driver.stop();
- driver.join();
-
- Shutdown();
-}
-
-
// This test ensures that a framework connecting with a
// failed over master gets a registered callback.
// Note that this behavior might change in the future and
http://git-wip-us.apache.org/repos/asf/mesos/blob/23a51be1/src/tests/partition_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/partition_tests.cpp b/src/tests/partition_tests.cpp
index 1018e47..5e31cac 100644
--- a/src/tests/partition_tests.cpp
+++ b/src/tests/partition_tests.cpp
@@ -27,6 +27,8 @@
#include <stout/try.hpp>
+#include "common/protobuf_utils.hpp"
+
#include "master/master.hpp"
#include "slave/constants.hpp"
@@ -284,6 +286,121 @@ TEST_F(PartitionTest, PartitionedSlaveReregistration)
// The purpose of this test is to ensure that when slaves are removed
+// from the master, and then attempt to send status updates, we send
+// a ShutdownMessage to the slave. Why? Because during a network
+// partition, the master will remove a partitioned slave, thus sending
+// its tasks to LOST. At this point, when the partition is removed,
+// the slave may attempt to send updates if it was unaware that the
+// master removed it. We've already notified frameworks that these
+// tasks were LOST, so we have to have the slave shut down.
+TEST_F(PartitionTest, PartitionedSlaveStatusUpdates)
+{
+ Try<PID<Master> > master = StartMaster();
+ ASSERT_SOME(master);
+
+ // Allow the master to PING the slave, but drop all PONG messages
+ // from the slave. Note that we don't match on the master / slave
+ // PIDs because it's actually the SlaveObserver Process that sends
+ // the pings.
+ Future<Message> ping = FUTURE_MESSAGE(Eq("PING"), _, _);
+ DROP_MESSAGES(Eq("PONG"), _, _);
+
+ Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+ FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+ MockExecutor exec(DEFAULT_EXECUTOR_ID);
+
+ Try<PID<Slave> > slave = StartSlave(&exec);
+ ASSERT_SOME(slave);
+
+ AWAIT_READY(slaveRegisteredMessage);
+ SlaveID slaveId = slaveRegisteredMessage.get().slave_id();
+
+ MockScheduler sched;
+ MesosSchedulerDriver driver(
+ &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);
+
+ Future<FrameworkID> frameworkId;
+ EXPECT_CALL(sched, registered(&driver, _, _))
+ .WillOnce(FutureArg<1>(&frameworkId));
+
+ EXPECT_CALL(sched, resourceOffers(&driver, _))
+ .WillRepeatedly(Return());
+
+ driver.start();
+
+ AWAIT_READY(frameworkId);
+
+ // Drop the first shutdown message from the master (simulated
+ // partition), allow the second shutdown message to pass when
+ // the slave sends an update.
+ Future<ShutdownMessage> shutdownMessage =
+ DROP_PROTOBUF(ShutdownMessage(), _, slave.get());
+
+ EXPECT_CALL(sched, offerRescinded(&driver, _))
+ .WillRepeatedly(Return());
+
+ Future<Nothing> slaveLost;
+ EXPECT_CALL(sched, slaveLost(&driver, _))
+ .WillOnce(FutureSatisfy(&slaveLost));
+
+ Clock::pause();
+
+ // Now, induce a partition of the slave by having the master
+ // timeout the slave.
+ uint32_t pings = 0;
+ while (true) {
+ AWAIT_READY(ping);
+ pings++;
+ if (pings == master::MAX_SLAVE_PING_TIMEOUTS) {
+ break;
+ }
+ ping = FUTURE_MESSAGE(Eq("PING"), _, _);
+ Clock::advance(master::SLAVE_PING_TIMEOUT);
+ Clock::settle();
+ }
+
+ Clock::advance(master::SLAVE_PING_TIMEOUT);
+ Clock::settle();
+
+ // Wait for the master to attempt to shut down the slave.
+ AWAIT_READY(shutdownMessage);
+
+ // The master will notify the framework that the slave was lost.
+ AWAIT_READY(slaveLost);
+
+ shutdownMessage = FUTURE_PROTOBUF(ShutdownMessage(), _, slave.get());
+
+ // At this point, the slave still thinks it's registered, so we
+ // simulate a status update coming from the slave.
+ TaskID taskId;
+ taskId.set_value("task_id");
+ const StatusUpdate& update = protobuf::createStatusUpdate(
+ frameworkId.get(),
+ slaveId,
+ taskId,
+ TASK_RUNNING,
+ TaskStatus::SOURCE_SLAVE);
+
+ StatusUpdateMessage message;
+ message.mutable_update()->CopyFrom(update);
+ message.set_pid(stringify(slave.get()));
+
+ process::post(master.get(), message);
+
+ // The master should shutdown the slave upon receiving the update.
+ AWAIT_READY(shutdownMessage);
+
+ Clock::resume();
+
+ driver.stop();
+ driver.join();
+
+ Shutdown();
+}
+
+
+// The purpose of this test is to ensure that when slaves are removed
// from the master, and then attempt to send exited executor messages,
// we send a ShutdownMessage to the slave. Why? Because during a
// network partition, the master will remove a partitioned slave, thus