You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2016/08/05 23:41:59 UTC

[1/5] mesos git commit: Added more expectations to TASK_LOST test cases.

Repository: mesos
Updated Branches:
  refs/heads/master cb3ef91a6 -> 5220f7758


Added more expectations to TASK_LOST test cases.

Check the reason and source of TASK_LOST status updates, replaced
ASSERT_ with EXPECT_ in various places where EXPECT_ is more
appropriate.

Review: https://reviews.apache.org/r/50235/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/29925658
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/29925658
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/29925658

Branch: refs/heads/master
Commit: 29925658291be60bda7af7f83225d743e8d24870
Parents: cb3ef91
Author: Neil Conway <ne...@gmail.com>
Authored: Fri Aug 5 16:41:10 2016 -0700
Committer: Vinod Kone <vi...@gmail.com>
Committed: Fri Aug 5 16:41:10 2016 -0700

----------------------------------------------------------------------
 src/tests/fault_tolerance_tests.cpp             |   1 +
 src/tests/master_authorization_tests.cpp        |   6 +
 src/tests/master_maintenance_tests.cpp          |   3 +
 src/tests/master_slave_reconciliation_tests.cpp |   6 +-
 src/tests/master_tests.cpp                      |   6 +
 src/tests/partition_tests.cpp                   |   2 +
 src/tests/slave_recovery_tests.cpp              | 144 +++++++++++--------
 7 files changed, 107 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/29925658/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index 79f36a5..661cc8b 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -907,6 +907,7 @@ TEST_F(FaultToleranceTest, TaskLost)
 
   AWAIT_READY(status);
   EXPECT_EQ(TASK_LOST, status.get().state());
+  EXPECT_EQ(TaskStatus::REASON_MASTER_DISCONNECTED, status.get().reason());
 
   driver.stop();
   driver.join();

http://git-wip-us.apache.org/repos/asf/mesos/blob/29925658/src/tests/master_authorization_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_authorization_tests.cpp b/src/tests/master_authorization_tests.cpp
index e43b264..16f1ab4 100644
--- a/src/tests/master_authorization_tests.cpp
+++ b/src/tests/master_authorization_tests.cpp
@@ -373,7 +373,10 @@ TEST_F(MasterAuthorizationTest, SlaveRemoved)
 
   // Framework should get a TASK_LOST.
   AWAIT_READY(status);
+
   EXPECT_EQ(TASK_LOST, status.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, status.get().source());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, status.get().reason());
 
   // No task launch should happen resulting in all resources being
   // returned to the allocator.
@@ -464,7 +467,10 @@ TEST_F(MasterAuthorizationTest, SlaveDisconnected)
 
   // Framework should get a TASK_LOST.
   AWAIT_READY(status);
+
   EXPECT_EQ(TASK_LOST, status.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, status.get().source());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, status.get().reason());
 
   // No task launch should happen resulting in all resources being
   // returned to the allocator.

http://git-wip-us.apache.org/repos/asf/mesos/blob/29925658/src/tests/master_maintenance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_maintenance_tests.cpp b/src/tests/master_maintenance_tests.cpp
index 2d4b45f..0820e63 100644
--- a/src/tests/master_maintenance_tests.cpp
+++ b/src/tests/master_maintenance_tests.cpp
@@ -698,7 +698,10 @@ TEST_F(MasterMaintenanceTest, EnterMaintenanceMode)
 
   // Verify that we received a TASK_LOST.
   AWAIT_READY(lostStatus);
+
   EXPECT_EQ(TASK_LOST, lostStatus.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, lostStatus.get().source());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, lostStatus.get().reason());
 
   // Verify that the framework received the slave lost message.
   AWAIT_READY(slaveLost);

http://git-wip-us.apache.org/repos/asf/mesos/blob/29925658/src/tests/master_slave_reconciliation_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_slave_reconciliation_tests.cpp b/src/tests/master_slave_reconciliation_tests.cpp
index fdb9bb6..03405e3 100644
--- a/src/tests/master_slave_reconciliation_tests.cpp
+++ b/src/tests/master_slave_reconciliation_tests.cpp
@@ -233,8 +233,10 @@ TEST_F(MasterSlaveReconciliationTest, ReconcileLostTask)
 
   AWAIT_READY(status);
 
-  ASSERT_EQ(task.task_id(), status.get().task_id());
-  ASSERT_EQ(TASK_LOST, status.get().state());
+  EXPECT_EQ(task.task_id(), status.get().task_id());
+  EXPECT_EQ(TASK_LOST, status.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_SLAVE, status.get().source());
+  EXPECT_EQ(TaskStatus::REASON_RECONCILIATION, status.get().reason());
 
   // Before we obtain the metrics, ensure that the master has finished
   // processing the status update so metrics have been updated.

http://git-wip-us.apache.org/repos/asf/mesos/blob/29925658/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index b1d7545..398164d 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -461,7 +461,10 @@ TEST_F(MasterTest, KillUnknownTask)
   driver.killTask(unknownTaskId);
 
   AWAIT_READY(status);
+
   EXPECT_EQ(TASK_LOST, status.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, status.get().source());
+  EXPECT_EQ(TaskStatus::REASON_RECONCILIATION, status.get().reason());
 
   EXPECT_CALL(exec, shutdown(_))
     .Times(AtMost(1));
@@ -1815,8 +1818,11 @@ TEST_F(MasterTest, RecoveredSlaveDoesNotReregister)
   AWAIT_READY(slaveLost);
 
   JSON::Object stats = Metrics();
+  EXPECT_EQ(1, stats.values["master/recovery_slave_removals"]);
   EXPECT_EQ(1, stats.values["master/slave_removals"]);
   EXPECT_EQ(1, stats.values["master/slave_removals/reason_unhealthy"]);
+  EXPECT_EQ(1, stats.values["master/slave_shutdowns_completed"]);
+  EXPECT_EQ(1, stats.values["master/slave_shutdowns_scheduled"]);
 
   Clock::resume();
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/29925658/src/tests/partition_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/partition_tests.cpp b/src/tests/partition_tests.cpp
index 91969e4..bafc7ce 100644
--- a/src/tests/partition_tests.cpp
+++ b/src/tests/partition_tests.cpp
@@ -278,6 +278,7 @@ TEST_P(PartitionTest, PartitionedSlaveReregistration)
   // The master will have notified the framework of the lost task.
   AWAIT_READY(lostStatus);
   EXPECT_EQ(TASK_LOST, lostStatus.get().state());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, lostStatus.get().reason());
 
   // Wait for the master to attempt to shut down the slave.
   AWAIT_READY(shutdownMessage);
@@ -555,6 +556,7 @@ TEST_P(PartitionTest, PartitionedSlaveExitedExecutor)
   // The master will have notified the framework of the lost task.
   AWAIT_READY(lostStatus);
   EXPECT_EQ(TASK_LOST, lostStatus.get().state());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, lostStatus.get().reason());
 
   // Wait for the master to attempt to shut down the slave.
   AWAIT_READY(shutdownMessage);

http://git-wip-us.apache.org/repos/asf/mesos/blob/29925658/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 470fb26..83e89d6 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -414,7 +414,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverStatusUpdateManager)
   ASSERT_SOME(slave);
 
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_RUNNING, status.get().state());
+  EXPECT_EQ(TASK_RUNNING, status.get().state());
 
   driver.stop();
   driver.join();
@@ -501,12 +501,12 @@ TYPED_TEST(SlaveRecoveryTest, ReconnectHTTPExecutor)
   // Ensure that the executor subscribes again.
   AWAIT_READY(subscribeCall);
 
-  ASSERT_EQ(1, subscribeCall.get().subscribe().unacknowledged_updates().size());
-  ASSERT_EQ(1, subscribeCall.get().subscribe().unacknowledged_tasks().size());
+  EXPECT_EQ(1, subscribeCall.get().subscribe().unacknowledged_updates().size());
+  EXPECT_EQ(1, subscribeCall.get().subscribe().unacknowledged_tasks().size());
 
   // Scheduler should receive the recovered update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_RUNNING, status.get().state());
+  EXPECT_EQ(TASK_RUNNING, status.get().state());
 
   driver.stop();
   driver.join();
@@ -593,14 +593,14 @@ TYPED_TEST(SlaveRecoveryTest, ReconnectExecutor)
   reregister.ParseFromString(reregisterExecutorMessage.get().body);
 
   // Executor should inform about the unacknowledged update.
-  ASSERT_EQ(1, reregister.updates_size());
+  EXPECT_EQ(1, reregister.updates_size());
   const StatusUpdate& update = reregister.updates(0);
-  ASSERT_EQ(task.task_id(), update.status().task_id());
-  ASSERT_EQ(TASK_RUNNING, update.status().state());
+  EXPECT_EQ(task.task_id(), update.status().task_id());
+  EXPECT_EQ(TASK_RUNNING, update.status().state());
 
   // Scheduler should receive the recovered update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_RUNNING, status.get().state());
+  EXPECT_EQ(TASK_RUNNING, status.get().state());
 
   driver.stop();
   driver.join();
@@ -704,7 +704,8 @@ TYPED_TEST(SlaveRecoveryTest, RecoverUnregisteredHTTPExecutor)
 
   // Scheduler should receive the TASK_LOST update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_LOST, status->state());
+
+  EXPECT_EQ(TASK_LOST, status->state());
   EXPECT_EQ(TaskStatus::SOURCE_SLAVE, status->source());
   EXPECT_EQ(TaskStatus::REASON_EXECUTOR_REREGISTRATION_TIMEOUT,
             status->reason());
@@ -716,7 +717,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverUnregisteredHTTPExecutor)
   }
 
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   driver.stop();
@@ -817,7 +818,8 @@ TYPED_TEST(SlaveRecoveryTest, RecoverUnregisteredExecutor)
 
   // Scheduler should receive the TASK_LOST update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_LOST, status->state());
+
+  EXPECT_EQ(TASK_LOST, status->state());
   EXPECT_EQ(TaskStatus::SOURCE_SLAVE, status->source());
   EXPECT_EQ(TaskStatus::REASON_EXECUTOR_REREGISTRATION_TIMEOUT,
             status->reason());
@@ -829,7 +831,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverUnregisteredExecutor)
   }
 
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   driver.stop();
@@ -1023,12 +1025,12 @@ TYPED_TEST(SlaveRecoveryTest, RecoverTerminatedHTTPExecutor)
   slave::state::FrameworkState frameworkState =
     state.get().slave.get().frameworks.get(frameworkId.get()).get();
 
-  ASSERT_EQ(1u, frameworkState.executors.size());
+  EXPECT_EQ(1u, frameworkState.executors.size());
 
   slave::state::ExecutorState executorState =
     frameworkState.executors.begin()->second;
 
-  ASSERT_EQ(1u, executorState.runs.size());
+  EXPECT_EQ(1u, executorState.runs.size());
 
   slave::state::RunState runState = executorState.runs.begin()->second;
 
@@ -1073,7 +1075,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverTerminatedHTTPExecutor)
 
   // Scheduler should receive the TASK_FAILED update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_FAILED, status->state());
+  EXPECT_EQ(TASK_FAILED, status->state());
 
   while (offers2.isPending()) {
     Clock::advance(Seconds(1));
@@ -1082,7 +1084,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverTerminatedHTTPExecutor)
 
   // Master should subsequently reoffer the same resources.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   driver.stop();
@@ -1193,7 +1195,8 @@ TYPED_TEST(SlaveRecoveryTest, RecoverTerminatedExecutor)
 
   // Scheduler should receive the TASK_LOST update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_LOST, status->state());
+
+  EXPECT_EQ(TASK_LOST, status->state());
   EXPECT_EQ(TaskStatus::SOURCE_SLAVE, status->source());
   EXPECT_EQ(TaskStatus::REASON_EXECUTOR_REREGISTRATION_TIMEOUT,
             status->reason());
@@ -1205,7 +1208,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverTerminatedExecutor)
 
   // Master should subsequently reoffer the same resources.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   driver.stop();
@@ -1305,7 +1308,7 @@ TYPED_TEST(SlaveRecoveryTest, DISABLED_RecoveryTimeout)
 
   // Scheduler should receive the TASK_FAILED update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_FAILED, status.get().state());
+  EXPECT_EQ(TASK_FAILED, status.get().state());
 
   driver.stop();
   driver.join();
@@ -1395,7 +1398,7 @@ TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
 
   // Make sure all slave resources are reoffered.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   driver.stop();
@@ -1500,7 +1503,10 @@ TYPED_TEST(SlaveRecoveryTest, CleanupHTTPExecutor)
 
   // Scheduler should receive the TASK_LOST update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_LOST, status.get().state());
+
+  EXPECT_EQ(TASK_LOST, status.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, status.get().source());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, status.get().reason());
 
   driver.stop();
   driver.join();
@@ -1600,7 +1606,10 @@ TYPED_TEST(SlaveRecoveryTest, CleanupExecutor)
 
   // Scheduler should receive the TASK_LOST update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_LOST, status.get().state());
+
+  EXPECT_EQ(TASK_LOST, status.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, status.get().source());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, status.get().reason());
 
   driver.stop();
   driver.join();
@@ -1690,10 +1699,14 @@ TYPED_TEST(SlaveRecoveryTest, RemoveNonCheckpointingFramework)
 
   // Scheduler should receive the TASK_LOST updates.
   AWAIT_READY(status1);
-  ASSERT_EQ(TASK_LOST, status1.get().state());
+  EXPECT_EQ(TASK_LOST, status1.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, status1.get().source());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_DISCONNECTED, status1.get().reason());
 
   AWAIT_READY(status2);
-  ASSERT_EQ(TASK_LOST, status2.get().state());
+  EXPECT_EQ(TASK_LOST, status2.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, status2.get().source());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_DISCONNECTED, status2.get().reason());
 
   driver.stop();
   driver.join();
@@ -1894,7 +1907,7 @@ TYPED_TEST(SlaveRecoveryTest, KillTaskWithHTTPExecutor)
 
   // Wait for TASK_KILLED update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_KILLED, status.get().state());
+  EXPECT_EQ(TASK_KILLED, status.get().state());
 
   Clock::pause();
 
@@ -1907,7 +1920,7 @@ TYPED_TEST(SlaveRecoveryTest, KillTaskWithHTTPExecutor)
 
   // Make sure all slave resources are reoffered.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   Clock::resume();
@@ -2016,7 +2029,7 @@ TYPED_TEST(SlaveRecoveryTest, KillTask)
 
   // Wait for TASK_KILLED update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_KILLED, status.get().state());
+  EXPECT_EQ(TASK_KILLED, status.get().state());
 
   Clock::pause();
 
@@ -2029,7 +2042,7 @@ TYPED_TEST(SlaveRecoveryTest, KillTask)
 
   // Make sure all slave resources are reoffered.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   Clock::resume();
@@ -2111,7 +2124,7 @@ TYPED_TEST(SlaveRecoveryTest, Reboot)
   Future<hashset<ContainerID> > containers = containerizer->containers();
 
   AWAIT_READY(containers);
-  ASSERT_EQ(1u, containers.get().size());
+  EXPECT_EQ(1u, containers.get().size());
 
   ContainerID containerId = *containers.get().begin();
 
@@ -2164,7 +2177,7 @@ TYPED_TEST(SlaveRecoveryTest, Reboot)
 
   // Make sure all slave resources are reoffered.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   driver.stop();
@@ -2303,7 +2316,7 @@ TYPED_TEST(SlaveRecoveryTest, GCExecutor)
 
   // Make sure all slave resources are reoffered.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   Clock::resume();
@@ -2422,11 +2435,11 @@ TYPED_TEST(SlaveRecoveryTest, ShutdownSlave)
 
   EXPECT_NE(0u, offers3.get().size());
   // Make sure all slave resources are reoffered.
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers3.get()[0].resources()));
 
   // Ensure the slave id is different.
-  ASSERT_NE(
+  EXPECT_NE(
       offers1.get()[0].slave_id().value(), offers3.get()[0].slave_id().value());
 
   driver.stop();
@@ -2485,7 +2498,7 @@ TYPED_TEST(SlaveRecoveryTest, ShutdownSlaveSIGUSR1)
   driver.launchTasks(offers.get()[0].id(), {task});
 
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_RUNNING, status.get().state());
+  EXPECT_EQ(TASK_RUNNING, status.get().state());
 
   Future<TaskStatus> status2;
   EXPECT_CALL(sched, statusUpdate(_, _))
@@ -2516,7 +2529,11 @@ TYPED_TEST(SlaveRecoveryTest, ShutdownSlaveSIGUSR1)
 
   // The master should send a TASK_LOST and slaveLost.
   AWAIT_READY(status2);
-  ASSERT_EQ(TASK_LOST, status2.get().state());
+
+  EXPECT_EQ(TASK_LOST, status2.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, status2.get().source());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, status2.get().reason());
+
   AWAIT_READY(slaveLost);
 
   // Make sure the slave terminates.
@@ -2620,7 +2637,10 @@ TYPED_TEST(SlaveRecoveryTest, RegisterDisconnectedSlave)
 
   // Scheduler should get a TASK_LOST message.
   AWAIT_READY(status2);
-  ASSERT_EQ(TASK_LOST, status2.get().state());
+
+  EXPECT_EQ(TASK_LOST, status2.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, status2.get().source());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, status2.get().reason());
 
   driver.stop();
   driver.join();
@@ -2728,11 +2748,11 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileKillTask)
 
   // Scheduler should get a TASK_KILLED message.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_KILLED, status.get().state());
+  EXPECT_EQ(TASK_KILLED, status.get().state());
 
   // Make sure all slave resources are reoffered.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   driver.stop();
@@ -2933,12 +2953,12 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
   slave::state::FrameworkState frameworkState =
     state.get().slave.get().frameworks.get(frameworkId.get()).get();
 
-  ASSERT_EQ(1u, frameworkState.executors.size());
+  EXPECT_EQ(1u, frameworkState.executors.size());
 
   slave::state::ExecutorState executorState =
     frameworkState.executors.begin()->second;
 
-  ASSERT_EQ(1u, executorState.runs.size());
+  EXPECT_EQ(1u, executorState.runs.size());
 
   slave::state::RunState runState = executorState.runs.begin()->second;
 
@@ -2983,7 +3003,10 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
 
   // Wait for TASK_LOST update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_LOST, status.get().state());
+
+  EXPECT_EQ(TASK_LOST, status.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_SLAVE, status.get().source());
+  EXPECT_EQ(TaskStatus::REASON_RECONCILIATION, status.get().reason());
 
   Clock::pause();
 
@@ -2996,7 +3019,7 @@ TYPED_TEST(SlaveRecoveryTest, ReconcileTasksMissingFromSlave)
 
   // Make sure all slave resources are reoffered.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   Clock::resume();
@@ -3137,7 +3160,7 @@ TYPED_TEST(SlaveRecoveryTest, SchedulerFailover)
 
   // Wait for TASK_KILLED update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_KILLED, status.get().state());
+  EXPECT_EQ(TASK_KILLED, status.get().state());
 
   Clock::pause();
 
@@ -3150,7 +3173,7 @@ TYPED_TEST(SlaveRecoveryTest, SchedulerFailover)
 
   // Make sure all slave resources are reoffered.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   Clock::resume();
@@ -3215,7 +3238,7 @@ TYPED_TEST(SlaveRecoveryTest, PartitionedSlave)
   driver.start();
 
   AWAIT_READY(offers);
-  ASSERT_NE(0u, offers.get().size());
+  EXPECT_NE(0u, offers.get().size());
 
   // Long running task.
   TaskInfo task = createTask(offers.get()[0], "sleep 1000");
@@ -3268,7 +3291,10 @@ TYPED_TEST(SlaveRecoveryTest, PartitionedSlave)
 
   // The master will have notified the framework of the lost task.
   AWAIT_READY(status);
+
   EXPECT_EQ(TASK_LOST, status.get().state());
+  EXPECT_EQ(TaskStatus::SOURCE_MASTER, status.get().source());
+  EXPECT_EQ(TaskStatus::REASON_SLAVE_REMOVED, status.get().reason());
 
   // Wait for the master to attempt to shut down the slave.
   AWAIT_READY(shutdownMessage);
@@ -3430,11 +3456,11 @@ TYPED_TEST(SlaveRecoveryTest, MasterFailover)
 
   // Wait for TASK_KILLED update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_KILLED, status.get().state());
+  EXPECT_EQ(TASK_KILLED, status.get().state());
 
   // Make sure all slave resources are reoffered.
   AWAIT_READY(offers2);
-  ASSERT_EQ(Resources(offers1.get()[0].resources()),
+  EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
   AWAIT_READY(executorTerminated);
@@ -3597,14 +3623,14 @@ TYPED_TEST(SlaveRecoveryTest, MultipleFrameworks)
 
   // Wait for TASK_KILLED update.
   AWAIT_READY(status1);
-  ASSERT_EQ(TASK_KILLED, status1.get().state());
+  EXPECT_EQ(TASK_KILLED, status1.get().state());
 
   // Kill task 2.
   driver2.killTask(task2.task_id());
 
   // Wait for TASK_KILLED update.
   AWAIT_READY(status2);
-  ASSERT_EQ(TASK_KILLED, status2.get().state());
+  EXPECT_EQ(TASK_KILLED, status2.get().state());
 
   AWAIT_READY(executorTerminated1);
   AWAIT_READY(executorTerminated2);
@@ -3664,7 +3690,7 @@ TYPED_TEST(SlaveRecoveryTest, MultipleSlaves)
   ASSERT_SOME(slave1);
 
   AWAIT_READY(offers1);
-  ASSERT_EQ(1u, offers1.get().size());
+  EXPECT_EQ(1u, offers1.get().size());
 
   // Launch a long running task in the first slave.
   TaskInfo task1 = createTask(offers1.get()[0], "sleep 1000");
@@ -3705,7 +3731,7 @@ TYPED_TEST(SlaveRecoveryTest, MultipleSlaves)
   ASSERT_SOME(slave2);
 
   AWAIT_READY(offers2);
-  ASSERT_EQ(1u, offers2.get().size());
+  EXPECT_EQ(1u, offers2.get().size());
 
   // Launch a long running task in each slave.
   TaskInfo task2 = createTask(offers2.get()[0], "sleep 1000");
@@ -3783,10 +3809,10 @@ TYPED_TEST(SlaveRecoveryTest, MultipleSlaves)
   driver.killTask(task2.task_id());
 
   AWAIT_READY(status1);
-  ASSERT_EQ(TASK_KILLED, status1.get().state());
+  EXPECT_EQ(TASK_KILLED, status1.get().state());
 
   AWAIT_READY(status2);
-  ASSERT_EQ(TASK_KILLED, status2.get().state());
+  EXPECT_EQ(TASK_KILLED, status2.get().state());
 
   AWAIT_READY(executorTerminated1);
   AWAIT_READY(executorTerminated2);
@@ -3882,7 +3908,7 @@ TYPED_TEST(SlaveRecoveryTest, RestartBeforeContainerizerLaunch)
 
   // Scheduler should receive the TASK_FAILED update.
   AWAIT_READY(status);
-  ASSERT_EQ(TASK_FAILED, status->state());
+  EXPECT_EQ(TASK_FAILED, status->state());
   EXPECT_EQ(TaskStatus::SOURCE_SLAVE, status->source());
   EXPECT_EQ(TaskStatus::REASON_EXECUTOR_TERMINATED,
             status->reason());
@@ -3970,7 +3996,7 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, ResourceStatistics)
 
   Future<hashset<ContainerID> > containers = containerizer->containers();
   AWAIT_READY(containers);
-  ASSERT_EQ(1u, containers.get().size());
+  EXPECT_EQ(1u, containers.get().size());
 
   ContainerID containerId = *(containers.get().begin());
 
@@ -4061,7 +4087,7 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PerfRollForward)
 
   Future<hashset<ContainerID>> containers = containerizer->containers();
   AWAIT_READY(containers);
-  ASSERT_EQ(1u, containers.get().size());
+  EXPECT_EQ(1u, containers.get().size());
 
   ContainerID containerId1 = *(containers.get().begin());
 
@@ -4121,7 +4147,7 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PerfRollForward)
 
   containers = containerizer->containers();
   AWAIT_READY(containers);
-  ASSERT_EQ(2u, containers.get().size());
+  EXPECT_EQ(2u, containers.get().size());
   EXPECT_TRUE(containers.get().contains(containerId1));
 
   ContainerID containerId2;
@@ -4208,7 +4234,7 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PidNamespaceForward)
 
   Future<hashset<ContainerID> > containers = containerizer->containers();
   AWAIT_READY(containers);
-  ASSERT_EQ(1u, containers.get().size());
+  EXPECT_EQ(1u, containers.get().size());
 
   ContainerID containerId = *(containers.get().begin());
 
@@ -4313,7 +4339,7 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PidNamespaceBackward)
 
   Future<hashset<ContainerID> > containers = containerizer->containers();
   AWAIT_READY(containers);
-  ASSERT_EQ(1u, containers.get().size());
+  EXPECT_EQ(1u, containers.get().size());
 
   ContainerID containerId = *(containers.get().begin());
 


[2/5] mesos git commit: Added more assertions to master code.

Posted by vi...@apache.org.
Added more assertions to master code.

Review: https://reviews.apache.org/r/50416/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/60dbd347
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/60dbd347
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/60dbd347

Branch: refs/heads/master
Commit: 60dbd347b409c788776760a8270965d943b6806e
Parents: 2992565
Author: Neil Conway <ne...@gmail.com>
Authored: Fri Aug 5 16:41:18 2016 -0700
Committer: Vinod Kone <vi...@gmail.com>
Committed: Fri Aug 5 16:41:18 2016 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/60dbd347/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 9259509..0bd1a34 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -4572,6 +4572,7 @@ void Master::_registerSlave(
     const string& version,
     const Future<bool>& admit)
 {
+  CHECK(slaves.registering.contains(pid));
   slaves.registering.erase(pid);
 
   CHECK(!admit.isDiscarded());
@@ -4822,6 +4823,7 @@ void Master::_reregisterSlave(
     const string& version,
     const Future<bool>& readmit)
 {
+  CHECK(slaves.reregistering.contains(slaveInfo.id()));
   slaves.reregistering.erase(slaveInfo.id());
 
   CHECK(!readmit.isDiscarded());
@@ -5933,6 +5935,7 @@ void Master::_authenticate(
     authenticated.put(pid, future.get().get());
   }
 
+  CHECK(authenticating.contains(pid));
   authenticating.erase(pid);
 }
 
@@ -6720,6 +6723,7 @@ void Master::_removeSlave(
     const string& message,
     Option<Counter> reason)
 {
+  CHECK(slaves.removing.contains(slaveInfo.id()));
   slaves.removing.erase(slaveInfo.id());
 
   CHECK(!removed.isDiscarded());


[3/5] mesos git commit: Improved consistency of test code for partitioning an agent.

Posted by vi...@apache.org.
Improved consistency of test code for partitioning an agent.

Removed unnecessary `Clock::settle` calls: `Clock::settle` should
typically only be used when a test case does not have an easy way to
wait for a _specific_ event to occur. In this case, `Clock::settle` was
unnecessary because the test code immediately proceeded to `AWAIT_READY`
for a more specific event.

Also fixed up some whitespace.

Review: https://reviews.apache.org/r/50417/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5de96fa4
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5de96fa4
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5de96fa4

Branch: refs/heads/master
Commit: 5de96fa4b3e603553dbae3f06aff6621b268a7be
Parents: 60dbd34
Author: Neil Conway <ne...@gmail.com>
Authored: Fri Aug 5 16:41:28 2016 -0700
Committer: Vinod Kone <vi...@gmail.com>
Committed: Fri Aug 5 16:41:28 2016 -0700

----------------------------------------------------------------------
 src/tests/partition_tests.cpp      | 14 ++++----------
 src/tests/slave_recovery_tests.cpp |  4 +---
 src/tests/slave_tests.cpp          |  6 ++++--
 3 files changed, 9 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5de96fa4/src/tests/partition_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/partition_tests.cpp b/src/tests/partition_tests.cpp
index bafc7ce..0a72b34 100644
--- a/src/tests/partition_tests.cpp
+++ b/src/tests/partition_tests.cpp
@@ -134,7 +134,7 @@ TEST_P(PartitionTest, PartitionedSlave)
     AWAIT_READY(ping);
     pings++;
     if (pings == masterFlags.max_agent_ping_timeouts) {
-     break;
+      break;
     }
     ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
     Clock::advance(masterFlags.agent_ping_timeout);
@@ -265,15 +265,13 @@ TEST_P(PartitionTest, PartitionedSlaveReregistration)
     AWAIT_READY(ping);
     pings++;
     if (pings == masterFlags.max_agent_ping_timeouts) {
-     break;
+      break;
     }
     ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
     Clock::advance(masterFlags.agent_ping_timeout);
-    Clock::settle();
   }
 
   Clock::advance(masterFlags.agent_ping_timeout);
-  Clock::settle();
 
   // The master will have notified the framework of the lost task.
   AWAIT_READY(lostStatus);
@@ -392,15 +390,13 @@ TEST_P(PartitionTest, PartitionedSlaveStatusUpdates)
     AWAIT_READY(ping);
     pings++;
     if (pings == masterFlags.max_agent_ping_timeouts) {
-     break;
+      break;
     }
     ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
     Clock::advance(masterFlags.agent_ping_timeout);
-    Clock::settle();
   }
 
   Clock::advance(masterFlags.agent_ping_timeout);
-  Clock::settle();
 
   // Wait for the master to attempt to shut down the slave.
   AWAIT_READY(shutdownMessage);
@@ -543,15 +539,13 @@ TEST_P(PartitionTest, PartitionedSlaveExitedExecutor)
     AWAIT_READY(ping);
     pings++;
     if (pings == masterFlags.max_agent_ping_timeouts) {
-     break;
+      break;
     }
     ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
     Clock::advance(masterFlags.agent_ping_timeout);
-    Clock::settle();
   }
 
   Clock::advance(masterFlags.agent_ping_timeout);
-  Clock::settle();
 
   // The master will have notified the framework of the lost task.
   AWAIT_READY(lostStatus);

http://git-wip-us.apache.org/repos/asf/mesos/blob/5de96fa4/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 83e89d6..998d445 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -3276,15 +3276,13 @@ TYPED_TEST(SlaveRecoveryTest, PartitionedSlave)
     AWAIT_READY(ping);
     pings++;
     if (pings == masterFlags.max_agent_ping_timeouts) {
-     break;
+      break;
     }
     ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
     Clock::advance(masterFlags.agent_ping_timeout);
-    Clock::settle();
   }
 
   Clock::advance(masterFlags.agent_ping_timeout);
-  Clock::settle();
 
   // The master will notify the framework that the slave was lost.
   AWAIT_READY(slaveLost);

http://git-wip-us.apache.org/repos/asf/mesos/blob/5de96fa4/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index b9fa85d..890119f 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -2425,13 +2425,14 @@ TEST_F(SlaveTest, RateLimitSlaveShutdown)
     AWAIT_READY(ping);
     pings++;
     if (pings == masterFlags.max_agent_ping_timeouts) {
-      Clock::advance(masterFlags.agent_ping_timeout);
       break;
     }
     ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
     Clock::advance(masterFlags.agent_ping_timeout);
   }
 
+  Clock::advance(masterFlags.agent_ping_timeout);
+
   // The master should attempt to acquire a permit.
   AWAIT_READY(acquire);
 
@@ -2495,13 +2496,14 @@ TEST_F(SlaveTest, CancelSlaveShutdown)
     AWAIT_READY(ping);
     pings++;
     if (pings == masterFlags.max_agent_ping_timeouts) {
-      Clock::advance(masterFlags.agent_ping_timeout);
       break;
     }
     ping = FUTURE_MESSAGE(Eq(PingSlaveMessage().GetTypeName()), _, _);
     Clock::advance(masterFlags.agent_ping_timeout);
   }
 
+  Clock::advance(masterFlags.agent_ping_timeout);
+
   // The master should attempt to acquire a permit.
   AWAIT_READY(acquire);
 


[5/5] mesos git commit: Future-proofed some slave removal tests.

Posted by vi...@apache.org.
Future-proofed some slave removal tests.

These tests relied on the implementation detail that when an agent is
removed from the list of registered agents, the master sends a
ShutdownSlaveMessage to the agent. That will change in the future
(MESOS-4049). To prepare for this future planned behavior, adjust these
tests to be more robust by instead checking for the invocation of the
`slaveLost` scheduler callback.

Review: https://reviews.apache.org/r/50422/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5220f775
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5220f775
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5220f775

Branch: refs/heads/master
Commit: 5220f77582a14d4cdd0a907ba8af6e9db87d8ab7
Parents: 8a0b17a
Author: Neil Conway <ne...@gmail.com>
Authored: Fri Aug 5 16:41:41 2016 -0700
Committer: Vinod Kone <vi...@gmail.com>
Committed: Fri Aug 5 16:41:41 2016 -0700

----------------------------------------------------------------------
 src/tests/slave_tests.cpp | 91 +++++++++++++++++++++++++++++-------------
 1 file changed, 63 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5220f775/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index 890119f..787ec5b 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -2377,10 +2377,10 @@ TEST_F(SlaveTest, PingTimeoutSomePings)
 }
 
 
-// This test ensures that when slave removal rate limit is specified
-// a slave that fails health checks is removed after a permit is
-// provided by the rate limiter.
-TEST_F(SlaveTest, RateLimitSlaveShutdown)
+// This test ensures that when a slave removal rate limit is
+// specified, the master only removes a slave that fails health checks
+// when it is permitted to do so by the rate limiter.
+TEST_F(SlaveTest, RateLimitSlaveRemoval)
 {
   // Start a master.
   auto slaveRemovalLimiter = std::make_shared<MockRateLimiter>();
@@ -2398,16 +2398,29 @@ TEST_F(SlaveTest, RateLimitSlaveShutdown)
   // Drop all the PONGs to simulate health check timeout.
   DROP_PROTOBUFS(PongSlaveMessage(), _, _);
 
-  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
-    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
-
   Owned<MasterDetector> detector = master.get()->createDetector();
 
   // Start a slave.
   Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
   ASSERT_SOME(slave);
 
-  AWAIT_READY(slaveRegisteredMessage);
+  // Start a scheduler.
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  Future<Nothing> resourceOffers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureSatisfy(&resourceOffers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  // Need to make sure the framework AND slave have registered with
+  // master. Waiting for resource offers should accomplish both.
+  AWAIT_READY(resourceOffers);
 
   // Return a pending future from the rate limiter.
   Future<Nothing> acquire;
@@ -2416,7 +2429,12 @@ TEST_F(SlaveTest, RateLimitSlaveShutdown)
     .WillOnce(DoAll(FutureSatisfy(&acquire),
                     Return(promise.future())));
 
-  Future<ShutdownMessage> shutdown = FUTURE_PROTOBUF(ShutdownMessage(), _, _);
+  EXPECT_CALL(sched, offerRescinded(&driver, _))
+    .WillOnce(Return()); // Expect a single offer to be rescinded.
+
+  Future<Nothing> slaveLost;
+  EXPECT_CALL(sched, slaveLost(&driver, _))
+    .WillOnce(FutureSatisfy(&slaveLost));
 
   // Induce a health check failure of the slave.
   Clock::pause();
@@ -2436,21 +2454,25 @@ TEST_F(SlaveTest, RateLimitSlaveShutdown)
   // The master should attempt to acquire a permit.
   AWAIT_READY(acquire);
 
-  // The shutdown should not occur before the permit is satisfied.
+  // The slave should not be removed before the permit is satisfied;
+  // that means the scheduler shouldn't receive `slaveLost` yet.
   Clock::settle();
-  ASSERT_TRUE(shutdown.isPending());
+  ASSERT_TRUE(slaveLost.isPending());
 
-  // Once the permit is satisfied, the shutdown message
-  // should be sent.
+  // Once the permit is satisfied, the `slaveLost` scheduler callback
+  // should be invoked.
   promise.set(Nothing());
-  AWAIT_READY(shutdown);
+  AWAIT_READY(slaveLost);
+
+  driver.stop();
+  driver.join();
 }
 
 
 // This test verifies that when a slave responds to pings after the
-// slave observer has scheduled it for shutdown (due to health check
-// failure), the shutdown is cancelled.
-TEST_F(SlaveTest, CancelSlaveShutdown)
+// slave observer has scheduled it for removal (due to health check
+// failure), the slave removal is cancelled.
+TEST_F(SlaveTest, CancelSlaveRemoval)
 {
   // Start a master.
   auto slaveRemovalLimiter = std::make_shared<MockRateLimiter>();
@@ -2468,19 +2490,32 @@ TEST_F(SlaveTest, CancelSlaveShutdown)
   // Drop all the PONGs to simulate health check timeout.
   DROP_PROTOBUFS(PongSlaveMessage(), _, _);
 
-  // No shutdown should occur during the test!
-  EXPECT_NO_FUTURE_PROTOBUFS(ShutdownMessage(), _, _);
-
-  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
-    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
-
   Owned<MasterDetector> detector = master.get()->createDetector();
 
   // Start a slave.
   Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
   ASSERT_SOME(slave);
 
-  AWAIT_READY(slaveRegisteredMessage);
+  // Start a scheduler.
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  Future<Nothing> resourceOffers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureSatisfy(&resourceOffers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  EXPECT_CALL(sched, slaveLost(&driver, _))
+    .Times(0); // The `slaveLost` callback should not be invoked.
+
+  driver.start();
+
+  // Need to make sure the framework AND slave have registered with
+  // master. Waiting for resource offers should accomplish both.
+  AWAIT_READY(resourceOffers);
 
   // Return a pending future from the rate limiter.
   Future<Nothing> acquire;
@@ -2507,7 +2542,7 @@ TEST_F(SlaveTest, CancelSlaveShutdown)
   // The master should attempt to acquire a permit.
   AWAIT_READY(acquire);
 
-  // Settle to make sure the shutdown does not occur.
+  // Settle to make sure the slave removal does not occur.
   Clock::settle();
 
   // Reset the filters to allow pongs from the slave.
@@ -2518,10 +2553,10 @@ TEST_F(SlaveTest, CancelSlaveShutdown)
   Clock::settle();
 
   // The master should have tried to cancel the removal.
-  ASSERT_TRUE(promise.future().hasDiscard());
+  EXPECT_TRUE(promise.future().hasDiscard());
 
-  // Allow the cancelation and settle the clock to ensure a shutdown
-  // does not occur.
+  // Allow the cancellation and settle the clock to ensure the
+  // `slaveLost` scheduler callback is not invoked.
   promise.discard();
   Clock::settle();
 }


[4/5] mesos git commit: Cleaned up comments in fault tolerance tests.

Posted by vi...@apache.org.
Cleaned up comments in fault tolerance tests.

Review: https://reviews.apache.org/r/50418/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/8a0b17a1
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/8a0b17a1
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/8a0b17a1

Branch: refs/heads/master
Commit: 8a0b17a11560f482628e890094e83400fa805a80
Parents: 5de96fa
Author: Neil Conway <ne...@gmail.com>
Authored: Fri Aug 5 16:41:35 2016 -0700
Committer: Vinod Kone <vi...@gmail.com>
Committed: Fri Aug 5 16:41:35 2016 -0700

----------------------------------------------------------------------
 src/tests/fault_tolerance_tests.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/8a0b17a1/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index 661cc8b..5a9944c 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -158,7 +158,7 @@ TEST_F(FaultToleranceTest, MasterFailover)
 // This test ensures that a failed over master recovers completed tasks
 // from a slave's re-registration when the slave thinks the framework has
 // completed (but the framework has not actually completed yet from master's
-// point of view.
+// point of view).
 TEST_F(FaultToleranceTest, ReregisterCompletedFrameworks)
 {
   // Step 1. Start Master and Slave.
@@ -340,6 +340,7 @@ TEST_F(FaultToleranceTest, ReregisterCompletedFrameworks)
   Future<Nothing> executorLost;
   EXPECT_CALL(sched, executorLost(&driver, DEFAULT_EXECUTOR_ID, _, _))
     .WillOnce(FutureSatisfy(&executorLost));
+
   // Induce an ExitedExecutorMessage from the slave.
   containerizer.destroy(
       frameworkId.get(), DEFAULT_EXECUTOR_INFO.executor_id());
@@ -1082,8 +1083,8 @@ TEST_F(FaultToleranceTest, ReregisterFrameworkExitedExecutor)
   //   2. Framework re-registration.
   //
   // To achieve this, we need to:
-  //   1. Restart the master (the slave / framework will not detect
-  //      the new master automatically using the BasicMasterDetector).
+  //   1. Restart the master (the slave / framework will not detect the
+  //      new master automatically using the StandaloneMasterDetector).
   //   2. Notify the slave of the new master.
   //   3. Kill the executor.
   //   4. Drop the status update, but allow the ExitedExecutorMessage.