You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by qi...@apache.org on 2018/01/24 01:31:44 UTC
mesos git commit: Updated `SlaveRecoveryTest.RecoverCompletedExecutor` to verify gc.
Repository: mesos
Updated Branches:
refs/heads/master 1382e595f -> 12faca980
Updated `SlaveRecoveryTest.RecoverCompletedExecutor` to verify gc.
In the test `SlaveRecoveryTest.RecoverCompletedExecutor`, when the
completed executor is recovered, verify that its work and meta
directories are gc'ed successfully.
Review: https://reviews.apache.org/r/65263
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/12faca98
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/12faca98
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/12faca98
Branch: refs/heads/master
Commit: 12faca980084c565efdd3b0cfbb3b272d530ba5a
Parents: 1382e59
Author: Qian Zhang <zh...@gmail.com>
Authored: Mon Jan 22 16:14:27 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 24 09:31:16 2018 +0800
----------------------------------------------------------------------
src/tests/slave_recovery_tests.cpp | 54 ++++++++++++++++++++++++++++-----
1 file changed, 46 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/12faca98/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 6d8a571..6dcbedb 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -1794,10 +1794,11 @@ TYPED_TEST(SlaveRecoveryTest, DISABLED_RecoveryTimeout)
}
-// The slave is stopped after an executor is completed (i.e., it has
-// terminated and all its updates have been acknowledged).
-// When it comes back up with recovery=reconnect, make
-// sure the recovery successfully completes.
+// The slave is stopped after an executor is completed (i.e., it
+// has terminated and all its updates have been acknowledged).
+// When it comes back up with recovery=reconnect, make sure the
+// recovery successfully completes and the executor's work and
+// meta directories are successfully gc'ed.
TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
{
Try<Owned<cluster::Master>> master = this->StartMaster();
@@ -1839,24 +1840,37 @@ TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
TaskInfo task = createTask(offers1.get()[0], "exit 0");
+ // Capture the slave and framework ids.
+ SlaveID slaveId = offers1.get()[0].slave_id();
+ FrameworkID frameworkId = offers1.get()[0].framework_id();
+
EXPECT_CALL(sched, statusUpdate(_, _))
.Times(3); // TASK_STARTING, TASK_RUNNING and TASK_FINISHED updates.
EXPECT_CALL(sched, offerRescinded(_, _))
.Times(AtMost(1));
+ Future<RegisterExecutorMessage> registerExecutor =
+ FUTURE_PROTOBUF(RegisterExecutorMessage(), _, _);
+
Future<Nothing> schedule = FUTURE_DISPATCH(
_, &GarbageCollectorProcess::schedule);
driver.launchTasks(offers1.get()[0].id(), {task});
+ // Capture the executor id.
+ AWAIT_READY(registerExecutor);
+ ExecutorID executorId = registerExecutor->executor_id();
+
// We use 'gc.schedule' as a proxy for the cleanup of the executor.
AWAIT_READY(schedule);
slave.get()->terminate();
- Future<Nothing> schedule2 = FUTURE_DISPATCH(
- _, &GarbageCollectorProcess::schedule);
+ Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
+
+ Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+ FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
// Restart the slave (use same flags) with a new containerizer.
_containerizer = TypeParam::create(flags, true, &fetcher);
@@ -1871,14 +1885,38 @@ TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
slave = this->StartSlave(detector.get(), containerizer.get(), flags);
ASSERT_SOME(slave);
- // We use 'gc.schedule' as a proxy for the cleanup of the executor.
- AWAIT_READY(schedule2);
+ Clock::pause();
+
+ AWAIT_READY(_recover);
+
+ Clock::settle(); // Wait for slave to schedule reregister timeout.
+
+ // Ensure the slave considers itself recovered.
+ Clock::advance(flags.executor_reregistration_timeout);
+
+ AWAIT_READY(slaveReregisteredMessage);
+
+ Clock::advance(flags.gc_delay);
+
+ Clock::settle();
+
+ // Executor's work and meta directories should be gc'ed by now.
+ ASSERT_FALSE(os::exists(paths::getExecutorPath(
+ flags.work_dir, slaveId, frameworkId, executorId)));
+
+ ASSERT_FALSE(os::exists(paths::getExecutorPath(
+ paths::getMetaRootDir(flags.work_dir),
+ slaveId,
+ frameworkId,
+ executorId)));
// Make sure all slave resources are reoffered.
AWAIT_READY(offers2);
EXPECT_EQ(Resources(offers1.get()[0].resources()),
Resources(offers2.get()[0].resources()));
+ Clock::resume();
+
driver.stop();
driver.join();
}