You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by qi...@apache.org on 2018/01/24 01:31:44 UTC

mesos git commit: Updated `SlaveRecoveryTest.RecoverCompletedExecutor` to verify gc.

Repository: mesos
Updated Branches:
  refs/heads/master 1382e595f -> 12faca980


Updated `SlaveRecoveryTest.RecoverCompletedExecutor` to verify gc.

In the test `SlaveRecoveryTest.RecoverCompletedExecutor`, when the
completed executor is recovered, verify that its work and meta
directories are gc'ed successfully.

Review: https://reviews.apache.org/r/65263


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/12faca98
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/12faca98
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/12faca98

Branch: refs/heads/master
Commit: 12faca980084c565efdd3b0cfbb3b272d530ba5a
Parents: 1382e59
Author: Qian Zhang <zh...@gmail.com>
Authored: Mon Jan 22 16:14:27 2018 +0800
Committer: Qian Zhang <zh...@gmail.com>
Committed: Wed Jan 24 09:31:16 2018 +0800

----------------------------------------------------------------------
 src/tests/slave_recovery_tests.cpp | 54 ++++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/12faca98/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 6d8a571..6dcbedb 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -1794,10 +1794,11 @@ TYPED_TEST(SlaveRecoveryTest, DISABLED_RecoveryTimeout)
 }
 
 
-// The slave is stopped after an executor is completed (i.e., it has
-// terminated and all its updates have been acknowledged).
-// When it comes back up with recovery=reconnect, make
-// sure the recovery successfully completes.
+// The slave is stopped after an executor is completed (i.e., it
+// has terminated and all its updates have been acknowledged).
+// When it comes back up with recovery=reconnect, make sure the
+// recovery successfully completes and the executor's work and
+// meta directories are successfully gc'ed.
 TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
 {
   Try<Owned<cluster::Master>> master = this->StartMaster();
@@ -1839,24 +1840,37 @@ TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
 
   TaskInfo task = createTask(offers1.get()[0], "exit 0");
 
+  // Capture the slave and framework ids.
+  SlaveID slaveId = offers1.get()[0].slave_id();
+  FrameworkID frameworkId = offers1.get()[0].framework_id();
+
   EXPECT_CALL(sched, statusUpdate(_, _))
     .Times(3); // TASK_STARTING, TASK_RUNNING and TASK_FINISHED updates.
 
   EXPECT_CALL(sched, offerRescinded(_, _))
     .Times(AtMost(1));
 
+  Future<RegisterExecutorMessage> registerExecutor =
+    FUTURE_PROTOBUF(RegisterExecutorMessage(), _, _);
+
   Future<Nothing> schedule = FUTURE_DISPATCH(
       _, &GarbageCollectorProcess::schedule);
 
   driver.launchTasks(offers1.get()[0].id(), {task});
 
+  // Capture the executor id.
+  AWAIT_READY(registerExecutor);
+  ExecutorID executorId = registerExecutor->executor_id();
+
   // We use 'gc.schedule' as a proxy for the cleanup of the executor.
   AWAIT_READY(schedule);
 
   slave.get()->terminate();
 
-  Future<Nothing> schedule2 = FUTURE_DISPATCH(
-      _, &GarbageCollectorProcess::schedule);
+  Future<Nothing> _recover = FUTURE_DISPATCH(_, &Slave::_recover);
+
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
 
   // Restart the slave (use same flags) with a new containerizer.
   _containerizer = TypeParam::create(flags, true, &fetcher);
@@ -1871,14 +1885,38 @@ TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
   slave = this->StartSlave(detector.get(), containerizer.get(), flags);
   ASSERT_SOME(slave);
 
-  // We use 'gc.schedule' as a proxy for the cleanup of the executor.
-  AWAIT_READY(schedule2);
+  Clock::pause();
+
+  AWAIT_READY(_recover);
+
+  Clock::settle(); // Wait for slave to schedule reregister timeout.
+
+  // Ensure the slave considers itself recovered.
+  Clock::advance(flags.executor_reregistration_timeout);
+
+  AWAIT_READY(slaveReregisteredMessage);
+
+  Clock::advance(flags.gc_delay);
+
+  Clock::settle();
+
+  // Executor's work and meta directories should be gc'ed by now.
+  ASSERT_FALSE(os::exists(paths::getExecutorPath(
+      flags.work_dir, slaveId, frameworkId, executorId)));
+
+  ASSERT_FALSE(os::exists(paths::getExecutorPath(
+      paths::getMetaRootDir(flags.work_dir),
+      slaveId,
+      frameworkId,
+      executorId)));
 
   // Make sure all slave resources are reoffered.
   AWAIT_READY(offers2);
   EXPECT_EQ(Resources(offers1.get()[0].resources()),
             Resources(offers2.get()[0].resources()));
 
+  Clock::resume();
+
   driver.stop();
   driver.join();
 }