You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2016/03/24 18:24:29 UTC
[4/9] mesos git commit: Added a test for executor shutdown grace
period.
Added a test for executor shutdown grace period.
Review: https://reviews.apache.org/r/44994/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/201ab8f3
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/201ab8f3
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/201ab8f3
Branch: refs/heads/master
Commit: 201ab8f3abc83e9235afef551ef3d858ca1e8224
Parents: d322aa9
Author: Alexander Rukletsov <ru...@gmail.com>
Authored: Thu Mar 24 17:30:13 2016 +0100
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 24 18:21:01 2016 +0100
----------------------------------------------------------------------
src/slave/slave.hpp | 14 ++---
src/tests/slave_tests.cpp | 139 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 146 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/201ab8f3/src/slave/slave.hpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index bc68cf1..3ba335f 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -398,6 +398,13 @@ public:
// Returns the resource usage information for all executors.
process::Future<ResourceUsage> usage();
+ // Handle the second phase of shutting down an executor for those
+ // executors that have not properly shutdown within a timeout.
+ void shutdownExecutorTimeout(
+ const FrameworkID& frameworkId,
+ const ExecutorID& executorId,
+ const ContainerID& containerId);
+
private:
void _authenticate();
void authenticationTimeout(process::Future<bool> future);
@@ -409,13 +416,6 @@ private:
// exited.
void _shutdownExecutor(Framework* framework, Executor* executor);
- // Handle the second phase of shutting down an executor for those
- // executors that have not properly shutdown within a timeout.
- void shutdownExecutorTimeout(
- const FrameworkID& frameworkId,
- const ExecutorID& executorId,
- const ContainerID& containerId);
-
// Inner class used to namespace HTTP route handlers (see
// slave/http.cpp for implementations).
class Http
http://git-wip-us.apache.org/repos/asf/mesos/blob/201ab8f3/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index 5f47a71..69d4894 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -3261,6 +3261,145 @@ TEST_F(SlaveTest, HTTPSchedulerSlaveRestart)
driver.join();
}
+
+// Ensures that if `ExecutorInfo.shutdown_grace_period` is set, it
+// overrides the default value from the agent flag, is observed by
+// executor, and is enforced by the agent.
+TEST_F(SlaveTest, ExecutorShutdownGracePeriod)
+{
+ master::Flags masterFlags = CreateMasterFlags();
+ Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+ ASSERT_SOME(master);
+
+ MockExecutor exec(DEFAULT_EXECUTOR_ID);
+
+ TestContainerizer containerizer(&exec);
+
+ Owned<MasterDetector> detector = master.get()->createDetector();
+
+ slave::Flags agentFlags = CreateSlaveFlags();
+ Try<Owned<cluster::Slave>> slave =
+ StartSlave(detector.get(), &containerizer, agentFlags);
+ ASSERT_SOME(slave);
+
+ MockScheduler sched;
+ MesosSchedulerDriver driver(
+ &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+ // We need framework's ID to shutdown the executor later on.
+ Future<FrameworkID> frameworkId;
+ EXPECT_CALL(sched, registered(&driver, _, _))
+ .WillOnce(FutureArg<1>(&frameworkId));
+
+ Future<vector<Offer>> offers;
+ EXPECT_CALL(sched, resourceOffers(&driver, _))
+ .WillOnce(FutureArg<1>(&offers));
+
+ driver.start();
+
+ AWAIT_READY(offers);
+ EXPECT_NE(0u, offers->size());
+ Offer offer = offers.get()[0];
+
+ // Customize executor shutdown grace period to be larger than the
+ // default agent flag value, so that we can check it is respected.
+ Duration customGracePeriod = agentFlags.executor_shutdown_grace_period * 2;
+
+ ExecutorInfo executorInfo(DEFAULT_EXECUTOR_INFO);
+ executorInfo.mutable_shutdown_grace_period()->set_nanoseconds(
+ customGracePeriod.ns());
+
+ TaskInfo task;
+ task.set_name("");
+ task.mutable_task_id()->set_value("2");
+ task.mutable_slave_id()->MergeFrom(offer.slave_id());
+ task.mutable_resources()->MergeFrom(offer.resources());
+ task.mutable_executor()->MergeFrom(executorInfo);
+
+ Future<TaskStatus> statusRunning;
+ EXPECT_CALL(sched, statusUpdate(&driver, _))
+ .WillOnce(FutureArg<1>(&statusRunning));
+
+ EXPECT_CALL(exec, registered(_, _, _, _))
+ .Times(1);
+
+ Future<TaskInfo> receivedTask;
+ EXPECT_CALL(exec, launchTask(_, _))
+ .WillOnce(DoAll(SendStatusUpdateFromTask(TASK_RUNNING),
+ FutureArg<1>(&receivedTask)));
+
+ driver.launchTasks(offer.id(), {task});
+
+ AWAIT_READY(statusRunning);
+ EXPECT_EQ(TASK_RUNNING, statusRunning->state());
+ EXPECT_EQ(customGracePeriod.ns(),
+ receivedTask->executor().shutdown_grace_period().nanoseconds());
+
+ // If executor is asked to shutdown but fails to do so within the grace
+ // shutdown period, the shutdown is enforced by the agent. The agent
+ // adjusts its timeout according to `ExecutorInfo.shutdown_grace_period`.
+ //
+ // NOTE: Executors relying on the executor driver have a built-in suicide
+ // mechanism (`ShutdownProcess`), that kills the OS process where the
+ // executor is running after the grace period ends. This mechanism is
+ // disabled in tests, hence we do not observe crashes induced by this test.
+ // The test containerizer only accepts "local" executors and it considers
+ // them "terminated" only once destroy is called.
+
+ EXPECT_CALL(exec, shutdown(_))
+ .Times(AtMost(1))
+ .WillOnce(Return());
+
+ // Once the grace period ends, the agent forcibly shuts down the executor.
+ Future<Nothing> executorShutdownTimeout =
+ FUTURE_DISPATCH(slave.get()->pid, &Slave::shutdownExecutorTimeout);
+
+ Future<TaskStatus> statusFailed;
+ EXPECT_CALL(sched, statusUpdate(&driver, _))
+ .WillOnce(FutureArg<1>(&statusFailed));
+
+ Future<ExecutorID> lostExecutorId;
+ EXPECT_CALL(sched, executorLost(&driver, DEFAULT_EXECUTOR_ID, _, _))
+ .WillOnce(FutureArg<1>(&lostExecutorId));
+
+ // Ask executor to shutdown. There is no support in the scheduler
+ // driver for shutting down executors, hence we have to spoof it.
+ AWAIT_READY(frameworkId);
+ ShutdownExecutorMessage shutdownMessage;
+ shutdownMessage.mutable_executor_id()->CopyFrom(DEFAULT_EXECUTOR_ID);
+ shutdownMessage.mutable_framework_id()->CopyFrom(frameworkId.get());
+ post(master.get()->pid, slave.get()->pid, shutdownMessage);
+
+ // Ensure the `ShutdownExecutorMessage` message is
+ // received by the agent before we start the timer.
+ Clock::pause();
+ Clock::settle();
+ Clock::advance(agentFlags.executor_shutdown_grace_period);
+ Clock::settle();
+
+ // The executor shutdown timeout should not have fired, since the
+ // `ExecutorInfo` contains a grace period larger than the agent flag.
+ EXPECT_TRUE(executorShutdownTimeout.isPending());
+
+ // Trigger the shutdown grace period from the `ExecutorInfo`
+ // (note that is is 2x the agent flag).
+ Clock::advance(agentFlags.executor_shutdown_grace_period);
+
+ AWAIT_READY(executorShutdownTimeout);
+
+ AWAIT_READY(statusFailed);
+ EXPECT_EQ(TASK_FAILED, statusFailed->state());
+ EXPECT_EQ(TaskStatus::REASON_EXECUTOR_TERMINATED,
+ statusFailed->reason());
+
+ AWAIT_EXPECT_EQ(DEFAULT_EXECUTOR_ID, lostExecutorId);
+
+ Clock::resume();
+
+ driver.stop();
+ driver.join();
+}
+
} // namespace tests {
} // namespace internal {
} // namespace mesos {