Posted to commits@mesos.apache.org by al...@apache.org on 2016/03/31 15:31:59 UTC

[2/6] mesos git commit: Refactored docker executor's shutdown logic.

Refactored docker executor's shutdown logic.

Even though the docker executor manages a single task, killTask() and
shutdown() differ in the amount of time the executor has to finalize.
If a killTask is issued, the executor may wait as long as needed (this
is not configurable right now) for the underlying container to exit.
If asked to shut down, the executor is limited by the agent, which
destroys the entire container (both the executor and the docker
container) after a certain timeout. Moreover, in the latter case the
user is usually less interested in a graceful shutdown, because the
framework is being removed completely.
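
To make the timing difference concrete, here is a minimal standalone
sketch (not part of the patch; the Duration struct is only a stand-in
for stout's Duration, and the example timeout values are made up) of
how the two code paths budget their time:

    #include <algorithm>
    #include <iostream>

    // Simplified stand-in for stout's Duration, measured in seconds.
    struct Duration { double secs; };

    // On killTask the executor may wait the full stop timeout for the
    // container to exit gracefully.
    Duration killTaskGrace(const Duration& stopTimeout)
    {
      return stopTimeout;
    }

    // On shutdown the agent destroys the executor's container after its
    // own grace period, so waiting longer than that is pointless; the
    // effective budget is the smaller of the two values.
    Duration shutdownGrace(const Duration& stopTimeout,
                           const Duration& agentShutdownGracePeriod)
    {
      return Duration{std::min(stopTimeout.secs,
                               agentShutdownGracePeriod.secs)};
    }

    int main()
    {
      Duration stopTimeout{30.0};   // Hypothetical docker stop timeout.
      Duration agentGrace{5.0};     // Hypothetical agent shutdown grace period.

      std::cout << "killTask waits up to "
                << killTaskGrace(stopTimeout).secs << "s, shutdown up to "
                << shutdownGrace(stopTimeout, agentGrace).secs << "s"
                << std::endl;
      return 0;
    }

This is the same concern captured by the TODO(bmahler) in the new
killTask() below: if a shutdown arrives while a kill is already in its
grace period, the executor may need to escalate sooner, because the
agent's shutdown grace period can be smaller than the kill grace period.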


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c7fde21d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c7fde21d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c7fde21d

Branch: refs/heads/master
Commit: c7fde21d05a251a9ec2fec5e79b8eb29408ca54c
Parents: 83000a7
Author: Alexander Rukletsov <al...@apache.org>
Authored: Thu Mar 31 13:19:15 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 31 13:19:15 2016 +0200

----------------------------------------------------------------------
 src/docker/executor.cpp | 89 +++++++++++++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/c7fde21d/src/docker/executor.cpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.cpp b/src/docker/executor.cpp
index afc769d..5b06024 100644
--- a/src/docker/executor.cpp
+++ b/src/docker/executor.cpp
@@ -188,11 +188,9 @@ public:
 
   void killTask(ExecutorDriver* driver, const TaskID& taskId)
   {
-    cout << "Received killTask" << endl;
+    cout << "Received killTask for task " << taskId.value() << endl;
 
-    // Since the docker executor manages a single task, we
-    // shutdown completely when we receive a killTask.
-    shutdown(driver);
+    killTask(driver, taskId, stopTimeout);
   }
 
   void frameworkMessage(ExecutorDriver* driver, const string& data) {}
@@ -201,38 +199,18 @@ public:
   {
     cout << "Shutting down" << endl;
 
-    if (run.isSome() && !killed) {
-      // Send TASK_KILLING if the framework can handle it.
-      CHECK_SOME(frameworkInfo);
-      CHECK_SOME(taskId);
-
-      foreach (const FrameworkInfo::Capability& c,
-               frameworkInfo->capabilities()) {
-        if (c.type() == FrameworkInfo::Capability::TASK_KILLING_STATE) {
-          TaskStatus status;
-          status.mutable_task_id()->CopyFrom(taskId.get());
-          status.set_state(TASK_KILLING);
-          driver->sendStatusUpdate(status);
-          break;
-        }
-      }
-
-      // The docker daemon might still be in progress starting the
-      // container, therefore we kill both the docker run process
-      // and also ask the daemon to stop the container.
-      run->discard();
-      stop = docker->stop(containerName, stopTimeout);
-      killed = true;
-    }
-
-    // Cleanup health check process.
+    // Since the docker executor manages a single task,
+    // shutdown boils down to killing this task.
     //
-    // TODO(bmahler): Consider doing this after the task has been
-    // reaped, since a framework may be interested in health
-    // information while the task is being killed (consider a
-    // task that takes 30 minutes to be cleanly killed).
-    if (healthPid != -1) {
-      os::killtree(healthPid, SIGKILL);
+    // TODO(bmahler): If a shutdown arrives after a kill task within
+    // the grace period of the `KillPolicy`, we may need to escalate
+    // more quickly (e.g. the shutdown grace period allotted by the
+    // agent is smaller than the kill grace period).
+    if (run.isSome()) {
+      CHECK_SOME(taskId);
+      killTask(driver, taskId.get(), stopTimeout);
+    } else {
+      driver->stop();
     }
   }
 
@@ -273,6 +251,47 @@ protected:
   }
 
 private:
+  void killTask(
+      ExecutorDriver* driver,
+      const TaskID& _taskId,
+      const Duration& gracePeriod)
+  {
+    if (run.isSome() && !killed) {
+      // Send TASK_KILLING if the framework can handle it.
+      CHECK_SOME(frameworkInfo);
+      CHECK_SOME(taskId);
+      CHECK(taskId.get() == _taskId);
+
+      foreach (const FrameworkInfo::Capability& c,
+               frameworkInfo->capabilities()) {
+        if (c.type() == FrameworkInfo::Capability::TASK_KILLING_STATE) {
+          TaskStatus status;
+          status.mutable_task_id()->CopyFrom(taskId.get());
+          status.set_state(TASK_KILLING);
+          driver->sendStatusUpdate(status);
+          break;
+        }
+      }
+
+      // The docker daemon might still be in progress starting the
+      // container, therefore we kill both the docker run process
+      // and also ask the daemon to stop the container.
+      run->discard();
+      stop = docker->stop(containerName, gracePeriod);
+      killed = true;
+    }
+
+    // Cleanup health check process.
+    //
+    // TODO(bmahler): Consider doing this after the task has been
+    // reaped, since a framework may be interested in health
+    // information while the task is being killed (consider a
+    // task that takes 30 minutes to be cleanly killed).
+    if (healthPid != -1) {
+      os::killtree(healthPid, SIGKILL);
+    }
+  }
+
   void reaped(
       ExecutorDriver* _driver,
       const Future<Nothing>& run)