You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by an...@apache.org on 2017/02/07 17:43:43 UTC

[4/5] mesos git commit: Made `kill()` not invoke `shutdown()` in the default executor.

Made `kill()` not invoke `shutdown()` in the default executor.

This ensures that when we add support for the default executor
launch multiple task groups, kill for a particular task should
_only_ result in the killing of a task group and not the entire
executor as was the case currently.

Review: https://reviews.apache.org/r/56268/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7458ba52
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7458ba52
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7458ba52

Branch: refs/heads/master
Commit: 7458ba52dee908608f452e9adabca930656d39bf
Parents: 11fb934
Author: Anand Mazumdar <an...@apache.org>
Authored: Tue Feb 7 09:42:19 2017 -0800
Committer: Anand Mazumdar <an...@apache.org>
Committed: Tue Feb 7 09:42:52 2017 -0800

----------------------------------------------------------------------
 src/launcher/default_executor.cpp    | 39 ++++++++++++++++++++++---------
 src/tests/default_executor_tests.cpp | 12 ++++++++++
 2 files changed, 40 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/7458ba52/src/launcher/default_executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/default_executor.cpp b/src/launcher/default_executor.cpp
index 327510e..4b21854 100644
--- a/src/launcher/default_executor.cpp
+++ b/src/launcher/default_executor.cpp
@@ -92,6 +92,9 @@ private:
     // that a container is active but a connection for sending the
     // `WAIT_NESTED_CONTAINER` call has not been established yet.
     Option<Connection> waiting;
+
+    // Set to true if the child container is in the process of being killed.
+    bool killing;
   };
 
 public:
@@ -408,7 +411,7 @@ protected:
 
       tasks[taskId] = task;
       containers[taskId] = Owned<Container>(
-          new Container {containerId, taskId, taskGroup, None()});
+          new Container {containerId, taskId, taskGroup, None(), false});
 
       if (task.has_health_check()) {
         // TODO(anand): Add support for command health checks.
@@ -649,7 +652,7 @@ protected:
 
       if (WSUCCEEDED(status.get())) {
         taskState = TASK_FINISHED;
-      } else if (shuttingDown) {
+      } else if (container->killing) {
         // Send TASK_KILLED if the task was killed as a result of
         // `killTask()` or `shutdown()`.
         taskState = TASK_KILLED;
@@ -687,7 +690,7 @@ protected:
     // The default restart policy for a task group is to kill all the
     // remaining child containers if one of them terminated with a
     // non-zero exit code.
-    if (taskState == TASK_FAILED) {
+    if (taskState == TASK_FAILED || taskState == TASK_KILLED) {
       // Kill all the other active containers.
       //
       // TODO(anand): Invoke `kill()` once per active container
@@ -730,14 +733,21 @@ protected:
 
     CHECK_EQ(SUBSCRIBED, state);
 
-    list<Future<Nothing>> killing;
-    foreachkey (const TaskID& taskId, containers) {
-      killing.push_back(kill(taskId));
+    list<Future<Nothing>> killResponses;
+    foreachvalue (const Owned<Container>& container, containers) {
+      // It is possible that we received a `killTask()` request
+      // from the scheduler before and are waiting on the `waited()`
+      // callback to be invoked for the child container.
+      if (container->killing) {
+        continue;
+      }
+
+      killResponses.push_back(kill(container));
     }
 
     // It is possible that the agent process can fail while we are
     // killing child containers. We fail fast if this happens.
-    collect(killing)
+    collect(killResponses)
       .onAny(defer(
           self(),
           [this](const Future<list<Nothing>>& future) {
@@ -767,12 +777,12 @@ protected:
     terminate(self());
   }
 
-  Future<Nothing> kill(const TaskID& taskId)
+  Future<Nothing> kill(Owned<Container> container)
   {
     CHECK_EQ(SUBSCRIBED, state);
-    CHECK(containers.contains(taskId));
 
-    const Owned<Container>& container = containers.at(taskId);
+    CHECK(!container->killing);
+    container->killing = true;
 
     LOG(INFO) << "Killing child container " << container->containerId;
 
@@ -810,7 +820,14 @@ protected:
       return;
     }
 
-    shutdown();
+    const Owned<Container>& container = containers.at(taskId);
+    if (container->killing) {
+      LOG(WARNING) << "Ignoring kill for task '" << taskId
+                   << "' as it is in the process of getting killed";
+      return;
+    }
+
+    kill(container);
   }
 
   void taskHealthUpdated(const TaskHealthStatus& healthStatus)

http://git-wip-us.apache.org/repos/asf/mesos/blob/7458ba52/src/tests/default_executor_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/default_executor_tests.cpp b/src/tests/default_executor_tests.cpp
index e8e0aa2..adf5e56 100644
--- a/src/tests/default_executor_tests.cpp
+++ b/src/tests/default_executor_tests.cpp
@@ -406,6 +406,10 @@ TEST_P(DefaultExecutorTest, KillTask)
     .WillOnce(FutureArg<1>(&killedUpdate1))
     .WillOnce(FutureArg<1>(&killedUpdate2));
 
+  Future<v1::scheduler::Event::Failure> executorFailure;
+  EXPECT_CALL(*scheduler, failure(_, _))
+    .WillOnce(FutureArg<1>(&executorFailure));
+
   // Now kill one task in the task group.
   {
     Call call;
@@ -433,6 +437,14 @@ TEST_P(DefaultExecutorTest, KillTask)
     killedUpdate2->status().task_id()};
 
   ASSERT_EQ(tasks, tasksKilled);
+
+  // The executor should commit suicide after all the tasks have been
+  // killed.
+  AWAIT_READY(executorFailure);
+
+  // Even though the tasks were killed, the executor should exit gracefully.
+  ASSERT_TRUE(executorFailure->has_status());
+  ASSERT_EQ(0, executorFailure->status());
 }