You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2018/03/27 09:38:46 UTC

[4/8] mesos git commit: Made the default executor's handling of kill escalations more robust.

Made the default executor's handling of kill escalations more robust.

This patch makes the default executor retry SIGKILL escalations if the
executor is disconnected from the agent or the kill call fails.

Review: https://reviews.apache.org/r/65694/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/501eb344
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/501eb344
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/501eb344

Branch: refs/heads/master
Commit: 501eb34467812143a6462441e9d8447be474c675
Parents: 801aee6
Author: Gaston Kleiman <ga...@mesosphere.io>
Authored: Tue Mar 27 11:37:28 2018 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Tue Mar 27 11:37:28 2018 +0200

----------------------------------------------------------------------
 src/launcher/default_executor.cpp | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/501eb344/src/launcher/default_executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/default_executor.cpp b/src/launcher/default_executor.cpp
index 1109175..a2f1ec5 100644
--- a/src/launcher/default_executor.cpp
+++ b/src/launcher/default_executor.cpp
@@ -1200,7 +1200,19 @@ protected:
       << " did not terminate after " << timeout << ", sending SIGKILL"
       << " to the container";
 
-    kill(containerId, SIGKILL);
+    kill(containerId, SIGKILL).onFailed([=](const string& failure) {
+      const Duration duration = Seconds(1);
+
+      LOG(WARNING)
+        << "Escalation to SIGKILL the task '" << taskId
+        << "' running in child container " << containerId
+        << " failed: " << failure << "; Retrying in " << duration;
+
+      process::delay(
+          duration, self(), &Self::escalated, containerId, taskId, timeout);
+
+      return;
+    });
   }
 
   void killTask(