You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2016/03/31 15:31:58 UTC

[1/6] mesos git commit: Refactored command executor's shutdown logic.

Repository: mesos
Updated Branches:
  refs/heads/master e66a7790b -> a3d29b9c4


Refactored command executor's shutdown logic.

Even though the command executor manages a single task, killTask() and
shutdown() differ in the time period the executor has to finalize. If a
killTask is issued, the executor may use as much time as needed (the
grace period specified in the kill policy) to wait for the underlying
task to exit. If asked to shut down, the executor is limited by the
agent, which destroys the container altogether after a certain timeout.
Moreover, in the latter case the user is usually less interested in
graceful shutdown, because the framework is being removed completely.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/83000a79
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/83000a79
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/83000a79

Branch: refs/heads/master
Commit: 83000a794505b46f7667c64b86fa91708616dcde
Parents: e66a779
Author: Alexander Rukletsov <al...@apache.org>
Authored: Tue Mar 29 13:44:06 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 31 13:02:28 2016 +0200

----------------------------------------------------------------------
 src/launcher/executor.cpp | 118 ++++++++++++++++++++++-------------------
 1 file changed, 63 insertions(+), 55 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/83000a79/src/launcher/executor.cpp
----------------------------------------------------------------------
diff --git a/src/launcher/executor.cpp b/src/launcher/executor.cpp
index 4e9b4d9..7677391 100644
--- a/src/launcher/executor.cpp
+++ b/src/launcher/executor.cpp
@@ -466,23 +466,27 @@ public:
 
   void killTask(ExecutorDriver* driver, const TaskID& taskId)
   {
-    cout << "Received killTask" << endl;
+    cout << "Received killTask for task " << taskId.value() << endl;
 
-    // Since the command executor manages a single task, we
-    // shutdown completely when we receive a killTask.
-    shutdown(driver);
+    // Default grace period is set to 3s for backwards compatibility.
+    //
+    // TODO(alexr): Replace it with a more meaningful default, e.g.
+    // `shutdownGracePeriod` after the deprecation cycle, started in 0.29.
+    Duration gracePeriod = Seconds(3);
+
+    if (killPolicy.isSome() && killPolicy->has_grace_period()) {
+      gracePeriod = Nanoseconds(killPolicy->grace_period().nanoseconds());
+    }
+
+    killTask(driver, taskId, gracePeriod);
   }
 
   void frameworkMessage(ExecutorDriver* driver, const string& data) {}
 
   void shutdown(ExecutorDriver* driver)
   {
-    // If the kill policy's grace period is set, we use it for the signal
-    // escalation timeout. The agent adjusts executor's shutdown grace
-    // period based on it, hence the executor will be given enough time
-    // to clean up. If the kill policy is not specified, the executor's
-    // shutdown grace period is used, which is set to some default value.
-    //
+    cout << "Shutting down" << endl;
+
     // NOTE: We leave a small buffer of time to do the forced kill, otherwise
     // the agent may destroy the container before we can send `TASK_KILLED`.
     //
@@ -491,26 +495,68 @@ public:
     Duration gracePeriod =
       shutdownGracePeriod - process::MAX_REAP_INTERVAL() - Seconds(1);
 
-    if (killPolicy.isSome() && killPolicy->has_grace_period()) {
-      gracePeriod = Nanoseconds(killPolicy->grace_period().nanoseconds());
-    }
-
+    // Since the command executor manages a single task,
+    // shutdown boils down to killing this task.
+    //
     // TODO(bmahler): If a shutdown arrives after a kill task within
     // the grace period of the `KillPolicy`, we may need to escalate
     // more quickly (e.g. the shutdown grace period allotted by the
     // agent is smaller than the kill grace period).
+    if (launched) {
+      CHECK_SOME(taskId);
+      killTask(driver, taskId.get(), gracePeriod);
+    } else {
+      driver->stop();
+    }
+  }
+
+  virtual void error(ExecutorDriver* driver, const string& message) {}
 
-    shutdown(driver, gracePeriod);
+protected:
+  virtual void initialize()
+  {
+    install<TaskHealthStatus>(
+        &CommandExecutorProcess::taskHealthUpdated,
+        &TaskHealthStatus::task_id,
+        &TaskHealthStatus::healthy,
+        &TaskHealthStatus::kill_task);
   }
 
-  void shutdown(ExecutorDriver* driver, const Duration& gracePeriod)
+  void taskHealthUpdated(
+      const TaskID& taskID,
+      const bool& healthy,
+      const bool& initiateTaskKill)
   {
-    cout << "Shutting down" << endl;
+    if (driver.isNone()) {
+      return;
+    }
+
+    cout << "Received task health update, healthy: "
+         << stringify(healthy) << endl;
 
+    TaskStatus status;
+    status.mutable_task_id()->CopyFrom(taskID);
+    status.set_healthy(healthy);
+    status.set_state(TASK_RUNNING);
+    driver.get()->sendStatusUpdate(status);
+
+    if (initiateTaskKill) {
+      killedByHealthCheck = true;
+      killTask(driver.get(), taskID);
+    }
+  }
+
+private:
+  void killTask(
+      ExecutorDriver* driver,
+      const TaskID& _taskId,
+      const Duration& gracePeriod)
+  {
     if (launched && !killed) {
       // Send TASK_KILLING if the framework can handle it.
       CHECK_SOME(frameworkInfo);
       CHECK_SOME(taskId);
+      CHECK(taskId.get() == _taskId);
 
       foreach (const FrameworkInfo::Capability& c,
                frameworkInfo->capabilities()) {
@@ -560,44 +606,6 @@ public:
     }
   }
 
-  virtual void error(ExecutorDriver* driver, const string& message) {}
-
-protected:
-  virtual void initialize()
-  {
-    install<TaskHealthStatus>(
-        &CommandExecutorProcess::taskHealthUpdated,
-        &TaskHealthStatus::task_id,
-        &TaskHealthStatus::healthy,
-        &TaskHealthStatus::kill_task);
-  }
-
-  void taskHealthUpdated(
-      const TaskID& taskID,
-      const bool& healthy,
-      const bool& initiateTaskKill)
-  {
-    if (driver.isNone()) {
-      return;
-    }
-
-    cout << "Received task health update, healthy: "
-         << stringify(healthy) << endl;
-
-    TaskStatus status;
-    status.mutable_task_id()->CopyFrom(taskID);
-    status.set_healthy(healthy);
-    status.set_state(TASK_RUNNING);
-    driver.get()->sendStatusUpdate(status);
-
-    if (initiateTaskKill) {
-      killedByHealthCheck = true;
-      killTask(driver.get(), taskID);
-    }
-  }
-
-
-private:
   void reaped(
       ExecutorDriver* driver,
       pid_t pid,


[2/6] mesos git commit: Refactored docker executor's shutdown logic.

Posted by al...@apache.org.
Refactored docker executor's shutdown logic.

Even though the docker executor manages a single task, killTask() and
shutdown() differ in the time period the executor has to finalize. If a
killTask is issued, the executor may use as much time as needed to
wait for the underlying container (not configurable now) to exit. If
asked to shut down, the executor is limited by the agent, which
destroys the entire container (the executor and the docker container)
altogether after a certain timeout. Moreover, in the latter case the user
is usually less interested in graceful shutdown, because the framework
is being removed completely.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c7fde21d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c7fde21d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c7fde21d

Branch: refs/heads/master
Commit: c7fde21d05a251a9ec2fec5e79b8eb29408ca54c
Parents: 83000a7
Author: Alexander Rukletsov <al...@apache.org>
Authored: Thu Mar 31 13:19:15 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 31 13:19:15 2016 +0200

----------------------------------------------------------------------
 src/docker/executor.cpp | 89 +++++++++++++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/c7fde21d/src/docker/executor.cpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.cpp b/src/docker/executor.cpp
index afc769d..5b06024 100644
--- a/src/docker/executor.cpp
+++ b/src/docker/executor.cpp
@@ -188,11 +188,9 @@ public:
 
   void killTask(ExecutorDriver* driver, const TaskID& taskId)
   {
-    cout << "Received killTask" << endl;
+    cout << "Received killTask for task " << taskId.value() << endl;
 
-    // Since the docker executor manages a single task, we
-    // shutdown completely when we receive a killTask.
-    shutdown(driver);
+    killTask(driver, taskId, stopTimeout);
   }
 
   void frameworkMessage(ExecutorDriver* driver, const string& data) {}
@@ -201,38 +199,18 @@ public:
   {
     cout << "Shutting down" << endl;
 
-    if (run.isSome() && !killed) {
-      // Send TASK_KILLING if the framework can handle it.
-      CHECK_SOME(frameworkInfo);
-      CHECK_SOME(taskId);
-
-      foreach (const FrameworkInfo::Capability& c,
-               frameworkInfo->capabilities()) {
-        if (c.type() == FrameworkInfo::Capability::TASK_KILLING_STATE) {
-          TaskStatus status;
-          status.mutable_task_id()->CopyFrom(taskId.get());
-          status.set_state(TASK_KILLING);
-          driver->sendStatusUpdate(status);
-          break;
-        }
-      }
-
-      // The docker daemon might still be in progress starting the
-      // container, therefore we kill both the docker run process
-      // and also ask the daemon to stop the container.
-      run->discard();
-      stop = docker->stop(containerName, stopTimeout);
-      killed = true;
-    }
-
-    // Cleanup health check process.
+    // Since the docker executor manages a single task,
+    // shutdown boils down to killing this task.
     //
-    // TODO(bmahler): Consider doing this after the task has been
-    // reaped, since a framework may be interested in health
-    // information while the task is being killed (consider a
-    // task that takes 30 minutes to be cleanly killed).
-    if (healthPid != -1) {
-      os::killtree(healthPid, SIGKILL);
+    // TODO(bmahler): If a shutdown arrives after a kill task within
+    // the grace period of the `KillPolicy`, we may need to escalate
+    // more quickly (e.g. the shutdown grace period allotted by the
+    // agent is smaller than the kill grace period).
+    if (run.isSome()) {
+      CHECK_SOME(taskId);
+      killTask(driver, taskId.get(), stopTimeout);
+    } else {
+      driver->stop();
     }
   }
 
@@ -273,6 +251,47 @@ protected:
   }
 
 private:
+  void killTask(
+      ExecutorDriver* driver,
+      const TaskID& _taskId,
+      const Duration& gracePeriod)
+  {
+    if (run.isSome() && !killed) {
+      // Send TASK_KILLING if the framework can handle it.
+      CHECK_SOME(frameworkInfo);
+      CHECK_SOME(taskId);
+      CHECK(taskId.get() == _taskId);
+
+      foreach (const FrameworkInfo::Capability& c,
+               frameworkInfo->capabilities()) {
+        if (c.type() == FrameworkInfo::Capability::TASK_KILLING_STATE) {
+          TaskStatus status;
+          status.mutable_task_id()->CopyFrom(taskId.get());
+          status.set_state(TASK_KILLING);
+          driver->sendStatusUpdate(status);
+          break;
+        }
+      }
+
+      // The docker daemon might still be in progress starting the
+      // container, therefore we kill both the docker run process
+      // and also ask the daemon to stop the container.
+      run->discard();
+      stop = docker->stop(containerName, gracePeriod);
+      killed = true;
+    }
+
+    // Cleanup health check process.
+    //
+    // TODO(bmahler): Consider doing this after the task has been
+    // reaped, since a framework may be interested in health
+    // information while the task is being killed (consider a
+    // task that takes 30 minutes to be cleanly killed).
+    if (healthPid != -1) {
+      os::killtree(healthPid, SIGKILL);
+    }
+  }
+
   void reaped(
       ExecutorDriver* _driver,
       const Future<Nothing>& run)


[4/6] mesos git commit: Deprecated the `docker_stop_timeout` flag.

Posted by al...@apache.org.
Deprecated the `docker_stop_timeout` flag.

Instead, a combination of the `executor_shutdown_grace_period`
agent flag and task kill policies should be used.

Review: https://reviews.apache.org/r/44661


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1bbe4859
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1bbe4859
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1bbe4859

Branch: refs/heads/master
Commit: 1bbe485945035f9573cd2bf48d594d79b9a40392
Parents: 327f840
Author: Alexander Rukletsov <ru...@gmail.com>
Authored: Thu Mar 10 15:07:59 2016 +0100
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 31 14:35:45 2016 +0200

----------------------------------------------------------------------
 CHANGELOG                          |  2 ++
 docs/configuration.md              |  5 +++--
 src/docker/executor.hpp            |  7 ++++++-
 src/slave/containerizer/docker.cpp | 16 ++++++++++++++--
 src/slave/flags.cpp                |  6 ++++--
 src/slave/flags.hpp                |  3 +++
 6 files changed, 32 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/1bbe4859/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 09b9e63..b90078d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -31,6 +31,8 @@ Deprecations:
   * [MESOS-2281] - Deprecated the plain text format for credentials in favor of
     the JSON format.
 
+  * [MESOS-4910] - Deprecate the --docker_stop_timeout agent flag.
+
   * [MESOS-5001] - The 'allocator/event_queue_dispatches' metric is now
     deprecated in favor 'of allocator/mesos/event_queue_dispatches'.
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/1bbe4859/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 75c9a0a..da42eaf 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1124,8 +1124,9 @@ path used by the slave's docker image.
     --docker_stop_timeout=VALUE
   </td>
   <td>
-The time as a duration for docker to wait after stopping an instance
-before it kills that instance. (default: 0ns)
+The time docker daemon waits after stopping a container before killing
+that container. This flag is deprecated; use task's kill policy instead.
+(default: 0ns)
   </td>
 </tr>
 <tr>

http://git-wip-us.apache.org/repos/asf/mesos/blob/1bbe4859/src/docker/executor.hpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.hpp b/src/docker/executor.hpp
index abbc419..798ca3d 100644
--- a/src/docker/executor.hpp
+++ b/src/docker/executor.hpp
@@ -52,10 +52,12 @@ struct Flags : public mesos::internal::logging::Flags
         "mapped_directory",
         "The sandbox directory path that is mapped in the docker container.\n");
 
+    // TODO(alexr): Remove this after the deprecation cycle (started in 0.29).
     add(&stop_timeout,
         "stop_timeout",
         "The duration for docker to wait after stopping a running container\n"
-        "before it kills that container.");
+        "before it kills that container. This flag is deprecated; use task's\n"
+        "kill policy instead.");
 
     add(&launcher_dir,
         "launcher_dir",
@@ -69,7 +71,10 @@ struct Flags : public mesos::internal::logging::Flags
   Option<std::string> docker_socket;
   Option<std::string> sandbox_directory;
   Option<std::string> mapped_directory;
+
+  // TODO(alexr): Remove this after the deprecation cycle (started in 0.29).
   Option<Duration> stop_timeout;
+
   Option<std::string> launcher_dir;
 };
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/1bbe4859/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index c5007a3..9314d1f 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -209,9 +209,12 @@ docker::Flags dockerFlags(
   dockerFlags.docker = flags.docker;
   dockerFlags.sandbox_directory = directory;
   dockerFlags.mapped_directory = flags.sandbox_directory;
-  dockerFlags.stop_timeout = flags.docker_stop_timeout;
   dockerFlags.docker_socket = flags.docker_socket;
   dockerFlags.launcher_dir = flags.launcher_dir;
+
+  // TODO(alexr): Remove this after the deprecation cycle (started in 0.29).
+  dockerFlags.stop_timeout = flags.docker_stop_timeout;
+
   return dockerFlags;
 }
 
@@ -925,7 +928,11 @@ Future<Nothing> DockerContainerizerProcess::__recover(
     // Check if we're watching an executor for this container ID and
     // if not, rm -f the Docker container.
     if (!containers_.contains(id.get())) {
-      // TODO(tnachen): Consider using executor_shutdown_grace_period.
+      // TODO(alexr): After the deprecation cycle (started in 0.29.0), update
+      // this to omit the timeout. Graceful shutdown of the container is not
+      // a containerizer responsibility; it is the responsibility of the agent
+      // in co-operation with the executor. Once `destroy()` is called, the
+      // container should be destroyed forcefully.
       futures.push_back(
           docker->stop(
               container.id,
@@ -1837,6 +1844,11 @@ void DockerContainerizerProcess::_destroy(
   LOG(INFO) << "Running docker stop on container '" << containerId << "'";
 
   if (killed) {
+    // TODO(alexr): After the deprecation cycle (started in 0.29.0), update
+    // this to omit the timeout. Graceful shutdown of the container is not
+    // a containerizer responsibility; it is the responsibility of the agent
+    // in co-operation with the executor. Once `destroy()` is called, the
+    // container should be destroyed forcefully.
     docker->stop(container->name(), flags.docker_stop_timeout)
       .onAny(defer(self(), &Self::__destroy, containerId, killed, lambda::_1));
   } else {

http://git-wip-us.apache.org/repos/asf/mesos/blob/1bbe4859/src/slave/flags.cpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp
index 8868e1e..0551ec3 100644
--- a/src/slave/flags.cpp
+++ b/src/slave/flags.cpp
@@ -524,10 +524,12 @@ mesos::internal::slave::Flags::Flags()
       "  ]\n"
       "}");
 
+  // TODO(alexr): Remove this after the deprecation cycle (started in 0.29).
   add(&Flags::docker_stop_timeout,
       "docker_stop_timeout",
-      "The time as a duration for docker to wait after stopping an instance\n"
-      "before it kills that instance.",
+      "The time docker daemon waits after stopping a container before\n"
+      "killing that container. This flag is deprecated; use task's kill\n"
+      "policy instead.",
       Seconds(0));
 
 #ifdef ENABLE_NVIDIA_GPU_SUPPORT

http://git-wip-us.apache.org/repos/asf/mesos/blob/1bbe4859/src/slave/flags.hpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 345a225..d0c606e 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -107,7 +107,10 @@ public:
   Duration docker_remove_delay;
   std::string sandbox_directory;
   Option<ContainerInfo> default_container_info;
+
+  // TODO(alexr): Remove this after the deprecation cycle (started in 0.29).
   Duration docker_stop_timeout;
+
   bool docker_kill_orphans;
   std::string docker_socket;
 #ifdef ENABLE_NVIDIA_GPU_SUPPORT


[3/6] mesos git commit: Used `KillPolicy` and shutdown grace period in docker executor.

Posted by al...@apache.org.
Used `KillPolicy` and shutdown grace period in docker executor.

The docker executor determines how much time it allots the
underlying container to clean up (by passing the timeout to
the docker daemon) based on both the task's optional `KillPolicy`
and the optional `shutdown_grace_period` field in `ExecutorInfo`.

Review: https://reviews.apache.org/r/44660


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/327f840a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/327f840a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/327f840a

Branch: refs/heads/master
Commit: 327f840a32a0c8e2cb05d313b5c5f4d43a206c85
Parents: c7fde21
Author: Alexander Rukletsov <al...@apache.org>
Authored: Thu Mar 31 13:27:20 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 31 13:27:20 2016 +0200

----------------------------------------------------------------------
 include/mesos/mesos.proto    |  8 +++--
 include/mesos/v1/mesos.proto |  8 +++--
 src/docker/executor.cpp      | 71 +++++++++++++++++++++++++++++++++------
 3 files changed, 70 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/327f840a/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index cb68e2c..e1fc02e 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -364,7 +364,8 @@ message HealthCheck {
  *
  * NOTE: For executor-less command-based tasks, the kill is performed
  * via sending a signal to the task process: SIGTERM for the graceful
- * kill and SIGKILL for the forcible kill.
+ * kill and SIGKILL for the forcible kill. For the docker executor-less
+ * tasks the grace period is passed to 'docker stop --time'.
  */
 message KillPolicy {
   // The grace period specifies how long to wait before forcibly
@@ -1251,8 +1252,9 @@ message TaskInfo {
   optional HealthCheck health_check = 8;
 
   // A kill policy for the task. Implemented for executor-less
-  // command-based tasks. For tasks that specify an executor, it is
-  // the executor's responsibility to implement the kill policy.
+  // command-based and docker tasks. For tasks that specify other
+  // executor, it is the executor's responsibility to implement
+  // the kill policy.
   optional KillPolicy kill_policy = 12;
 
   optional bytes data = 6;

http://git-wip-us.apache.org/repos/asf/mesos/blob/327f840a/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index af1dc9e..35789e0 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -364,7 +364,8 @@ message HealthCheck {
  *
  * NOTE: For executor-less command-based tasks, the kill is performed
  * via sending a signal to the task process: SIGTERM for the graceful
- * kill and SIGKILL for the forcible kill.
+ * kill and SIGKILL for the forcible kill. For the docker executor-less
+ * tasks the grace period is passed to 'docker stop --time'.
  */
 message KillPolicy {
   // The grace period specifies how long to wait before forcibly
@@ -1250,8 +1251,9 @@ message TaskInfo {
   optional HealthCheck health_check = 8;
 
   // A kill policy for the task. Implemented for executor-less
-  // command-based tasks. For tasks that specify an executor, it is
-  // the executor's responsibility to implement the kill policy.
+  // command-based and docker tasks. For tasks that specify other
+  // executor, it is the executor's responsibility to implement
+  // the kill policy.
   optional KillPolicy kill_policy = 12;
 
   optional bytes data = 6;

http://git-wip-us.apache.org/repos/asf/mesos/blob/327f840a/src/docker/executor.cpp
----------------------------------------------------------------------
diff --git a/src/docker/executor.cpp b/src/docker/executor.cpp
index 5b06024..6dd4838 100644
--- a/src/docker/executor.cpp
+++ b/src/docker/executor.cpp
@@ -41,6 +41,8 @@
 
 #include "messages/messages.hpp"
 
+#include "slave/constants.hpp"
+
 using namespace mesos;
 using namespace process;
 
@@ -70,7 +72,7 @@ public:
       const string& containerName,
       const string& sandboxDirectory,
       const string& mappedDirectory,
-      const Duration& stopTimeout,
+      const Duration& shutdownGracePeriod,
       const string& healthCheckDir)
     : killed(false),
       killedByHealthCheck(false),
@@ -80,7 +82,7 @@ public:
       containerName(containerName),
       sandboxDirectory(sandboxDirectory),
       mappedDirectory(mappedDirectory),
-      stopTimeout(stopTimeout),
+      shutdownGracePeriod(shutdownGracePeriod),
       stop(Nothing()),
       inspect(Nothing()) {}
 
@@ -125,6 +127,11 @@ public:
     // Capture the TaskID.
     taskId = task.task_id();
 
+    // Capture the kill policy.
+    if (task.has_kill_policy()) {
+      killPolicy = task.kill_policy();
+    }
+
     cout << "Starting task " << taskId.get() << endl;
 
     CHECK(task.has_container());
@@ -190,7 +197,15 @@ public:
   {
     cout << "Received killTask for task " << taskId.value() << endl;
 
-    killTask(driver, taskId, stopTimeout);
+    // Using shutdown grace period as a default is backwards compatible
+    // with the `stop_timeout` flag, deprecated in 0.29.
+    Duration gracePeriod = shutdownGracePeriod;
+
+    if (killPolicy.isSome() && killPolicy->has_grace_period()) {
+      gracePeriod = Nanoseconds(killPolicy->grace_period().nanoseconds());
+    }
+
+    killTask(driver, taskId, gracePeriod);
   }
 
   void frameworkMessage(ExecutorDriver* driver, const string& data) {}
@@ -199,6 +214,16 @@ public:
   {
     cout << "Shutting down" << endl;
 
+    // Currently, 'docker->run' uses the reaper internally, hence we need
+    // to account for the reap interval. We also leave a small buffer of
+    // time to do the forced kill, otherwise the agent may destroy the
+    // container before we can send `TASK_KILLED`.
+    //
+    // TODO(alexr): Remove `MAX_REAP_INTERVAL` once the reaper signals
+    // immediately after the watched process has exited.
+    Duration gracePeriod =
+      shutdownGracePeriod - process::MAX_REAP_INTERVAL() - Seconds(1);
+
     // Since the docker executor manages a single task,
     // shutdown boils down to killing this task.
     //
@@ -208,7 +233,7 @@ public:
     // agent is smaller than the kill grace period).
     if (run.isSome()) {
       CHECK_SOME(taskId);
-      killTask(driver, taskId.get(), stopTimeout);
+      killTask(driver, taskId.get(), gracePeriod);
     } else {
       driver->stop();
     }
@@ -448,7 +473,8 @@ private:
   string containerName;
   string sandboxDirectory;
   string mappedDirectory;
-  Duration stopTimeout;
+  Duration shutdownGracePeriod;
+  Option<KillPolicy> killPolicy;
   Option<Future<Nothing>> run;
   Future<Nothing> stop;
   Future<Nothing> inspect;
@@ -466,7 +492,7 @@ public:
       const string& container,
       const string& sandboxDirectory,
       const string& mappedDirectory,
-      const Duration& stopTimeout,
+      const Duration& shutdownGracePeriod,
       const string& healthCheckDir)
   {
     process = Owned<DockerExecutorProcess>(new DockerExecutorProcess(
@@ -474,7 +500,7 @@ public:
         container,
         sandboxDirectory,
         mappedDirectory,
-        stopTimeout,
+        shutdownGracePeriod,
         healthCheckDir));
 
     spawn(process.get());
@@ -598,9 +624,32 @@ int main(int argc, char** argv)
     return EXIT_FAILURE;
   }
 
-  if (flags.stop_timeout.isNone()) {
-    cerr << flags.usage("Missing required option --stop_timeout") << endl;
-    return EXIT_FAILURE;
+  // Get executor shutdown grace period from the environment.
+  //
+  // NOTE: We avoided introducing a docker executor flag for this
+  // because the docker executor exits if it sees an unknown flag.
+  // This makes it difficult to add or remove docker executor flags
+  // that are unconditionally set by the agent.
+  Duration shutdownGracePeriod =
+    mesos::internal::slave::DEFAULT_EXECUTOR_SHUTDOWN_GRACE_PERIOD;
+  Option<string> value = os::getenv("MESOS_EXECUTOR_SHUTDOWN_GRACE_PERIOD");
+  if (value.isSome()) {
+    Try<Duration> parse = Duration::parse(value.get());
+    if (parse.isError()) {
+      cerr << "Failed to parse value '" << value.get() << "'"
+           << " of 'MESOS_EXECUTOR_SHUTDOWN_GRACE_PERIOD': " << parse.error();
+      return EXIT_FAILURE;
+    }
+
+    shutdownGracePeriod = parse.get();
+  }
+
+  // If the deprecated flag is set, respect it and choose the bigger value.
+  //
+  // TODO(alexr): Remove this after the deprecation cycle (started in 0.29).
+  if (flags.stop_timeout.isSome() &&
+      flags.stop_timeout.get() > shutdownGracePeriod) {
+    shutdownGracePeriod = flags.stop_timeout.get();
   }
 
   if (flags.launcher_dir.isNone()) {
@@ -626,7 +675,7 @@ int main(int argc, char** argv)
       flags.container.get(),
       flags.sandbox_directory.get(),
       flags.mapped_directory.get(),
-      flags.stop_timeout.get(),
+      shutdownGracePeriod,
       flags.launcher_dir.get());
 
   mesos::MesosExecutorDriver driver(&executor);


Re: [5/6] mesos git commit: WIP: mesos-execute.

Posted by Alex R <al...@apache.org>.
Folks,

My apologies for this. The rogue patch has been reverted, and all culprits
have been punished.

AlexR.

On 31 March 2016 at 15:32, <al...@apache.org> wrote:

> WIP: mesos-execute.
>
>
> Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
> Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c0f6ae86
> Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c0f6ae86
> Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c0f6ae86
>
> Branch: refs/heads/master
> Commit: c0f6ae86e69f4a8f4735407d3b0134672b0f531f
> Parents: 1bbe485
> Author: Alexander Rukletsov <al...@apache.org>
> Authored: Wed Mar 30 16:54:30 2016 +0200
> Committer: Alexander Rukletsov <al...@apache.org>
> Committed: Thu Mar 31 14:35:53 2016 +0200
>
> ----------------------------------------------------------------------
>  src/cli/execute.cpp | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)
> ----------------------------------------------------------------------
>
>
>
> http://git-wip-us.apache.org/repos/asf/mesos/blob/c0f6ae86/src/cli/execute.cpp
> ----------------------------------------------------------------------
> diff --git a/src/cli/execute.cpp b/src/cli/execute.cpp
> index af62f41..7ced232 100644
> --- a/src/cli/execute.cpp
> +++ b/src/cli/execute.cpp
> @@ -262,6 +262,15 @@ protected:
>          task.mutable_agent_id()->MergeFrom(offer.agent_id());
>          task.mutable_resources()->CopyFrom(TASK_RESOURCES.get());
>
> +
> +        // Inject `KillPolicy` for testing.
> +        task.mutable_kill_policy()->mutable_grace_period()->
> +            set_nanoseconds(Seconds(11).ns());
> +
> +
> +
> +
> +
>          CommandInfo* commandInfo = task.mutable_command();
>
>          if (shell) {
> @@ -271,7 +280,12 @@ protected:
>            commandInfo->set_value(command.get());
>          } else {
>            // TODO(gilbert): Treat 'command' as executable value and arguments.
> +          // TODO(alexr): Add support for arguments.
> +
>            commandInfo->set_shell(false);
> +
> +          commandInfo->set_value(command.get());
> +          commandInfo->add_arguments()->assign(command.get());
>          }
>
>          if (environment.isSome()) {
> @@ -412,6 +426,23 @@ protected:
>      cout << "Received status update " << status.state()
>           << " for task " << status.task_id() << endl;
>
> +
> +    if (mesos::v1::TASK_RUNNING == status.state()) {
> +      Call call;
> +      call.set_type(Call::KILL);
> +
> +      CHECK(frameworkInfo.has_id());
> +      call.mutable_framework_id()->CopyFrom(frameworkInfo.id());
> +
> +      Call::Kill* kill = call.mutable_kill();
> +      kill->mutable_task_id()->CopyFrom(status.task_id());
> +      kill->mutable_agent_id()->CopyFrom(status.agent_id());
> +
> +      mesos->send(call);
> +    }
> +
> +
> +
>      if (status.has_uuid()) {
>        Call call;
>        call.set_type(Call::ACKNOWLEDGE);
>
>

Re: [5/6] mesos git commit: WIP: mesos-execute.

Posted by Alex R <al...@apache.org>.
Folks,

My apologies for this. The rogue patch has been reverted, and all culprits
have been punished.

AlexR.

On 31 March 2016 at 15:32, <al...@apache.org> wrote:

> WIP: mesos-execute.
>
>
> Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
> Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c0f6ae86
> Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c0f6ae86
> Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c0f6ae86
>
> Branch: refs/heads/master
> Commit: c0f6ae86e69f4a8f4735407d3b0134672b0f531f
> Parents: 1bbe485
> Author: Alexander Rukletsov <al...@apache.org>
> Authored: Wed Mar 30 16:54:30 2016 +0200
> Committer: Alexander Rukletsov <al...@apache.org>
> Committed: Thu Mar 31 14:35:53 2016 +0200
>
> ----------------------------------------------------------------------
>  src/cli/execute.cpp | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)
> ----------------------------------------------------------------------
>
>
>
> http://git-wip-us.apache.org/repos/asf/mesos/blob/c0f6ae86/src/cli/execute.cpp
> ----------------------------------------------------------------------
> diff --git a/src/cli/execute.cpp b/src/cli/execute.cpp
> index af62f41..7ced232 100644
> --- a/src/cli/execute.cpp
> +++ b/src/cli/execute.cpp
> @@ -262,6 +262,15 @@ protected:
>          task.mutable_agent_id()->MergeFrom(offer.agent_id());
>          task.mutable_resources()->CopyFrom(TASK_RESOURCES.get());
>
> +
> +        // Inject `KillPolicy` for testing.
> +        task.mutable_kill_policy()->mutable_grace_period()->
> +            set_nanoseconds(Seconds(11).ns());
> +
> +
> +
> +
> +
>          CommandInfo* commandInfo = task.mutable_command();
>
>          if (shell) {
> @@ -271,7 +280,12 @@ protected:
>            commandInfo->set_value(command.get());
>          } else {
>            // TODO(gilbert): Treat 'command' as executable value and arguments.
> +          // TODO(alexr): Add support for arguments.
> +
>            commandInfo->set_shell(false);
> +
> +          commandInfo->set_value(command.get());
> +          commandInfo->add_arguments()->assign(command.get());
>          }
>
>          if (environment.isSome()) {
> @@ -412,6 +426,23 @@ protected:
>      cout << "Received status update " << status.state()
>           << " for task " << status.task_id() << endl;
>
> +
> +    if (mesos::v1::TASK_RUNNING == status.state()) {
> +      Call call;
> +      call.set_type(Call::KILL);
> +
> +      CHECK(frameworkInfo.has_id());
> +      call.mutable_framework_id()->CopyFrom(frameworkInfo.id());
> +
> +      Call::Kill* kill = call.mutable_kill();
> +      kill->mutable_task_id()->CopyFrom(status.task_id());
> +      kill->mutable_agent_id()->CopyFrom(status.agent_id());
> +
> +      mesos->send(call);
> +    }
> +
> +
> +
>      if (status.has_uuid()) {
>        Call call;
>        call.set_type(Call::ACKNOWLEDGE);
>
>

[5/6] mesos git commit: WIP: mesos-execute.

Posted by al...@apache.org.
WIP: mesos-execute.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c0f6ae86
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c0f6ae86
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c0f6ae86

Branch: refs/heads/master
Commit: c0f6ae86e69f4a8f4735407d3b0134672b0f531f
Parents: 1bbe485
Author: Alexander Rukletsov <al...@apache.org>
Authored: Wed Mar 30 16:54:30 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 31 14:35:53 2016 +0200

----------------------------------------------------------------------
 src/cli/execute.cpp | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/c0f6ae86/src/cli/execute.cpp
----------------------------------------------------------------------
diff --git a/src/cli/execute.cpp b/src/cli/execute.cpp
index af62f41..7ced232 100644
--- a/src/cli/execute.cpp
+++ b/src/cli/execute.cpp
@@ -262,6 +262,15 @@ protected:
         task.mutable_agent_id()->MergeFrom(offer.agent_id());
         task.mutable_resources()->CopyFrom(TASK_RESOURCES.get());
 
+
+        // Inject `KillPolicy` for testing.
+        task.mutable_kill_policy()->mutable_grace_period()->
+            set_nanoseconds(Seconds(11).ns());
+
+
+
+
+
         CommandInfo* commandInfo = task.mutable_command();
 
         if (shell) {
@@ -271,7 +280,12 @@ protected:
           commandInfo->set_value(command.get());
         } else {
           // TODO(gilbert): Treat 'command' as executable value and arguments.
+          // TODO(alexr): Add support for arguments.
+
           commandInfo->set_shell(false);
+
+          commandInfo->set_value(command.get());
+          commandInfo->add_arguments()->assign(command.get());
         }
 
         if (environment.isSome()) {
@@ -412,6 +426,23 @@ protected:
     cout << "Received status update " << status.state()
          << " for task " << status.task_id() << endl;
 
+
+    if (mesos::v1::TASK_RUNNING == status.state()) {
+      Call call;
+      call.set_type(Call::KILL);
+
+      CHECK(frameworkInfo.has_id());
+      call.mutable_framework_id()->CopyFrom(frameworkInfo.id());
+
+      Call::Kill* kill = call.mutable_kill();
+      kill->mutable_task_id()->CopyFrom(status.task_id());
+      kill->mutable_agent_id()->CopyFrom(status.agent_id());
+
+      mesos->send(call);
+    }
+
+
+
     if (status.has_uuid()) {
       Call call;
       call.set_type(Call::ACKNOWLEDGE);


[6/6] mesos git commit: Added Hamburg Mesos User Group to community page.

Posted by al...@apache.org.
Added Hamburg Mesos User Group to community page.

Review: https://reviews.apache.org/r/45539/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a3d29b9c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a3d29b9c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a3d29b9c

Branch: refs/heads/master
Commit: a3d29b9c48e059163a106db4c03f13ba3446829a
Parents: c0f6ae8
Author: Joerg Schad <jo...@mesosphere.io>
Authored: Thu Mar 31 15:31:21 2016 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 31 15:31:21 2016 +0200

----------------------------------------------------------------------
 site/source/community/user-groups.html.md | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/a3d29b9c/site/source/community/user-groups.html.md
----------------------------------------------------------------------
diff --git a/site/source/community/user-groups.html.md b/site/source/community/user-groups.html.md
index bf072cd..fdfe659 100644
--- a/site/source/community/user-groups.html.md
+++ b/site/source/community/user-groups.html.md
@@ -20,6 +20,7 @@ Mesos User Groups (MUGs) are responsible for organizing local meetups, hackathon
 
 ### Germany
 * [Cologne](http://www.meetup.com/Mesos-User-Group-Cologne/)
+* [Hamburg](http://www.meetup.com/Hamburg-Mesos-User-Group/)
 
 ### India
 * [Bangalore](http://www.meetup.com/Bangalore-Mesos-User-Group/)