You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ti...@apache.org on 2014/05/25 23:33:38 UTC

git commit: Added workaround to allow the ExternalContainerizer containers to get recovered via slave state.

Repository: mesos
Updated Branches:
  refs/heads/master fc306fb3b -> a8cb44346


Added workaround to allow the ExternalContainerizer containers to get recovered via slave state.

The slave currently expects a forked executor PID in its checkpointed
state when recovering. For the ExternalContainerizer this does not
make sense; nevertheless, this patch checkpoints such a PID to work
around the problem described in MESOS-1328 and MESOS-923.

Review: https://reviews.apache.org/r/21677


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a8cb4434
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a8cb4434
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a8cb4434

Branch: refs/heads/master
Commit: a8cb44346fa5101844d4de145354481f84417fb6
Parents: fc306fb
Author: Till Toenshoff <to...@me.com>
Authored: Sun May 25 23:26:28 2014 +0200
Committer: Till Toenshoff <to...@me.com>
Committed: Sun May 25 23:26:28 2014 +0200

----------------------------------------------------------------------
 .../containerizer/external_containerizer.cpp    | 29 ++++++++++++++++++++
 1 file changed, 29 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/a8cb4434/src/slave/containerizer/external_containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/external_containerizer.cpp b/src/slave/containerizer/external_containerizer.cpp
index ac3dd18..b39c845 100644
--- a/src/slave/containerizer/external_containerizer.cpp
+++ b/src/slave/containerizer/external_containerizer.cpp
@@ -437,6 +437,35 @@ Future<Nothing> ExternalContainerizerProcess::launch(
                    "' failed: " + invoked.error());
   }
 
+  // Checkpoint the executor's pid if requested.
+  // NOTE: Containerizer(s) currently rely on their state being
+  // persisted in the slave. However, that responsibility should have
+  // been delegated to the containerizer.
+  // To work around the mandatory forked pid recovery, we need to
+  // checkpoint one. See MESOS-1328 and MESOS-923.
+  // TODO(tillt): Remove this entirely as soon as MESOS-923 is fixed.
+  if (checkpoint) {
+    const string& path = slave::paths::getForkedPidPath(
+        slave::paths::getMetaRootDir(flags.work_dir),
+        slaveId,
+        executor.framework_id(),
+        executor.executor_id(),
+        containerId);
+
+    LOG(INFO) << "Checkpointing executor's forked pid " << invoked.get().pid()
+              << " to '" << path <<  "'";
+
+    Try<Nothing> checkpointed =
+      slave::state::checkpoint(path, stringify(invoked.get().pid()));
+
+    if (checkpointed.isError()) {
+      LOG(ERROR) << "Failed to checkpoint executor's forked pid to '"
+                 << path << "': " << checkpointed.error();
+
+      return Failure("Could not checkpoint executor's pid");
+    }
+  }
+
   // Record the container launch intend.
   actives.put(containerId, Owned<Container>(new Container(sandbox)));