You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by id...@apache.org on 2014/05/06 20:29:10 UTC

git commit: Extend the Linux launcher to use clone.

Repository: mesos
Updated Branches:
  refs/heads/master 4d5866d47 -> 5044bc831


Extend the Linux launcher to use clone.

The launcher now supports cloning processes with namespace flags.

Review: https://reviews.apache.org/r/20816


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5044bc83
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5044bc83
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5044bc83

Branch: refs/heads/master
Commit: 5044bc8318b4ec03163561b0b173e6391a49312c
Parents: 4d5866d
Author: Ian Downes <ia...@gmail.com>
Authored: Tue May 6 11:12:38 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Tue May 6 11:14:04 2014 -0700

----------------------------------------------------------------------
 src/slave/containerizer/linux_launcher.cpp | 138 +++++++++++++++---------
 src/slave/containerizer/linux_launcher.hpp |  11 +-
 2 files changed, 93 insertions(+), 56 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5044bc83/src/slave/containerizer/linux_launcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/linux_launcher.cpp b/src/slave/containerizer/linux_launcher.cpp
index 530e0bd..c17724b 100644
--- a/src/slave/containerizer/linux_launcher.cpp
+++ b/src/slave/containerizer/linux_launcher.cpp
@@ -16,12 +16,16 @@
  * limitations under the License.
  */
 
+#include <sched.h>
+#include <unistd.h>
+
+#include <linux/sched.h>
+
 #include <vector>
 
 #include <stout/abort.hpp>
 #include <stout/hashset.hpp>
 #include <stout/path.hpp>
-#include <stout/unreachable.hpp>
 
 #include "linux/cgroups.hpp"
 
@@ -41,8 +45,12 @@ namespace slave {
 
 using state::RunState;
 
-LinuxLauncher::LinuxLauncher(const Flags& _flags, const string& _hierarchy)
+LinuxLauncher::LinuxLauncher(
+    const Flags& _flags,
+    int _namespaces,
+    const string& _hierarchy)
   : flags(_flags),
+    namespaces(_namespaces),
     hierarchy(_hierarchy) {}
 
 
@@ -58,7 +66,10 @@ Try<Launcher*> LinuxLauncher::create(const Flags& flags)
   LOG(INFO) << "Using " << hierarchy.get()
             << " as the freezer hierarchy for the Linux launcher";
 
-  return new LinuxLauncher(flags, hierarchy.get());
+  // TODO(idownes): Inspect the isolation flag to determine namespaces to use.
+  int namespaces = 0;
+
+  return new LinuxLauncher(flags, namespaces, hierarchy.get());
 }
 
 
@@ -122,80 +133,101 @@ Try<Nothing> LinuxLauncher::recover(const std::list<state::RunState>& states)
 }
 
 
+// Helper for clone() which expects an int(void*).
+static int childMain(void* child)
+{
+  const lambda::function<int()>* func =
+    static_cast<const lambda::function<int()>*> (child);
+
+  return (*func)();
+}
+
+
+// Helper that creates a new session then blocks on reading the pipe before
+// calling the supplied function.
+static int _childMain(
+    const lambda::function<int()>& childFunction,
+    int pipes[2])
+{
+  // In child.
+  os::close(pipes[1]);
+
+  // Move to a different session (and new process group) so we're independent
+  // from the slave's session (otherwise children will receive SIGHUP if the
+  // slave exits).
+  // TODO(idownes): perror is not listed as async-signal-safe and should be
+  // reimplemented safely.
+  if (setsid() == -1) {
+    perror("Failed to put child in a new session");
+    os::close(pipes[0]);
+    _exit(1);
+  }
+
+  // Do a blocking read on the pipe until the parent signals us to continue.
+  int buf;
+  int len;
+  while ((len = read(pipes[0], &buf, sizeof(buf))) == -1 && errno == EINTR);
+
+  if (len != sizeof(buf)) {
+    ABORT("Failed to synchronize with parent");
+  }
+
+  os::close(pipes[0]);
+
+  // This function should exec() and therefore not return.
+  childFunction();
+
+  ABORT("Child failed to exec");
+
+  return -1;
+}
+
+
 Try<pid_t> LinuxLauncher::fork(
     const ContainerID& containerId,
-    const lambda::function<int()>& inChild)
+    const lambda::function<int()>& childFunction)
 {
   // Create a freezer cgroup for this container if necessary.
   Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId));
-
   if (exists.isError()) {
-    return Error("Failed to create freezer cgroup: " + exists.error());
+    return Error("Failed to check existence of freezer cgroup: " +
+                 exists.error());
   }
 
   if (!exists.get()) {
     Try<Nothing> created = cgroups::create(hierarchy, cgroup(containerId));
 
     if (created.isError()) {
-      LOG(ERROR) << "Failed to create freezer cgroup for container '"
-                 << containerId << "': " << created.error();
-      return Error("Failed to contain process: " + created.error());
+      return Error("Failed to create freezer cgroup: " + created.error());
     }
   }
 
-  // Additional processes forked will be put into the same process group and
-  // session.
-  Option<pid_t> pgid = pids.get(containerId);
-
   // Use a pipe to block the child until it's been moved into the freezer
   // cgroup.
   int pipes[2];
   // We assume this should not fail under reasonable conditions so we use CHECK.
   CHECK(pipe(pipes) == 0);
 
-  pid_t pid;
-
-  if ((pid = ::fork()) == -1) {
-    return ErrnoError("Failed to fork");
-  }
-
-  if (pid == 0) {
-    // In child.
-    os::close(pipes[1]);
-
-    // Move to a previously created process group (and session) if available,
-    // else create a new session and process group. Even though we track
-    // processes using cgroups we need to move to a different session so we're
-    // independent from the slave's session (otherwise children will receive
-    // SIGHUP if the slave exits).
-    // TODO(idownes): perror is not listed as async-signal-safe and should be
-    // reimplemented safely.
-    if (pgid.isSome() && (setpgid(0, pgid.get()) == -1)) {
-      perror("Failed to put child into process group");
-      os::close(pipes[0]);
-      _exit(1);
-    } else if (setsid() == -1) {
-      perror("Failed to put child in a new session");
-      os::close(pipes[0]);
-      _exit(1);
-    }
+  // Use the _childMain helper which moves the child into a new session and
+  // blocks on the pipe until we're ready for it to run.
+  lambda::function<int()> func =
+    lambda::bind(&_childMain, childFunction, pipes);
 
-    // Do a blocking read on the pipe until the parent signals us to continue.
-    int buf;
-    int len;
-    while ((len = read(pipes[0], &buf, sizeof(buf))) == -1 && errno == EINTR);
+  // Stack for the child.
+  // - unsigned long long used for best alignment.
+  // - static is ok because each child gets its own copy after the clone.
+  // - 8 MiB appears to be the default for "ulimit -s" on OSX and Linux.
+  static unsigned long long stack[(8*1024*1024)/sizeof(unsigned long long)];
 
-    if (len != sizeof(buf)) {
-      os::close(pipes[0]);
-      ABORT("Failed to synchronize with parent");
-    }
-
-    os::close(pipes[0]);
+  LOG(INFO) << "Cloning child process with flags = " << namespaces;
 
-    // This function should exec() and therefore not return.
-    inChild();
-
-    ABORT("Child failed to exec");
+  pid_t pid;
+  if ((pid = ::clone(
+          childMain,
+          &stack[sizeof(stack)/sizeof(stack[0]) - 1],  // stack grows down
+          namespaces | SIGCHLD,   // Specify SIGCHLD as child termination signal
+          static_cast<void*>(&func))) == -1) {
+      return ErrnoError("Failed to clone child process");
   }
 
   // Parent.

http://git-wip-us.apache.org/repos/asf/mesos/blob/5044bc83/src/slave/containerizer/linux_launcher.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/linux_launcher.hpp b/src/slave/containerizer/linux_launcher.hpp
index 8f96c69..84249c3 100644
--- a/src/slave/containerizer/linux_launcher.hpp
+++ b/src/slave/containerizer/linux_launcher.hpp
@@ -38,22 +38,27 @@ public:
 
   virtual Try<pid_t> fork(
       const ContainerID& containerId,
-      const lambda::function<int()>& inChild);
+      const lambda::function<int()>& childFunction);
 
   virtual process::Future<Nothing> destroy(const ContainerID& containerId);
 
 private:
-  LinuxLauncher(const Flags& flags, const std::string& hierarchy);
+  LinuxLauncher(
+      const Flags& flags,
+      int namespaces,
+      const std::string& hierarchy);
 
   static const std::string subsystem;
   const Flags flags;
+  const int namespaces;
   const std::string hierarchy;
 
   std::string cgroup(const ContainerID& containerId);
 
-  // The 'pid' is the process id of the first process and also the process
+  // The 'pid' is the process id of the child process and also the process
   // group id and session id.
   hashmap<ContainerID, pid_t> pids;
+
 };