You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by id...@apache.org on 2014/05/06 20:29:10 UTC
git commit: Extend the Linux launcher to use clone.
Repository: mesos
Updated Branches:
refs/heads/master 4d5866d47 -> 5044bc831
Extend the Linux launcher to use clone.
The launcher now supports cloning processes with namespace flags.
Review: https://reviews.apache.org/r/20816
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5044bc83
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5044bc83
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5044bc83
Branch: refs/heads/master
Commit: 5044bc8318b4ec03163561b0b173e6391a49312c
Parents: 4d5866d
Author: Ian Downes <ia...@gmail.com>
Authored: Tue May 6 11:12:38 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Tue May 6 11:14:04 2014 -0700
----------------------------------------------------------------------
src/slave/containerizer/linux_launcher.cpp | 138 +++++++++++++++---------
src/slave/containerizer/linux_launcher.hpp | 11 +-
2 files changed, 93 insertions(+), 56 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/5044bc83/src/slave/containerizer/linux_launcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/linux_launcher.cpp b/src/slave/containerizer/linux_launcher.cpp
index 530e0bd..c17724b 100644
--- a/src/slave/containerizer/linux_launcher.cpp
+++ b/src/slave/containerizer/linux_launcher.cpp
@@ -16,12 +16,16 @@
* limitations under the License.
*/
+#include <sched.h>
+#include <unistd.h>
+
+#include <linux/sched.h>
+
#include <vector>
#include <stout/abort.hpp>
#include <stout/hashset.hpp>
#include <stout/path.hpp>
-#include <stout/unreachable.hpp>
#include "linux/cgroups.hpp"
@@ -41,8 +45,12 @@ namespace slave {
using state::RunState;
-LinuxLauncher::LinuxLauncher(const Flags& _flags, const string& _hierarchy)
+LinuxLauncher::LinuxLauncher(
+ const Flags& _flags,
+ int _namespaces,
+ const string& _hierarchy)
: flags(_flags),
+ namespaces(_namespaces),
hierarchy(_hierarchy) {}
@@ -58,7 +66,10 @@ Try<Launcher*> LinuxLauncher::create(const Flags& flags)
LOG(INFO) << "Using " << hierarchy.get()
<< " as the freezer hierarchy for the Linux launcher";
- return new LinuxLauncher(flags, hierarchy.get());
+ // TODO(idownes): Inspect the isolation flag to determine namespaces to use.
+ int namespaces = 0;
+
+ return new LinuxLauncher(flags, namespaces, hierarchy.get());
}
@@ -122,80 +133,101 @@ Try<Nothing> LinuxLauncher::recover(const std::list<state::RunState>& states)
}
+// Helper for clone() which expects an int(void*).
+static int childMain(void* child)
+{
+ const lambda::function<int()>* func =
+ static_cast<const lambda::function<int()>*> (child);
+
+ return (*func)();
+}
+
+
+// Helper that creates a new session then blocks on reading the pipe before
+// calling the supplied function.
+static int _childMain(
+ const lambda::function<int()>& childFunction,
+ int pipes[2])
+{
+ // In child.
+ os::close(pipes[1]);
+
+ // Move to a different session (and new process group) so we're independent
+ // from the slave's session (otherwise children will receive SIGHUP if the
+ // slave exits).
+ // TODO(idownes): perror is not listed as async-signal-safe and should be
+ // reimplemented safely.
+ if (setsid() == -1) {
+ perror("Failed to put child in a new session");
+ os::close(pipes[0]);
+ _exit(1);
+ }
+
+ // Do a blocking read on the pipe until the parent signals us to continue.
+ int buf;
+ int len;
+ while ((len = read(pipes[0], &buf, sizeof(buf))) == -1 && errno == EINTR);
+
+ if (len != sizeof(buf)) {
+ ABORT("Failed to synchronize with parent");
+ }
+
+ os::close(pipes[0]);
+
+ // This function should exec() and therefore not return.
+ childFunction();
+
+ ABORT("Child failed to exec");
+
+ return -1;
+}
+
+
Try<pid_t> LinuxLauncher::fork(
const ContainerID& containerId,
- const lambda::function<int()>& inChild)
+ const lambda::function<int()>& childFunction)
{
// Create a freezer cgroup for this container if necessary.
Try<bool> exists = cgroups::exists(hierarchy, cgroup(containerId));
-
if (exists.isError()) {
- return Error("Failed to create freezer cgroup: " + exists.error());
+ return Error("Failed to check existence of freezer cgroup: " +
+ exists.error());
}
if (!exists.get()) {
Try<Nothing> created = cgroups::create(hierarchy, cgroup(containerId));
if (created.isError()) {
- LOG(ERROR) << "Failed to create freezer cgroup for container '"
- << containerId << "': " << created.error();
- return Error("Failed to contain process: " + created.error());
+ return Error("Failed to create freezer cgroup: " + created.error());
}
}
- // Additional processes forked will be put into the same process group and
- // session.
- Option<pid_t> pgid = pids.get(containerId);
-
// Use a pipe to block the child until it's been moved into the freezer
// cgroup.
int pipes[2];
// We assume this should not fail under reasonable conditions so we use CHECK.
CHECK(pipe(pipes) == 0);
- pid_t pid;
-
- if ((pid = ::fork()) == -1) {
- return ErrnoError("Failed to fork");
- }
-
- if (pid == 0) {
- // In child.
- os::close(pipes[1]);
-
- // Move to a previously created process group (and session) if available,
- // else create a new session and process group. Even though we track
- // processes using cgroups we need to move to a different session so we're
- // independent from the slave's session (otherwise children will receive
- // SIGHUP if the slave exits).
- // TODO(idownes): perror is not listed as async-signal-safe and should be
- // reimplemented safely.
- if (pgid.isSome() && (setpgid(0, pgid.get()) == -1)) {
- perror("Failed to put child into process group");
- os::close(pipes[0]);
- _exit(1);
- } else if (setsid() == -1) {
- perror("Failed to put child in a new session");
- os::close(pipes[0]);
- _exit(1);
- }
+ // Use the _childMain helper which moves the child into a new session and
+ // blocks on the pipe until we're ready for it to run.
+ lambda::function<int()> func =
+ lambda::bind(&_childMain, childFunction, pipes);
- // Do a blocking read on the pipe until the parent signals us to continue.
- int buf;
- int len;
- while ((len = read(pipes[0], &buf, sizeof(buf))) == -1 && errno == EINTR);
+ // Stack for the child.
+ // - unsigned long long used for best alignment.
+ // - static is ok because each child gets their own copy after the clone.
+ // - 8 MiB appears to be the default for "ulimit -s" on OSX and Linux.
+ static unsigned long long stack[(8*1024*1024)/sizeof(unsigned long long)];
- if (len != sizeof(buf)) {
- os::close(pipes[0]);
- ABORT("Failed to synchronize with parent");
- }
-
- os::close(pipes[0]);
+ LOG(INFO) << "Cloning child process with flags = " << namespaces;
- // This function should exec() and therefore not return.
- inChild();
-
- ABORT("Child failed to exec");
+ pid_t pid;
+ if ((pid = ::clone(
+ childMain,
+ &stack[sizeof(stack)/sizeof(stack[0]) - 1], // stack grows down
+ namespaces | SIGCHLD, // Specify SIGCHLD as child termination signal
+ static_cast<void*>(&func))) == -1) {
+ return ErrnoError("Failed to clone child process");
}
// Parent.
http://git-wip-us.apache.org/repos/asf/mesos/blob/5044bc83/src/slave/containerizer/linux_launcher.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/linux_launcher.hpp b/src/slave/containerizer/linux_launcher.hpp
index 8f96c69..84249c3 100644
--- a/src/slave/containerizer/linux_launcher.hpp
+++ b/src/slave/containerizer/linux_launcher.hpp
@@ -38,22 +38,27 @@ public:
virtual Try<pid_t> fork(
const ContainerID& containerId,
- const lambda::function<int()>& inChild);
+ const lambda::function<int()>& childFunction);
virtual process::Future<Nothing> destroy(const ContainerID& containerId);
private:
- LinuxLauncher(const Flags& flags, const std::string& hierarchy);
+ LinuxLauncher(
+ const Flags& flags,
+ int namespaces,
+ const std::string& hierarchy);
static const std::string subsystem;
const Flags flags;
+ const int namespaces;
const std::string hierarchy;
std::string cgroup(const ContainerID& containerId);
- // The 'pid' is the process id of the first process and also the process
+ // The 'pid' is the process id of the child process and also the process
// group id and session id.
hashmap<ContainerID, pid_t> pids;
+
};