You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/08/14 20:07:33 UTC
[10/18] git commit: Fixed cgroups isolator to do setsid on the
executor process.
Fixed cgroups isolator to do setsid on the executor process.
Review: https://reviews.apache.org/r/13449
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/376cd66c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/376cd66c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/376cd66c
Branch: refs/heads/master
Commit: 376cd66c4a6b71d5444acf615db1fc098747da75
Parents: 649295f
Author: Vinod Kone <vi...@twitter.com>
Authored: Fri Aug 9 15:00:16 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Aug 13 14:36:00 2013 -0700
----------------------------------------------------------------------
src/slave/cgroups_isolator.cpp | 51 ++++++++++++++++++++++++++++++++-
src/slave/process_isolator.cpp | 2 --
src/tests/slave_recovery_tests.cpp | 6 ++++
3 files changed, 56 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/376cd66c/src/slave/cgroups_isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/cgroups_isolator.cpp b/src/slave/cgroups_isolator.cpp
index 3427c62..d4ccd11 100644
--- a/src/slave/cgroups_isolator.cpp
+++ b/src/slave/cgroups_isolator.cpp
@@ -532,6 +532,19 @@ void CgroupsIsolator::launchExecutor(
// Start listening on OOM events.
oomListen(frameworkId, executorId);
+ // Use pipes to determine which child has successfully changed session.
+ int pipes[2];
+ if (pipe(pipes) < 0) {
+ PLOG(FATAL) << "Failed to create a pipe";
+ }
+
+ // Set the FD_CLOEXEC flags on these pipes
+ Try<Nothing> cloexec = os::cloexec(pipes[0]);
+ CHECK_SOME(cloexec) << "Error setting FD_CLOEXEC on pipe[0]";
+
+ cloexec = os::cloexec(pipes[1]);
+ CHECK_SOME(cloexec) << "Error setting FD_CLOEXEC on pipe[1]";
+
// Launch the executor using fork-exec.
pid_t pid;
if ((pid = ::fork()) == -1) {
@@ -539,6 +552,15 @@ void CgroupsIsolator::launchExecutor(
}
if (pid > 0) {
+ os::close(pipes[1]);
+
+ // Get the child's pid via the pipe.
+ if (read(pipes[0], &pid, sizeof(pid)) == -1) {
+ PLOG(FATAL) << "Failed to get child PID from pipe";
+ }
+
+ os::close(pipes[0]);
+
// In parent process.
LOG(INFO) << "Forked executor at = " << pid;
@@ -558,7 +580,34 @@ void CgroupsIsolator::launchExecutor(
executorId,
pid);
} else {
- // In child process.
+ // In child process, we make cleanup easier by putting process
+ // into it's own session. DO NOT USE GLOG!
+ os::close(pipes[0]);
+
+ // NOTE: We setsid() in a loop because setsid() might fail if another
+ // process has the same process group id as the calling process.
+ while ((pid = setsid()) == -1) {
+ perror("Could not put executor in its own session");
+
+ std::cout << "Forking another process and retrying ..." << std::endl;
+
+ if ((pid = fork()) == -1) {
+ perror("Failed to fork to launch executor");
+ abort();
+ }
+
+ if (pid > 0) {
+ // In parent process.
+ exit(0);
+ }
+ }
+
+ if (write(pipes[1], &pid, sizeof(pid)) != sizeof(pid)) {
+ perror("Failed to write PID on pipe");
+ abort();
+ }
+
+ os::close(pipes[1]);
launcher::ExecutorLauncher launcher(
slaveId,
http://git-wip-us.apache.org/repos/asf/mesos/blob/376cd66c/src/slave/process_isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/process_isolator.cpp b/src/slave/process_isolator.cpp
index a80b047..24a7fb2 100644
--- a/src/slave/process_isolator.cpp
+++ b/src/slave/process_isolator.cpp
@@ -191,8 +191,6 @@ void ProcessIsolator::launchExecutor(
if (pid > 0) {
// In parent process.
- // It is ok to suicide here, though process reaper signals the exit,
- // because the process isolator ignores unknown processes.
exit(0);
}
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/376cd66c/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 6de8108..ef6dbf7 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -734,6 +734,9 @@ TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
EXPECT_CALL(sched, statusUpdate(_, _))
.Times(2); // TASK_RUNNING and TASK_FINISHED updates.
+ EXPECT_CALL(sched, offerRescinded(_, _))
+ .Times(AtMost(1));
+
Future<Nothing> schedule = FUTURE_DISPATCH(
_, &GarbageCollectorProcess::schedule);
@@ -1366,6 +1369,9 @@ TYPED_TEST(SlaveRecoveryTest, ShutdownSlave)
AWAIT_READY(statusUpdate1); // Wait for TASK_RUNNING update.
+ EXPECT_CALL(sched, offerRescinded(_, _))
+ .Times(AtMost(1));
+
Future<Nothing> executorTerminated =
FUTURE_DISPATCH(_, &Slave::executorTerminated);