You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/08/14 20:07:33 UTC

[10/18] git commit: Fixed cgroups isolator to do setsid on the executor process.

Fixed cgroups isolator to do setsid on the executor process.

Review: https://reviews.apache.org/r/13449


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/376cd66c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/376cd66c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/376cd66c

Branch: refs/heads/master
Commit: 376cd66c4a6b71d5444acf615db1fc098747da75
Parents: 649295f
Author: Vinod Kone <vi...@twitter.com>
Authored: Fri Aug 9 15:00:16 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Aug 13 14:36:00 2013 -0700

----------------------------------------------------------------------
 src/slave/cgroups_isolator.cpp     | 51 ++++++++++++++++++++++++++++++++-
 src/slave/process_isolator.cpp     |  2 --
 src/tests/slave_recovery_tests.cpp |  6 ++++
 3 files changed, 56 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/376cd66c/src/slave/cgroups_isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/cgroups_isolator.cpp b/src/slave/cgroups_isolator.cpp
index 3427c62..d4ccd11 100644
--- a/src/slave/cgroups_isolator.cpp
+++ b/src/slave/cgroups_isolator.cpp
@@ -532,6 +532,19 @@ void CgroupsIsolator::launchExecutor(
   // Start listening on OOM events.
   oomListen(frameworkId, executorId);
 
+  // Use pipes to determine which child has successfully changed session.
+  int pipes[2];
+  if (pipe(pipes) < 0) {
+    PLOG(FATAL) << "Failed to create a pipe";
+  }
+
+  // Set the FD_CLOEXEC flags on these pipes
+  Try<Nothing> cloexec = os::cloexec(pipes[0]);
+  CHECK_SOME(cloexec) << "Error setting FD_CLOEXEC on pipe[0]";
+
+  cloexec = os::cloexec(pipes[1]);
+  CHECK_SOME(cloexec) << "Error setting FD_CLOEXEC on pipe[1]";
+
   // Launch the executor using fork-exec.
   pid_t pid;
   if ((pid = ::fork()) == -1) {
@@ -539,6 +552,15 @@ void CgroupsIsolator::launchExecutor(
   }
 
   if (pid > 0) {
+    os::close(pipes[1]);
+
+    // Get the child's pid via the pipe.
+    if (read(pipes[0], &pid, sizeof(pid)) == -1) {
+      PLOG(FATAL) << "Failed to get child PID from pipe";
+    }
+
+    os::close(pipes[0]);
+
     // In parent process.
     LOG(INFO) << "Forked executor at = " << pid;
 
@@ -558,7 +580,34 @@ void CgroupsIsolator::launchExecutor(
              executorId,
              pid);
   } else {
-    // In child process.
+    // In child process, we make cleanup easier by putting process
+    // into it's own session. DO NOT USE GLOG!
+    os::close(pipes[0]);
+
+    // NOTE: We setsid() in a loop because setsid() might fail if another
+    // process has the same process group id as the calling process.
+    while ((pid = setsid()) == -1) {
+      perror("Could not put executor in its own session");
+
+      std::cout << "Forking another process and retrying ..." << std::endl;
+
+      if ((pid = fork()) == -1) {
+        perror("Failed to fork to launch executor");
+        abort();
+      }
+
+      if (pid > 0) {
+        // In parent process.
+        exit(0);
+      }
+    }
+
+    if (write(pipes[1], &pid, sizeof(pid)) != sizeof(pid)) {
+      perror("Failed to write PID on pipe");
+      abort();
+    }
+
+    os::close(pipes[1]);
 
     launcher::ExecutorLauncher launcher(
         slaveId,

http://git-wip-us.apache.org/repos/asf/mesos/blob/376cd66c/src/slave/process_isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/process_isolator.cpp b/src/slave/process_isolator.cpp
index a80b047..24a7fb2 100644
--- a/src/slave/process_isolator.cpp
+++ b/src/slave/process_isolator.cpp
@@ -191,8 +191,6 @@ void ProcessIsolator::launchExecutor(
 
       if (pid > 0) {
         // In parent process.
-        // It is ok to suicide here, though process reaper signals the exit,
-        // because the process isolator ignores unknown processes.
         exit(0);
       }
     }

http://git-wip-us.apache.org/repos/asf/mesos/blob/376cd66c/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 6de8108..ef6dbf7 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -734,6 +734,9 @@ TYPED_TEST(SlaveRecoveryTest, RecoverCompletedExecutor)
   EXPECT_CALL(sched, statusUpdate(_, _))
     .Times(2); // TASK_RUNNING and TASK_FINISHED updates.
 
+  EXPECT_CALL(sched, offerRescinded(_, _))
+    .Times(AtMost(1));
+
   Future<Nothing> schedule = FUTURE_DISPATCH(
       _, &GarbageCollectorProcess::schedule);
 
@@ -1366,6 +1369,9 @@ TYPED_TEST(SlaveRecoveryTest, ShutdownSlave)
 
   AWAIT_READY(statusUpdate1); // Wait for TASK_RUNNING update.
 
+  EXPECT_CALL(sched, offerRescinded(_, _))
+    .Times(AtMost(1));
+
   Future<Nothing> executorTerminated =
     FUTURE_DISPATCH(_, &Slave::executorTerminated);