You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 07:02:00 UTC

svn commit: r1131724 - /incubator/mesos/trunk/src/process_based_isolation_module.cpp

Author: benh
Date: Sun Jun  5 05:02:00 2011
New Revision: 1131724

URL: http://svn.apache.org/viewvc?rev=1131724&view=rev
Log:
Using sessions and doing process group killing in order to clean up most descendants of an executor (this still will not clean up any descendants that create their own process group or session, but hopefully that is unlikely, and if the organization is doing that then they probably really know what they are doing).

Modified:
    incubator/mesos/trunk/src/process_based_isolation_module.cpp

Modified: incubator/mesos/trunk/src/process_based_isolation_module.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/process_based_isolation_module.cpp?rev=1131724&r1=1131723&r2=1131724&view=diff
==============================================================================
--- incubator/mesos/trunk/src/process_based_isolation_module.cpp (original)
+++ incubator/mesos/trunk/src/process_based_isolation_module.cpp Sun Jun  5 05:02:00 2011
@@ -65,12 +65,15 @@ void ProcessBasedIsolationModule::startE
     PLOG(FATAL) << "Failed to fork to launch new executor";
 
   if (pid) {
-    // In parent process
+    // In parent process, record the pid for killpg later.
     LOG(INFO) << "Started executor, OS pid = " << pid;
     osPid[framework->id] = pid;
     framework->executorStatus = "PID: " + lexical_cast<string>(pid);
   } else {
-    // In child process
+    // In child process, do setsid to make cleanup easier.
+    if ((pid = setsid()) == -1)
+      perror("setsid error");
+
     createExecutorLauncher(framework)->run();
   }
 }
@@ -79,11 +82,18 @@ void ProcessBasedIsolationModule::startE
 void ProcessBasedIsolationModule::killExecutor(Framework* fw)
 {
   if (osPid[fw->id] != -1) {
-    LOG(INFO) << "Sending SIGTERM to pid " << osPid[fw->id];
-    kill(osPid[fw->id], SIGKILL);
+    // TODO(benh): Consider sending a SIGTERM, then after so much time
+    // if it still hasn't exited do a SIGKILL (can use a libprocess
+    // process for this).
+    LOG(INFO) << "Sending SIGKILL to gpid " << osPid[fw->id];
+    killpg(osPid[fw->id], SIGKILL);
     osPid[fw->id] = -1;
     fw->executorStatus = "No executor running";
-    // TODO: Kill all of the process's descendants?
+    // TODO(benh): Kill all of the process's descendants? Perhaps
+    // create a new libprocess process that continually tries to kill
+    // all the processes that are a descendant of the executor, trying
+    // to kill the executor last ... maybe this is just too much of a
+    // burden?
   }
 }