You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2013/04/13 00:47:58 UTC

svn commit: r1467505 - /incubator/mesos/trunk/src/slave/process_isolator.cpp

Author: bmahler
Date: Fri Apr 12 22:47:58 2013
New Revision: 1467505

URL: http://svn.apache.org/r1467505
Log:
Added child process monitoring to the process-based isolation module.

Review: https://reviews.apache.org/r/9795

Modified:
    incubator/mesos/trunk/src/slave/process_isolator.cpp

Modified: incubator/mesos/trunk/src/slave/process_isolator.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/process_isolator.cpp?rev=1467505&r1=1467504&r2=1467505&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/process_isolator.cpp (original)
+++ incubator/mesos/trunk/src/slave/process_isolator.cpp Fri Apr 12 22:47:58 2013
@@ -26,6 +26,7 @@
 #endif
 
 #include <map>
+#include <set>
 
 #include <process/clock.hpp>
 #include <process/dispatch.hpp>
@@ -51,6 +52,7 @@
 using namespace process;
 
 using std::map;
+using std::set;
 using std::string;
 
 using process::wait; // Necessary on some OS's to disambiguate.
@@ -361,22 +363,59 @@ Future<ResourceStatistics> ProcessIsolat
   // Get the page size, used for memory accounting.
   // NOTE: This is more portable than using getpagesize().
   long pageSize = sysconf(_SC_PAGESIZE);
+  PCHECK(pageSize > 0) << "Failed to get sysconf(_SC_PAGESIZE)";
 
   // Get the number of clock ticks, used for cpu accounting.
   long ticks = sysconf(_SC_CLK_TCK);
+  PCHECK(ticks > 0) << "Failed to get sysconf(_SC_CLK_TCK)";
 
   CHECK_SOME(info->pid);
-  Try<proc::ProcessStatistics> stat = proc::stat(info->pid.get());
 
-  if (stat.isSome() && pageSize > 0) {
-    result.set_memory_rss(stat.get().rss * pageSize);
+  // Get the parent process usage statistics.
+  Try<proc::ProcessStatus> status = proc::status(info->pid.get());
+
+  if (status.isError()) {
+    return Future<ResourceStatistics>::failed(status.error());
   }
 
-  if (stat.isSome() && ticks > 0) {
-    result.set_cpu_user_time((double) stat.get().utime / (double) ticks);
-    result.set_cpu_system_time((double) stat.get().stime / (double) ticks);
+  result.set_memory_rss(status.get().rss * pageSize);
+  result.set_cpu_user_time((double) status.get().utime / (double) ticks);
+  result.set_cpu_system_time((double) status.get().stime / (double) ticks);
+
+  // Now aggregate all descendant process usage statistics.
+  Try<set<pid_t> > children = proc::children(info->pid.get(), true);
+
+  if (children.isError()) {
+    return Future<ResourceStatistics>::failed(
+        "Failed to get children of " + stringify(info->pid.get()) + ": " +
+        children.error());
+  }
+
+  // Aggregate the usage of all child processes.
+  foreach (pid_t child, children.get()) {
+    status = proc::status(child);
+
+    if (status.isError()) {
+      LOG(WARNING) << "Failed to get status of descendant process " << child
+                   << " of parent " << info->pid.get() << ": "
+                   << status.error();
+      continue;
+    }
+
+    result.set_memory_rss(
+        result.memory_rss() +
+        status.get().rss * pageSize);
+
+    result.set_cpu_user_time(
+        result.cpu_user_time() +
+        (double) status.get().utime / (double) ticks);
+
+    result.set_cpu_system_time(
+        result.cpu_system_time() +
+        (double) status.get().stime / (double) ticks);
   }
 #elif defined __APPLE__
+  // TODO(bmahler): Aggregate the usage of all child processes.
   // NOTE: There are several pitfalls to using proc_pidinfo().
   // In particular:
   //   -This will not work for many root processes.
@@ -385,19 +424,22 @@ Future<ResourceStatistics> ProcessIsolat
   // This beats using task_for_pid(), which only works for the same pid.
   // For further discussion around these issues,
   // see: http://code.google.com/p/psutil/issues/detail?id=297
-  struct proc_taskinfo task;
+  CHECK_SOME(info->pid);
+
+  proc_taskinfo task;
   int size =
     proc_pidinfo(info->pid.get(), PROC_PIDTASKINFO, 0, &task, sizeof(task));
 
-  if (size == sizeof(task)) {
-    result.set_memory_rss(task.pti_resident_size);
-
-    // NOTE: CPU Times are in nanoseconds, but this is not documented!
-    result.set_cpu_user_time(Nanoseconds(task.pti_total_user).secs());
-    result.set_cpu_system_time(Nanoseconds(task.pti_total_system).secs());
-  } else {
-    LOG(WARNING) << "Failed to get proc_pidinfo: " << size;
+  if (size != sizeof(task)) {
+    return Future<ResourceStatistics>::failed(
+        "Failed to get proc_pidinfo: " + stringify(size));
   }
+
+  result.set_memory_rss(task.pti_resident_size);
+
+  // NOTE: CPU Times are in nanoseconds, but this is not documented!
+  result.set_cpu_user_time(Nanoseconds(task.pti_total_user).secs());
+  result.set_cpu_system_time(Nanoseconds(task.pti_total_system).secs());
 #endif
 
   return result;