You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2015/04/02 21:05:21 UTC

[1/3] mesos git commit: Added command logging for processes running in slave's cgroup.

Repository: mesos
Updated Branches:
  refs/heads/master d72c11cf9 -> 88db77f26


Added command logging for processes running in slave's cgroup.

Review: https://reviews.apache.org/r/32742


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/2b5e5b4c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/2b5e5b4c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/2b5e5b4c

Branch: refs/heads/master
Commit: 2b5e5b4c399174c8890727344b0acb9b8da5d6d3
Parents: d72c11c
Author: Jie Yu <yu...@gmail.com>
Authored: Wed Apr 1 13:05:15 2015 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Thu Apr 2 11:54:03 2015 -0700

----------------------------------------------------------------------
 src/slave/slave.cpp | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/2b5e5b4c/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 0f70eba..521624c 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -245,10 +245,24 @@ void Slave::initialize()
       // possibly automatically killing any running processes and
       // moving this code to during recovery.
       if (!processes.get().empty()) {
+        // For each process, we print its pid as well as its command
+        // to help triaging.
+        vector<string> infos;
+        foreach (pid_t pid, processes.get()) {
+          Result<os::Process> proc = os::process(pid);
+
+          // Only print the command if available.
+          if (proc.isSome()) {
+            infos.push_back(stringify(pid) + " '" + proc.get().command + "'");
+          } else {
+            infos.push_back(stringify(pid));
+          }
+        }
+
         EXIT(1) << "A slave (or child process) is still running, "
-                << "please check the process(es) '"
-                << stringify(processes.get()) << "' listed in "
-                << path::join(hierarchy.get(), cgroup, "cgroups.proc");
+                << "please check the following process(es) listed in "
+                << path::join(hierarchy.get(), cgroup, "cgroups.proc")
+                << ":\n" << strings::join("\n", infos);
       }
 
       // Move all of our threads into the cgroup.


[2/3] mesos git commit: Set death signal for forked du processes for posix/disk isolator.

Posted by ji...@apache.org.
Set death signal for forked du processes for posix/disk isolator.

Review: https://reviews.apache.org/r/32694


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/86d45bc1
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/86d45bc1
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/86d45bc1

Branch: refs/heads/master
Commit: 86d45bc1411544b0dd38b18b3c38c84107ccb926
Parents: 2b5e5b4
Author: Jie Yu <yu...@gmail.com>
Authored: Tue Mar 31 11:08:55 2015 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Thu Apr 2 12:03:11 2015 -0700

----------------------------------------------------------------------
 .../containerizer/isolators/posix/disk.cpp      | 25 ++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/86d45bc1/src/slave/containerizer/isolators/posix/disk.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/posix/disk.cpp b/src/slave/containerizer/isolators/posix/disk.cpp
index 6e41e2a..d2ea3b1 100644
--- a/src/slave/containerizer/isolators/posix/disk.cpp
+++ b/src/slave/containerizer/isolators/posix/disk.cpp
@@ -18,6 +18,9 @@
 
 #include <signal.h>
 
+#ifdef __linux__
+#include <sys/prctl.h>
+#endif
 #include <sys/types.h>
 
 #include <deque>
@@ -353,6 +356,20 @@ private:
     Promise<Bytes> promise;
   };
 
+  // This function is invoked right before each 'du' is exec'ed. Note
+  // that this function needs to be async-signal-safe.
+  static int setupChild()
+  {
+#ifdef __linux__
+    // Have SIGKILL delivered to this child process if the parent exits.
+    // NOTE: This function should never return non-zero because we
+    // are passing in a valid signal.
+    return ::prctl(PR_SET_PDEATHSIG, SIGKILL);
+#else
+    return 0; // Nothing is set up on non-Linux platforms.
+#endif
+  }
+
   void discard(const string& path)
   {
     for (auto it = entries.begin(); it != entries.end(); ++it) {
@@ -387,10 +404,14 @@ private:
     // fs data structures, (b) disk I/O to read those structures, and
     // (c) the cpu time to traverse.
     Try<Subprocess> s = subprocess(
-        "du -k -s " + entry->path,
+        "du",
+        vector<string>({"du", "-k", "-s", entry->path}),
         Subprocess::PATH("/dev/null"),
         Subprocess::PIPE(),
-        Subprocess::PIPE());
+        Subprocess::PIPE(),
+        None(),
+        None(),
+        setupChild);
 
     if (s.isError()) {
       entry->promise.fail("Failed to exec 'du': " + s.error());


[3/3] mesos git commit: Used the argv version of subprocess for linux perf utilities.

Posted by ji...@apache.org.
Used the argv version of subprocess for linux perf utilities.

Review: https://reviews.apache.org/r/32698


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/88db77f2
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/88db77f2
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/88db77f2

Branch: refs/heads/master
Commit: 88db77f266b9f46a14b893effeb0bad1581b0022
Parents: 86d45bc
Author: Jie Yu <yu...@gmail.com>
Authored: Tue Mar 31 12:57:42 2015 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Thu Apr 2 12:03:14 2015 -0700

----------------------------------------------------------------------
 src/linux/perf.cpp | 81 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 55 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/88db77f2/src/linux/perf.cpp
----------------------------------------------------------------------
diff --git a/src/linux/perf.cpp b/src/linux/perf.cpp
index 377b6d2..cad6c80 100644
--- a/src/linux/perf.cpp
+++ b/src/linux/perf.cpp
@@ -50,29 +50,45 @@ const string PIDS_KEY = "";
 
 namespace internal {
 
-string command(
+vector<string> argv(
     const set<string>& events,
     const set<string>& cgroups,
     const Duration& duration)
 {
-  ostringstream command;
+  vector<string> argv = {
+    "perf", "stat",
+
+    // System-wide collection from all CPUs.
+    "--all-cpus",
+
+    // Print counts using a CSV-style output to make it easy to import
+    // directly into spreadsheets. Columns are separated by the string
+    // specified in PERF_DELIMITER.
+    "--field-separator", PERF_DELIMITER,
+
+    // Ensure all output goes to stdout.
+    "--log-fd", "1"
+  };
 
-  command << "perf stat -x" << PERF_DELIMITER << " -a";
-  command << " --log-fd 1";  // Ensure all output goes to stdout.
   // Nested loop to produce all pairings of event and cgroup.
   foreach (const string& event, events) {
     foreach (const string& cgroup, cgroups) {
-      command << " --event " << event
-              << " --cgroup " << cgroup;
+      argv.push_back("--event");
+      argv.push_back(event);
+      argv.push_back("--cgroup");
+      argv.push_back(cgroup);
     }
   }
-  command << " -- sleep " << stringify(duration.secs());
 
-  return command.str();
+  argv.push_back("--");
+  argv.push_back("sleep");
+  argv.push_back(stringify(duration.secs()));
+
+  return argv;
 }
 
 
-string command(
+vector<string> argv(
     const set<string>& events,
     const string& cgroup,
     const Duration& duration)
@@ -80,24 +96,36 @@ string command(
   set<string> cgroups;
   cgroups.insert(cgroup);
 
-  return command(events, cgroups, duration);
+  return argv(events, cgroups, duration);
 }
 
 
-string command(
+vector<string> argv(
     const set<string>& events,
     const set<pid_t>& pids,
     const Duration& duration)
 {
-  ostringstream command;
+  vector<string> argv = {
+    "perf", "stat",
+
+    // System-wide collection from all CPUs.
+    "--all-cpus",
+
+    // Print counts using a CSV-style output to make it easy to import
+    // directly into spreadsheets. Columns are separated by the string
+    // specified in PERF_DELIMITER.
+    "--field-separator", PERF_DELIMITER,
+
+    // Ensure all output goes to stdout.
+    "--log-fd", "1",
 
-  command << "perf stat -x" << PERF_DELIMITER << " -a";
-  command << " --log-fd 1";  // Ensure all output goes to stdout.
-  command << " --event " << strings::join(",", events);
-  command << " --pid " << strings::join(",", pids);
-  command << " -- sleep " << stringify(duration.secs());
+    "--event", strings::join(",", events),
+    "--pid", strings::join(",", pids),
+    "--",
+    "sleep", stringify(duration.secs())
+  };
 
-  return command.str();
+  return argv;
 }
 
 
@@ -113,8 +141,8 @@ inline string normalize(const string& s)
 class PerfSampler : public Process<PerfSampler>
 {
 public:
-  PerfSampler(const string& _command, const Duration& _duration)
-    : command(_command), duration(_duration) {}
+  PerfSampler(const vector<string>& _argv, const Duration& _duration)
+    : argv(_argv), duration(_duration) {}
 
   virtual ~PerfSampler() {}
 
@@ -158,7 +186,8 @@ private:
   void sample()
   {
     Try<Subprocess> _perf = subprocess(
-        command,
+        "perf",
+        argv,
         Subprocess::PIPE(),
         Subprocess::PIPE(),
         Subprocess::PIPE());
@@ -233,7 +262,7 @@ private:
     return;
   }
 
-  const string command;
+  const vector<string> argv;
   const Duration duration;
   Time start;
   Option<Subprocess> perf;
@@ -273,8 +302,8 @@ Future<mesos::PerfStatistics> sample(
     return Failure("Perf is not supported");
   }
 
-  const string command = internal::command(events, pids, duration);
-  internal::PerfSampler* sampler = new internal::PerfSampler(command, duration);
+  const vector<string> argv = internal::argv(events, pids, duration);
+  internal::PerfSampler* sampler = new internal::PerfSampler(argv, duration);
   Future<hashmap<string, mesos::PerfStatistics>> future = sampler->future();
   spawn(sampler, true);
   return future
@@ -303,8 +332,8 @@ Future<hashmap<string, mesos::PerfStatistics>> sample(
     return Failure("Perf is not supported");
   }
 
-  const string command = internal::command(events, cgroups, duration);
-  internal::PerfSampler* sampler = new internal::PerfSampler(command, duration);
+  const vector<string> argv = internal::argv(events, cgroups, duration);
+  internal::PerfSampler* sampler = new internal::PerfSampler(argv, duration);
   Future<hashmap<string, mesos::PerfStatistics>> future = sampler->future();
   spawn(sampler, true);
   return future;