You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by id...@apache.org on 2014/06/14 00:32:31 UTC

[1/5] git commit: Support for running "perf stat".

Repository: mesos
Updated Branches:
  refs/heads/master 4c83662e1 -> d74de8c57


Support for running "perf stat".

Perf can be run against either a set of pids or a set of perf_event cgroups.

Review: https://reviews.apache.org/r/21443


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/be0ba0db
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/be0ba0db
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/be0ba0db

Branch: refs/heads/master
Commit: be0ba0db76e7fb5b54b25b09d154fdfdd6f2a741
Parents: 160e9e0
Author: Ian Downes <id...@twitter.com>
Authored: Wed May 14 14:33:58 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:08 2014 -0700

----------------------------------------------------------------------
 src/Makefile.am             |   3 +
 src/linux/perf.cpp          | 401 +++++++++++++++++++++++++++++++++++++++
 src/linux/perf.hpp          |  78 ++++++++
 src/tests/cgroups_tests.cpp |  82 ++++++++
 src/tests/perf_tests.cpp    | 146 ++++++++++++++
 5 files changed, 710 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/be0ba0db/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index 5d3196d..3e623cc 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -287,6 +287,7 @@ nodist_scheduler_HEADERS = scheduler/scheduler.pb.h
 
 if OS_LINUX
   libmesos_no_3rdparty_la_SOURCES += linux/cgroups.cpp
+  libmesos_no_3rdparty_la_SOURCES += linux/perf.cpp
   libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/cpushare.cpp
   libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/mem.cpp
   libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/perf_event.cpp
@@ -339,6 +340,7 @@ libmesos_no_3rdparty_la_SOURCES +=					\
 	hdfs/hdfs.hpp							\
 	launcher/launcher.hpp						\
 	linux/cgroups.hpp						\
+	linux/perf.hpp							\
 	linux/fs.hpp local/flags.hpp local/local.hpp			\
 	logging/flags.hpp logging/logging.hpp				\
 	master/allocator.hpp						\
@@ -1014,6 +1016,7 @@ if OS_LINUX
   mesos_tests_SOURCES += tests/cgroups_isolator_tests.cpp
   mesos_tests_SOURCES += tests/cgroups_tests.cpp
   mesos_tests_SOURCES += tests/fs_tests.cpp
+  mesos_tests_SOURCES += tests/perf_tests.cpp
 endif
 
 if WITH_NETWORK_ISOLATOR

http://git-wip-us.apache.org/repos/asf/mesos/blob/be0ba0db/src/linux/perf.cpp
----------------------------------------------------------------------
diff --git a/src/linux/perf.cpp b/src/linux/perf.cpp
new file mode 100644
index 0000000..c0f2e92
--- /dev/null
+++ b/src/linux/perf.cpp
@@ -0,0 +1,401 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <list>
+#include <ostream>
+#include <vector>
+
+#include <process/clock.hpp>
+#include <process/collect.hpp>
+#include <process/defer.hpp>
+#include <process/io.hpp>
+#include <process/process.hpp>
+#include <process/subprocess.hpp>
+
+#include <stout/strings.hpp>
+
+#include "linux/perf.hpp"
+
+using std::list;
+using std::ostringstream;
+using std::set;
+using std::string;
+using std::vector;
+
+using namespace process;
+
+namespace perf {
+
+// Delimiter for fields in perf stat output.
+const string PERF_DELIMITER = ",";
+
+// Use an empty string as the key for the parse output when sampling a
+// set of pids. No valid cgroup can be an empty string.
+const string PIDS_KEY = "";
+
+namespace internal {
+
+string command(
+    const set<string>& events,
+    const set<string>& cgroups,
+    const Duration& duration)
+{
+  ostringstream command;
+
+  command << "perf stat -x" << PERF_DELIMITER << " -a";
+  command << " --log-fd 1";  // Ensure all output goes to stdout.
+  // Nested loop to produce all pairings of event and cgroup.
+  foreach (const string& event, events) {
+    foreach (const string& cgroup, cgroups) {
+      command << " --event " << event
+              << " --cgroup " << cgroup;
+    }
+  }
+  command << " -- sleep " << stringify(duration.secs());
+
+  return command.str();
+}
+
+
+string command(
+    const set<string>& events,
+    const string& cgroup,
+    const Duration& duration)
+{
+  set<string> cgroups;
+  cgroups.insert(cgroup);
+
+  return command(events, cgroups, duration);
+}
+
+
+string command(
+    const set<string>& events,
+    const set<pid_t>& pids,
+    const Duration& duration)
+{
+  ostringstream command;
+
+  command << "perf stat -x" << PERF_DELIMITER << " -a";
+  command << " --log-fd 1";  // Ensure all output goes to stdout.
+  command << " --event " << strings::join(",", events);
+  command << " --pid " << strings::join(",", pids);
+  command << " -- sleep " << stringify(duration.secs());
+
+  return command.str();
+}
+
+
+// Normalize a perf event name. After normalization the event name
+// should match an event field in the PerfStatistics protobuf.
+inline string normalize(const string& s)
+{
+  string lower = strings::lower(s);
+  return strings::replace(lower, "-", "_");
+}
+
+
+// Process that runs a single "perf stat" command and parses its output.
+class PerfSampler : public Process<PerfSampler>
+{
+public:
+  PerfSampler(const string& _command, const Duration& _duration)
+    : command(_command), duration(_duration) {}
+
+  virtual ~PerfSampler() {}
+
+  Future<hashmap<string, mesos::PerfStatistics> > future()
+  {
+    return promise.future();
+  }
+
+protected:
+  virtual void initialize()
+  {
+    // Stop when no one cares.
+    promise.future().onDiscard(lambda::bind(
+          static_cast<void(*)(const UPID&, bool)>(terminate), self(), true));
+
+    if (duration < Seconds(0)) {
+      promise.fail("Perf sample duration cannot be negative: '" +
+                    stringify(duration.secs()) + "'");
+      terminate(self());
+      return;
+    }
+
+    start = Clock::now();
+
+    sample();
+  }
+
+  virtual void finalize()
+  {
+    discard(output);
+
+    // Kill the perf process if it's still running.
+    if (perf.isSome() && perf.get().status().isPending()) {
+      kill(perf.get().pid(), SIGKILL);
+    }
+
+    promise.discard();
+  }
+
+private:
+  void sample()
+  {
+    Try<Subprocess> _perf = subprocess(command);
+    if (_perf.isError()) {
+      promise.fail("Failed to launch perf process: " + _perf.error());
+      terminate(self());
+      return;
+    }
+    perf = _perf.get();
+
+    Try<Nothing> nonblock = os::nonblock(perf.get().out());
+    if (nonblock.isError()) {
+      promise.fail("Failed to set nonblock on stdout for perf process: " +
+                    nonblock.error());
+      terminate(self());
+      return;
+    }
+
+    nonblock = os::nonblock(perf.get().err());
+    if (nonblock.isError()) {
+      promise.fail("Failed to set nonblock on stderr for perf process: " +
+                    nonblock.error());
+      terminate(self());
+      return;
+    }
+
+    // Start reading from stdout and stderr now. We don't use stderr
+    // but must read from it to avoid the subprocess blocking on the
+    // pipe.
+    output.push_back(process::io::read(perf.get().out()));
+    output.push_back(process::io::read(perf.get().err()));
+
+    // Wait for the process to exit.
+    perf.get().status()
+      .onAny(defer(self(), &Self::_sample, lambda::_1));
+  }
+
+  void _sample(const Future<Option<int> >& status)
+  {
+    if (!status.isReady()) {
+      // Parenthesize the conditional: '+' binds tighter than '?:'.
+      promise.fail("Failed to get exit status of perf process: " +
+                   (status.isFailed() ? status.failure() : "discarded"));
+      terminate(self());
+      return;
+    }
+
+    if (status.get().get() != 0) {
+      promise.fail("Failed to execute perf, exit status: " +
+                    stringify(WEXITSTATUS(status.get().get())));
+      terminate(self());
+      return;
+    }
+
+    // Wait until we collect all output.
+    collect(output).onAny(defer(self(), &Self::__sample, lambda::_1));
+  }
+
+  void __sample(const Future<list<string> >& future)
+  {
+    if (!future.isReady()) {
+      // Parenthesize the conditional: '+' binds tighter than '?:'.
+      promise.fail("Failed to collect output of perf process: " +
+                   (future.isFailed() ? future.failure() : "discarded"));
+      terminate(self());
+      return;
+    }
+
+    // Parse output from stdout.
+    Try<hashmap<string, mesos::PerfStatistics> > parse =
+      perf::parse(output.front().get());
+    if (parse.isError()) {
+      promise.fail("Failed to parse perf output: " + parse.error());
+      terminate(self());
+      return;
+    }
+
+    // Copy out of the Try<> so the timestamp and duration can be set.
+    hashmap<string, mesos::PerfStatistics> statistics = parse.get();
+    foreachvalue (mesos::PerfStatistics& s, statistics) {
+      s.set_timestamp(start.secs());
+      s.set_duration(duration.secs());
+    }
+
+    promise.set(statistics);
+    terminate(self());
+  }
+
+  const string command;
+  const Duration duration;
+  Time start;
+  Option<Subprocess> perf;
+  Promise<hashmap<string, mesos::PerfStatistics> > promise;
+  list<Future<string> > output;
+};
+
+
+// Helper to select a single key from the hashmap of perf statistics.
+Future<mesos::PerfStatistics> select(
+    const string& key,
+    const hashmap<string, mesos::PerfStatistics>& statistics)
+{
+  return statistics.get(key).get();
+}
+
+} // namespace internal {
+
+
+Future<mesos::PerfStatistics> sample(
+    const set<string>& events,
+    pid_t pid,
+    const Duration& duration)
+{
+  set<pid_t> pids;
+  pids.insert(pid);
+  return sample(events, pids, duration);
+}
+
+
+Future<mesos::PerfStatistics> sample(
+    const set<string>& events,
+    const set<pid_t>& pids,
+    const Duration& duration)
+{
+  const string command = internal::command(events, pids, duration);
+  internal::PerfSampler* sampler = new internal::PerfSampler(command, duration);
+  Future<hashmap<string, mesos::PerfStatistics> > future = sampler->future();
+  spawn(sampler, true);
+  return future
+    .then(lambda::bind(&internal::select, PIDS_KEY, lambda::_1));
+}
+
+
+Future<mesos::PerfStatistics> sample(
+    const set<string>& events,
+    const string& cgroup,
+    const Duration& duration)
+{
+  set<string> cgroups;
+  cgroups.insert(cgroup);
+  return sample(events, cgroups, duration)
+    .then(lambda::bind(&internal::select, cgroup, lambda::_1));
+}
+
+
+Future<hashmap<string, mesos::PerfStatistics> > sample(
+    const set<string>& events,
+    const set<string>& cgroups,
+    const Duration& duration)
+{
+  const string command = internal::command(events, cgroups, duration);
+  internal::PerfSampler* sampler = new internal::PerfSampler(command, duration);
+  Future<hashmap<string, mesos::PerfStatistics> > future = sampler->future();
+  spawn(sampler, true);
+  return future;
+}
+
+
+bool valid(const set<string>& events)
+{
+  ostringstream command;
+
+  // Log everything to stderr which is then redirected to /dev/null.
+  command << "perf stat --log-fd 2";
+  foreach (const string& event, events) {
+    command << " --event " << event;
+  }
+  command << " true 2>/dev/null";
+
+  return (os::system(command.str()) == 0);
+}
+
+
+Try<hashmap<string, mesos::PerfStatistics> > parse(const string& output)
+{
+  hashmap<string, mesos::PerfStatistics> statistics;
+
+  foreach (const string& line, strings::tokenize(output, "\n")) {
+    vector<string> tokens = strings::tokenize(line, PERF_DELIMITER);
+    // Expected format for an output line is either:
+    // value,event          (when sampling pids)
+    // value,event,cgroup   (when sampling a cgroup)
+    // assuming PERF_DELIMITER = ",".
+    if (tokens.size() < 2 || tokens.size() > 3) {
+      return Error("Unexpected perf output at line: " + line);
+    }
+
+    const string value = tokens[0];
+    const string event = internal::normalize(tokens[1]);
+    // Use the special PIDS_KEY when sampling pids.
+    const string cgroup = (tokens.size() == 3 ? tokens[2] : PIDS_KEY);
+
+    if (!statistics.contains(cgroup)) {
+      statistics.put(cgroup, mesos::PerfStatistics());
+    }
+
+    const google::protobuf::Reflection* reflection =
+      statistics[cgroup].GetReflection();
+    const google::protobuf::FieldDescriptor* field =
+      statistics[cgroup].GetDescriptor()->FindFieldByName(event);
+    if (!field) {
+      return Error("Unexpected perf output at line: " + line);
+    }
+
+    if (value == "<not supported>") {
+      LOG(WARNING) << "Unsupported perf counter, ignoring: " << line;
+      continue;
+    }
+
+    switch (field->type()) {
+      case google::protobuf::FieldDescriptor::TYPE_DOUBLE:
+        {
+          Try<double> number =
+            (value == "<not counted>") ?  0 : numify<double>(value);
+
+          if (number.isError()) {
+            return Error("Unable to parse perf value at line: " + line);
+          }
+
+          reflection->SetDouble(&(statistics[cgroup]), field, number.get());
+          break;
+        }
+      case google::protobuf::FieldDescriptor::TYPE_UINT64:
+        {
+          Try<uint64_t> number =
+            (value == "<not counted>") ?  0 : numify<uint64_t>(value);
+
+          if (number.isError()) {
+            return Error("Unable to parse perf value at line: " + line);
+          }
+
+          reflection->SetUInt64(&(statistics[cgroup]), field, number.get());
+          break;
+        }
+      default:
+        return Error("Unsupported perf field type at line: " + line);
+      }
+  }
+
+  return statistics;
+}
+
+} // namespace perf {

http://git-wip-us.apache.org/repos/asf/mesos/blob/be0ba0db/src/linux/perf.hpp
----------------------------------------------------------------------
diff --git a/src/linux/perf.hpp b/src/linux/perf.hpp
new file mode 100644
index 0000000..0d510c5
--- /dev/null
+++ b/src/linux/perf.hpp
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PERF_HPP__
+#define __PERF_HPP__
+
+#include <unistd.h>
+
+#include <set>
+#include <string>
+
+#include <process/future.hpp>
+
+#include <stout/duration.hpp>
+#include <stout/hashmap.hpp>
+
+// For PerfStatistics protobuf.
+#include "mesos/mesos.hpp"
+
+namespace perf {
+
+// Sample the perf events for process pid for duration.
+process::Future<mesos::PerfStatistics> sample(
+    const std::set<std::string>& events,
+    pid_t pid,
+    const Duration& duration);
+
+
+// Sample the perf events for processes in pids for duration.
+process::Future<mesos::PerfStatistics> sample(
+    const std::set<std::string>& events,
+    const std::set<pid_t>& pids,
+    const Duration& duration);
+
+
+// Sample the perf events for process(es) in the perf_event cgroups
+// for duration. The returned hashmap is keyed by cgroup.
+// NOTE: cgroups should be relative to the perf_event subsystem mount,
+// e.g., mesos/test for /sys/fs/cgroup/perf_event/mesos/test.
+process::Future<hashmap<std::string, mesos::PerfStatistics> > sample(
+    const std::set<std::string>& events,
+    const std::set<std::string>& cgroups,
+    const Duration& duration);
+
+
+// Sample the perf events for process(es) in the perf_event cgroup.
+process::Future<mesos::PerfStatistics> sample(
+    const std::set<std::string>& events,
+    const std::string& cgroup,
+    const Duration& duration);
+
+
+// Validate a set of events are accepted by `perf stat`.
+bool valid(const std::set<std::string>& events);
+
+
+// Note: Exposed for testing purposes.
+Try<hashmap<std::string, mesos::PerfStatistics> > parse(
+    const std::string& output);
+
+} // namespace perf {
+
+#endif // __PERF_HPP__

http://git-wip-us.apache.org/repos/asf/mesos/blob/be0ba0db/src/tests/cgroups_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/cgroups_tests.cpp b/src/tests/cgroups_tests.cpp
index 5f674cd..73510fb 100644
--- a/src/tests/cgroups_tests.cpp
+++ b/src/tests/cgroups_tests.cpp
@@ -48,6 +48,7 @@
 #include <stout/strings.hpp>
 
 #include "linux/cgroups.hpp"
+#include "linux/perf.hpp"
 
 #include "tests/mesos.hpp" // For TEST_CGROUPS_(HIERARCHY|ROOT).
 
@@ -852,3 +853,84 @@ TEST_F(CgroupsAnyHierarchyWithFreezerTest, ROOT_CGROUPS_AssignThreads)
   ASSERT_TRUE(future.isReady());
   EXPECT_TRUE(future.get());
 }
+
+
+class CgroupsAnyHierarchyWithPerfEventTest
+  : public CgroupsAnyHierarchyTest
+{
+public:
+  CgroupsAnyHierarchyWithPerfEventTest()
+    : CgroupsAnyHierarchyTest("perf_event") {}
+};
+
+
+TEST_F(CgroupsAnyHierarchyWithPerfEventTest, ROOT_CGROUPS_Perf)
+{
+  int pipes[2];
+  int dummy;
+  ASSERT_NE(-1, ::pipe(pipes));
+
+  std::string hierarchy = path::join(baseHierarchy, "perf_event");
+  ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
+
+  pid_t pid = ::fork();
+  ASSERT_NE(-1, pid);
+
+  if (pid == 0) {
+    // In child process.
+    ::close(pipes[1]);
+
+    // Wait until parent has assigned us to the cgroup.
+    ssize_t len;
+    while ((len = ::read(pipes[0], &dummy, sizeof(dummy))) == -1 &&
+           errno == EINTR);
+    ASSERT_EQ(sizeof(dummy), len);
+    ::close(pipes[0]);
+
+    while (true) { sleep(1); }
+
+    ABORT("Child should not reach here");
+  }
+
+  // In parent.
+  ::close(pipes[0]);
+
+  // Put child into the test cgroup.
+  ASSERT_SOME(cgroups::assign(hierarchy, TEST_CGROUPS_ROOT, pid));
+
+  ssize_t len;
+  while ((len = ::write(pipes[1], &dummy, sizeof(dummy))) == -1 &&
+         errno == EINTR);
+  ASSERT_EQ(sizeof(dummy), len);
+  ::close(pipes[1]);
+
+  std::set<std::string> events;
+  // Hardware event.
+  events.insert("cycles");
+  // Software event.
+  events.insert("task-clock");
+
+  Future<mesos::PerfStatistics> statistics =
+    perf::sample(events, TEST_CGROUPS_ROOT, Seconds(1));
+  AWAIT_READY(statistics);
+
+  ASSERT_TRUE(statistics.get().has_cycles());
+  EXPECT_LT(0u, statistics.get().cycles());
+
+  ASSERT_TRUE(statistics.get().has_task_clock());
+  EXPECT_LT(0.0, statistics.get().task_clock());
+
+  // Kill the child process.
+  ASSERT_NE(-1, ::kill(pid, SIGKILL));
+
+  // Wait for the child process.
+  int status;
+  EXPECT_NE(-1, ::waitpid((pid_t) -1, &status, 0));
+  ASSERT_TRUE(WIFSIGNALED(status));
+  EXPECT_EQ(SIGKILL, WTERMSIG(status));
+
+  // Destroy the cgroup.
+  Future<bool> destroy = cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT);
+  AWAIT_READY(destroy);
+  EXPECT_TRUE(destroy.get());
+}

http://git-wip-us.apache.org/repos/asf/mesos/blob/be0ba0db/src/tests/perf_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/perf_tests.cpp b/src/tests/perf_tests.cpp
new file mode 100644
index 0000000..7d6dd24
--- /dev/null
+++ b/src/tests/perf_tests.cpp
@@ -0,0 +1,146 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <set>
+
+#include <gmock/gmock.h>
+
+#include <process/clock.hpp>
+#include <process/gtest.hpp>
+
+#include <stout/gtest.hpp>
+#include <stout/stringify.hpp>
+
+#include "linux/perf.hpp"
+
+using std::set;
+using std::string;
+
+using namespace process;
+
+class PerfTest : public ::testing::Test {};
+
+
+TEST_F(PerfTest, ROOT_Events)
+{
+  set<string> events;
+  // Valid events.
+  events.insert("cycles");
+  events.insert("task-clock");
+  EXPECT_TRUE(perf::valid(events));
+
+  // Add an invalid event.
+  events.insert("this-is-an-invalid-event");
+  EXPECT_FALSE(perf::valid(events));
+}
+
+
+TEST_F(PerfTest, Parse)
+{
+  // uint64 and floats should be parsed; pid samples use the "" key.
+  Try<hashmap<string, mesos::PerfStatistics> > parse =
+    perf::parse("123,cycles\n0.123,task-clock");
+  ASSERT_SOME(parse);
+
+  ASSERT_TRUE(parse.get().contains(""));
+  mesos::PerfStatistics statistics = parse.get().get("").get();
+
+  ASSERT_TRUE(statistics.has_cycles());
+  EXPECT_EQ(123u, statistics.cycles());
+  ASSERT_TRUE(statistics.has_task_clock());
+  EXPECT_EQ(0.123, statistics.task_clock());
+
+  // Parse multiple cgroups.
+  parse = perf::parse("123,cycles,cgroup1\n"
+                      "456,cycles,cgroup2\n"
+                      "0.456,task-clock,cgroup2\n"
+                      "0.123,task-clock,cgroup1");
+  ASSERT_SOME(parse);
+  EXPECT_FALSE(parse.get().contains(""));
+
+  ASSERT_TRUE(parse.get().contains("cgroup1"));
+  statistics = parse.get().get("cgroup1").get();
+
+  ASSERT_TRUE(statistics.has_cycles());
+  EXPECT_EQ(123u, statistics.cycles());
+  ASSERT_TRUE(statistics.has_task_clock());
+  EXPECT_EQ(0.123, statistics.task_clock());
+
+  ASSERT_TRUE(parse.get().contains("cgroup2"));
+  statistics = parse.get().get("cgroup2").get();
+
+  ASSERT_TRUE(statistics.has_cycles());
+  EXPECT_EQ(456u, statistics.cycles());
+  EXPECT_TRUE(statistics.has_task_clock());
+  EXPECT_EQ(0.456, statistics.task_clock());
+
+  // Statistics reporting <not supported> should not appear.
+  parse = perf::parse("<not supported>,cycles");
+  ASSERT_SOME(parse);
+
+  ASSERT_TRUE(parse.get().contains(""));
+  statistics = parse.get().get("").get();
+  EXPECT_FALSE(statistics.has_cycles());
+
+  // Statistics reporting <not counted> should be zero.
+  parse = perf::parse("<not counted>,cycles\n<not counted>,task-clock");
+  ASSERT_SOME(parse);
+
+  ASSERT_TRUE(parse.get().contains(""));
+  statistics = parse.get().get("").get();
+
+  EXPECT_TRUE(statistics.has_cycles());
+  EXPECT_EQ(0u, statistics.cycles());
+  EXPECT_TRUE(statistics.has_task_clock());
+  EXPECT_EQ(0.0, statistics.task_clock());
+
+  // Check parsing fails.
+  parse = perf::parse("1,cycles\ngarbage");
+  EXPECT_ERROR(parse);
+
+  parse = perf::parse("1,unknown-field");
+  EXPECT_ERROR(parse);
+}
+
+
+TEST_F(PerfTest, ROOT_SampleInit)
+{
+  set<string> events;
+  // Hardware event.
+  events.insert("cycles");
+  // Software event.
+  events.insert("task-clock");
+
+  // Sample init/launchd/systemd (pid 1).
+  Future<mesos::PerfStatistics> statistics =
+    perf::sample(events, 1, Seconds(1));
+  AWAIT_READY(statistics);
+
+  // Check the sample timestamp is within the last 5 seconds. This is generous
+  // because there's the process reap delay in addition to the sampling
+  // duration.
+  ASSERT_TRUE(statistics.get().has_timestamp());
+  EXPECT_GT(Seconds(5).secs(), Clock::now().secs() - statistics.get().timestamp());
+  EXPECT_EQ(Seconds(1).secs(), statistics.get().duration());
+
+  ASSERT_TRUE(statistics.get().has_cycles());
+  EXPECT_LT(0u, statistics.get().cycles());
+
+  ASSERT_TRUE(statistics.get().has_task_clock());
+  EXPECT_LT(0.0, statistics.get().task_clock());
+}


[5/5] git commit: Add sampling support to the perf_event isolator.

Posted by id...@apache.org.
Add sampling support to the perf_event isolator.

Review: https://reviews.apache.org/r/21451


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d74de8c5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d74de8c5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d74de8c5

Branch: refs/heads/master
Commit: d74de8c579f74b089c3116cf0b11571b1051f735
Parents: be0ba0d
Author: Ian Downes <id...@twitter.com>
Authored: Wed May 14 10:33:21 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:09 2014 -0700

----------------------------------------------------------------------
 .../isolators/cgroups/perf_event.cpp            | 244 +++++++++++++++----
 .../isolators/cgroups/perf_event.hpp            |  33 ++-
 src/slave/containerizer/mesos_containerizer.cpp |   2 +
 src/slave/flags.hpp                             |  27 ++
 src/tests/isolator_tests.cpp                    |  72 ++++++
 src/tests/mesos.cpp                             |   1 +
 src/tests/slave_recovery_tests.cpp              | 150 ++++++++++++
 7 files changed, 481 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/containerizer/isolators/cgroups/perf_event.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.cpp b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
index d9c8b25..1bd5dfa 100644
--- a/src/slave/containerizer/isolators/cgroups/perf_event.cpp
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
@@ -19,13 +19,20 @@
 #include <stdint.h>
 
 #include <vector>
+#include <set>
+
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/message.h>
 
 #include <mesos/resources.hpp>
 #include <mesos/values.hpp>
 
 #include <process/collect.hpp>
 #include <process/defer.hpp>
+#include <process/delay.hpp>
+#include <process/io.hpp>
 #include <process/pid.hpp>
+#include <process/subprocess.hpp>
 
 #include <stout/bytes.hpp>
 #include <stout/check.hpp>
@@ -35,11 +42,10 @@
 #include <stout/hashset.hpp>
 #include <stout/lambda.hpp>
 #include <stout/nothing.hpp>
+#include <stout/os.hpp>
 #include <stout/stringify.hpp>
 #include <stout/try.hpp>
 
-#include "common/type_utils.hpp"
-
 #include "linux/cgroups.hpp"
 
 #include "slave/containerizer/isolators/cgroups/perf_event.hpp"
@@ -47,7 +53,7 @@
 using namespace process;
 
 using std::list;
-using std::ostringstream;
+using std::set;
 using std::string;
 using std::vector;
 
@@ -55,17 +61,33 @@ namespace mesos {
 namespace internal {
 namespace slave {
 
-CgroupsPerfEventIsolatorProcess::CgroupsPerfEventIsolatorProcess(
-    const Flags& _flags,
-    const string& _hierarchy)
-  : flags(_flags), hierarchy(_hierarchy) {}
+Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
+{
+  LOG(INFO) << "Creating PerfEvent isolator";
+
+  if (flags.perf_duration > flags.perf_interval) {
+    return Error("Sampling perf for duration (" +
+                 stringify(flags.perf_duration) +
+                 ") > interval (" +
+                 stringify(flags.perf_interval) +
+                 ") is not supported.");
+  }
 
+  if (!flags.perf_events.isSome()) {
+    return Error("No perf events specified.");
+  }
 
-CgroupsPerfEventIsolatorProcess::~CgroupsPerfEventIsolatorProcess() {}
+  set<string> events;
+  foreach (const string& event,
+           strings::tokenize(flags.perf_events.get(), ",")) {
+    events.insert(event);
+  }
 
+  if (!perf::valid(events)) {
+    return Error("Failed to create PerfEvent isolator, invalid events: " +
+                 stringify(events));
+  }
 
-Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
-{
   Try<string> hierarchy = cgroups::prepare(
       flags.cgroups_hierarchy, "perf_event", flags.cgroups_root);
 
@@ -73,6 +95,10 @@ Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
     return Error("Failed to create perf_event cgroup: " + hierarchy.error());
   }
 
+  LOG(INFO) << "PerfEvent isolator will profile for " << flags.perf_duration
+            << " every " << flags.perf_interval
+            << " for events: " << stringify(events);
+
   process::Owned<IsolatorProcess> process(
       new CgroupsPerfEventIsolatorProcess(flags, hierarchy.get()));
 
@@ -80,6 +106,31 @@ Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
 }
 
 
+CgroupsPerfEventIsolatorProcess::CgroupsPerfEventIsolatorProcess(
+    const Flags& _flags,
+    const string& _hierarchy)
+  : flags(_flags),
+    hierarchy(_hierarchy)
+{
+  CHECK_SOME(flags.perf_events);
+
+  foreach (const string& event,
+           strings::tokenize(flags.perf_events.get(), ",")) {
+    events.insert(event);
+  }
+}
+
+
+CgroupsPerfEventIsolatorProcess::~CgroupsPerfEventIsolatorProcess() {}
+
+
+void CgroupsPerfEventIsolatorProcess::initialize()
+{
+  // Start sampling.
+  sample();
+}
+
+
 Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
     const list<state::RunState>& states)
 {
@@ -95,41 +146,39 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
     }
 
     const ContainerID& containerId = state.id.get();
+    const string cgroup = path::join(flags.cgroups_root, containerId.value());
 
-    Info* info = new Info(
-        containerId, path::join(flags.cgroups_root, containerId.value()));
-    CHECK_NOTNULL(info);
-
-    infos[containerId] = info;
-    cgroups.insert(info->cgroup);
-
-    Try<bool> exists = cgroups::exists(hierarchy, info->cgroup);
+    Try<bool> exists = cgroups::exists(hierarchy, cgroup);
     if (exists.isError()) {
-      delete info;
       foreachvalue (Info* info, infos) {
         delete info;
       }
+
       infos.clear();
-      return Failure("Failed to check cgroup for container '" +
-                     stringify(containerId) + "'");
+      return Failure("Failed to check cgroup " + cgroup +
+                     " for container '" + stringify(containerId) + "'");
     }
 
     if (!exists.get()) {
-      VLOG(1) << "Couldn't find cgroup for container " << containerId;
       // This may occur if the executor is exiting and the isolator has
       // destroyed the cgroup but the slave dies before noticing this. This
       // will be detected when the containerizer tries to monitor the
       // executor's pid.
       // NOTE: This could also occur if this isolator is now enabled for a
-      // container that was started without this isolator. For this particular
-      // isolator it is okay to continue running this container without its
-      // perf_event cgroup existing because we don't ever query it and the
-      // destroy will succeed immediately.
+      // container that was started without this isolator. For this
+      // particular isolator it is acceptable to continue running this
+      // container without a perf_event cgroup because we don't ever
+      // query it and the destroy will succeed immediately.
+      VLOG(1) << "Couldn't find perf event cgroup for container " << containerId
+              << ", perf statistics will not be available";
+      continue;
     }
+
+    infos[containerId] = new Info(containerId, cgroup);
+    cgroups.insert(cgroup);
   }
 
-  Try<vector<string> > orphans = cgroups::get(
-      hierarchy, flags.cgroups_root);
+  Try<vector<string> > orphans = cgroups::get(hierarchy, flags.cgroups_root);
   if (orphans.isError()) {
     foreachvalue (Info* info, infos) {
       delete info;
@@ -139,6 +188,13 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
   }
 
   foreach (const string& orphan, orphans.get()) {
+    // Ignore the slave cgroup (see the --slave_subsystems flag).
+    // TODO(idownes): Remove this when the cgroups layout is updated,
+    // see MESOS-1185.
+    if (orphan == path::join(flags.cgroups_root, "slave")) {
+      continue;
+    }
+
     if (!cgroups.contains(orphan)) {
       LOG(INFO) << "Removing orphaned cgroup '" << orphan << "'";
       cgroups::destroy(hierarchy, orphan);
@@ -157,8 +213,11 @@ Future<Option<CommandInfo> > CgroupsPerfEventIsolatorProcess::prepare(
     return Failure("Container has already been prepared");
   }
 
+  LOG(INFO) << "Preparing perf event cgroup for " << containerId;
+
   Info* info = new Info(
-      containerId, path::join(flags.cgroups_root, containerId.value()));
+      containerId,
+      path::join(flags.cgroups_root, containerId.value()));
 
   infos[containerId] = CHECK_NOTNULL(info);
 
@@ -194,9 +253,6 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::isolate(
 
   Info* info = CHECK_NOTNULL(infos[containerId]);
 
-  CHECK(info->pid.isNone());
-  info->pid = pid;
-
   Try<Nothing> assign = cgroups::assign(hierarchy, info->cgroup, pid);
   if (assign.isError()) {
     return Failure("Failed to assign container '" +
@@ -212,13 +268,8 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::isolate(
 Future<Limitation> CgroupsPerfEventIsolatorProcess::watch(
     const ContainerID& containerId)
 {
-  if (!infos.contains(containerId)) {
-    return Failure("Unknown container");
-  }
-
-  CHECK_NOTNULL(infos[containerId]);
-
-  return infos[containerId]->limitation.future();
+  // No resources are limited.
+  return Future<Limitation>();
 }
 
 
@@ -234,20 +285,36 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::update(
 Future<ResourceStatistics> CgroupsPerfEventIsolatorProcess::usage(
     const ContainerID& containerId)
 {
-  // No resource statistics provided by this isolator.
-  return ResourceStatistics();
+  if (!infos.contains(containerId)) {
+    // Return an empty ResourceStatistics, i.e., without
+    // PerfStatistics, if we don't know about this container.
+    return ResourceStatistics();
+  }
+
+  CHECK_NOTNULL(infos[containerId]);
+
+  ResourceStatistics statistics;
+  statistics.mutable_perf()->CopyFrom(infos[containerId]->statistics);
+
+  return statistics;
 }
 
 
 Future<Nothing> CgroupsPerfEventIsolatorProcess::cleanup(
     const ContainerID& containerId)
 {
+  // Tolerate clean up attempts for unknown containers which may arise from
+  // repeated clean up attempts (during test cleanup).
   if (!infos.contains(containerId)) {
-    return Failure("Unknown container");
+    VLOG(1) << "Ignoring cleanup request for unknown container: "
+            << containerId;
+    return Nothing();
   }
 
   Info* info = CHECK_NOTNULL(infos[containerId]);
 
+  info->destroying = true;
+
   return cgroups::destroy(hierarchy, info->cgroup)
     .then(defer(PID<CgroupsPerfEventIsolatorProcess>(this),
                 &CgroupsPerfEventIsolatorProcess::_cleanup,
@@ -258,7 +325,10 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::cleanup(
 Future<Nothing> CgroupsPerfEventIsolatorProcess::_cleanup(
     const ContainerID& containerId)
 {
-  CHECK(infos.contains(containerId));
+  if (!infos.contains(containerId))
+  {
+    return Nothing();
+  }
 
   delete infos[containerId];
   infos.erase(containerId);
@@ -267,6 +337,94 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::_cleanup(
 }
 
 
+Future<hashmap<string, PerfStatistics> > discardSample(
+    Future<hashmap<string, PerfStatistics> > future,
+    const Duration& duration,
+    const Duration& timeout)
+{
+  LOG(ERROR) << "Perf sample of " << stringify(duration)
+             << " failed to complete within " << stringify(timeout)
+             << "; sampling will be halted";
+
+  future.discard();
+
+  return future;
+}
+
+
+void CgroupsPerfEventIsolatorProcess::sample()
+{
+  set<string> cgroups;
+  foreachvalue (Info* info, infos) {
+    CHECK_NOTNULL(info);
+
+    if (info->destroying) {
+      // Skip cgroups if destroy has started because it's asynchronous
+      // and "perf stat" will fail if the cgroup has been destroyed
+      // by the time we actually run perf.
+      continue;
+    }
+
+    cgroups.insert(info->cgroup);
+  }
+
+  if (cgroups.size() > 0) {
+    // The timeout includes an allowance of twice the process::reap
+    // interval (currently one second) to ensure we see the perf
+    // process exit. If the sample is not ready after the timeout
+    // something very unexpected has occurred so we discard it and
+    // halt all sampling.
+    Duration timeout = flags.perf_duration + Seconds(2);
+
+    perf::sample(events, cgroups, flags.perf_duration)
+      .after(timeout,
+             lambda::bind(&discardSample,
+                          lambda::_1,
+                          flags.perf_duration,
+                          timeout))
+      .onAny(defer(PID<CgroupsPerfEventIsolatorProcess>(this),
+                   &CgroupsPerfEventIsolatorProcess::_sample,
+                   Clock::now() + flags.perf_interval,
+                   lambda::_1));
+  } else {
+    // No cgroups to sample for now so just schedule the next sample.
+    delay(flags.perf_interval,
+          PID<CgroupsPerfEventIsolatorProcess>(this),
+          &CgroupsPerfEventIsolatorProcess::sample);
+  }
+}
+
+
+void CgroupsPerfEventIsolatorProcess::_sample(
+    const Time& next,
+    const Future<hashmap<string, PerfStatistics> >& statistics)
+{
+  if (!statistics.isReady()) {
+    // Failure can occur for many reasons but all are unexpected and
+    // indicate something is not right so we'll stop sampling.
+    LOG(ERROR) << "Failed to get perf sample, sampling will be halted: "
+               << (statistics.isFailed() ? statistics.failure() : "discarded");
+    return;
+  }
+
+  foreachvalue (Info* info, infos) {
+    CHECK_NOTNULL(info);
+
+    if (!statistics.get().contains(info->cgroup)) {
+      // This must be a newly added cgroup and isn't in this sample;
+      // it should be included in the next sample.
+      continue;
+    }
+
+    info->statistics = statistics.get().get(info->cgroup).get();
+  }
+
+  // Schedule sample for the next time.
+  delay(next - Clock::now(),
+        PID<CgroupsPerfEventIsolatorProcess>(this),
+        &CgroupsPerfEventIsolatorProcess::sample);
+}
+
 } // namespace slave {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/containerizer/isolators/cgroups/perf_event.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.hpp b/src/slave/containerizer/isolators/cgroups/perf_event.hpp
index 2db7b3e..4ceb07a 100644
--- a/src/slave/containerizer/isolators/cgroups/perf_event.hpp
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.hpp
@@ -19,9 +19,14 @@
 #ifndef __PERF_EVENT_ISOLATOR_HPP__
 #define __PERF_EVENT_ISOLATOR_HPP__
 
+#include <set>
+
+#include "linux/perf.hpp"
+
 #include <mesos/resources.hpp>
 
 #include <process/future.hpp>
+#include <process/time.hpp>
 
 #include <stout/hashmap.hpp>
 #include <stout/try.hpp>
@@ -65,29 +70,48 @@ public:
   virtual process::Future<Nothing> cleanup(
       const ContainerID& containerId);
 
+protected:
+  virtual void initialize();
+
 private:
   CgroupsPerfEventIsolatorProcess(
       const Flags& flags,
       const std::string& hierarchy);
 
+  void sample();
+
+  void _sample(
+      const process::Time& next,
+      const process::Future<hashmap<std::string, PerfStatistics> >& statistics);
+
   virtual process::Future<Nothing> _cleanup(const ContainerID& containerId);
 
   struct Info
   {
     Info(const ContainerID& _containerId, const std::string& _cgroup)
-      : containerId(_containerId), cgroup(_cgroup) {}
+      : containerId(_containerId), cgroup(_cgroup), destroying(false)
+    {
+      // Ensure the initial statistics include the required fields.
+      // Note the duration is set to zero to indicate no sampling has
+      // taken place. This empty sample will be returned from usage()
+      // until the first true sample is obtained.
+      statistics.set_timestamp(process::Clock::now().secs());
+      statistics.set_duration(Seconds(0).secs());
+    }
 
     const ContainerID containerId;
     const std::string cgroup;
-    Option<pid_t> pid;
-
-    process::Promise<Limitation> limitation;
+    PerfStatistics statistics;
+    // Mark a container when we start destruction so we stop sampling it.
+    bool destroying;
   };
 
   const Flags flags;
 
   // The path to the cgroups subsystem hierarchy root.
   const std::string hierarchy;
+  // Set of events to sample.
+  std::set<std::string> events;
 
   hashmap<ContainerID, Info*> infos;
 };
@@ -97,4 +121,3 @@ private:
 } // namespace mesos {
 
 #endif // __PERF_EVENT_ISOLATOR_HPP__
-

http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/containerizer/mesos_containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos_containerizer.cpp b/src/slave/containerizer/mesos_containerizer.cpp
index b5e29da..d6df9a1 100644
--- a/src/slave/containerizer/mesos_containerizer.cpp
+++ b/src/slave/containerizer/mesos_containerizer.cpp
@@ -45,6 +45,7 @@
 #ifdef __linux__
 #include "slave/containerizer/isolators/cgroups/cpushare.hpp"
 #include "slave/containerizer/isolators/cgroups/mem.hpp"
+#include "slave/containerizer/isolators/cgroups/perf_event.hpp"
 #endif // __linux__
 
 using std::list;
@@ -131,6 +132,7 @@ Try<MesosContainerizer*> MesosContainerizer::create(
 #ifdef __linux__
   creators["cgroups/cpu"] = &CgroupsCpushareIsolatorProcess::create;
   creators["cgroups/mem"] = &CgroupsMemIsolatorProcess::create;
+  creators["cgroups/perf_event"] = &CgroupsPerfEventIsolatorProcess::create;
 #endif // __linux__
 
   vector<Owned<Isolator> > isolators;

http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/flags.hpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 15e5b64..3b8ba08 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -213,6 +213,30 @@ public:
         "Present functionality is intended for resource monitoring and\n"
         "no cgroup limits are set, they are inherited from the root mesos\n"
         "cgroup.");
+
+    add(&Flags::perf_events,
+        "perf_events",
+        "List of comma-separated perf events to sample for each container\n"
+        "when using the perf_event isolator. Default is none.\n"
+        "Run command 'perf list' to see all events. Event names are\n"
+        "sanitized by downcasing and replacing hyphens with underscores\n"
+        "when reported in the PerfStatistics protobuf, e.g., cpu-cycles\n"
+        "becomes cpu_cycles; see the PerfStatistics protobuf for all names.");
+
+    add(&Flags::perf_interval,
+        "perf_interval",
+        "Interval between the start of perf stat samples. Perf samples are\n"
+        "obtained periodically according to perf_interval and the most\n"
+        "recently obtained sample is returned rather than sampling on\n"
+        "demand. For this reason, perf_interval is independent of the\n"
+        "resource monitoring interval.",
+        Seconds(60));
+
+    add(&Flags::perf_duration,
+        "perf_duration",
+        "Duration of a perf stat sample. The duration must be less\n"
+        "than the perf_interval.",
+        Seconds(10));
 #endif
 
     add(&Flags::credential,
@@ -260,6 +284,9 @@ public:
   Option<std::string> cgroups_subsystems;
   bool cgroups_enable_cfs;
   Option<std::string> slave_subsystems;
+  Option<std::string> perf_events;
+  Duration perf_interval;
+  Duration perf_duration;
 #endif
   Option<std::string> credential;
   Option<std::string> containerizer_path;

http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/tests/isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/isolator_tests.cpp b/src/tests/isolator_tests.cpp
index b0eff57..0bbec09 100644
--- a/src/tests/isolator_tests.cpp
+++ b/src/tests/isolator_tests.cpp
@@ -48,6 +48,7 @@
 #ifdef __linux__
 #include "slave/containerizer/isolators/cgroups/cpushare.hpp"
 #include "slave/containerizer/isolators/cgroups/mem.hpp"
+#include "slave/containerizer/isolators/cgroups/perf_event.hpp"
 #endif // __linux__
 
 #include "tests/mesos.hpp"
@@ -63,6 +64,7 @@ using mesos::internal::master::Master;
 #ifdef __linux__
 using mesos::internal::slave::CgroupsCpushareIsolatorProcess;
 using mesos::internal::slave::CgroupsMemIsolatorProcess;
+using mesos::internal::slave::CgroupsPerfEventIsolatorProcess;
 using mesos::internal::slave::LinuxLauncher;
 #endif // __linux__
 using mesos::internal::slave::Isolator;
@@ -576,3 +578,73 @@ TYPED_TEST(MemIsolatorTest, MemUsage)
   delete isolator.get();
   delete launcher.get();
 }
+
+
+#ifdef __linux__
+class PerfEventIsolatorTest : public MesosTest {};
+
+TEST_F(PerfEventIsolatorTest, ROOT_CGROUPS_Sample)
+{
+  Flags flags;
+
+  flags.perf_events = "cycles,task-clock";
+  flags.perf_duration = Milliseconds(250);
+  flags.perf_interval = Milliseconds(500);
+
+  Try<Isolator*> isolator = CgroupsPerfEventIsolatorProcess::create(flags);
+  CHECK_SOME(isolator);
+
+  ExecutorInfo executorInfo;
+
+  ContainerID containerId;
+  containerId.set_value("test");
+
+  AWAIT_READY(isolator.get()->prepare(containerId, executorInfo));
+
+  // This first sample is likely to be empty because perf hasn't
+  // completed yet but we should still have the required fields.
+  Future<ResourceStatistics> statistics1 = isolator.get()->usage(containerId);
+  AWAIT_READY(statistics1);
+  ASSERT_TRUE(statistics1.get().has_perf());
+  EXPECT_TRUE(statistics1.get().perf().has_timestamp());
+  EXPECT_TRUE(statistics1.get().perf().has_duration());
+
+  // Wait until we get the next sample. We use a generous timeout of
+  // two seconds because we currently have a one second reap interval;
+  // when running perf with perf_duration of 250ms we won't notice the
+  // exit for up to one second.
+  ResourceStatistics statistics2;
+  Duration waited = Duration::zero();
+  do {
+    Future<ResourceStatistics> statistics = isolator.get()->usage(containerId);
+    AWAIT_READY(statistics);
+
+    statistics2 = statistics.get();
+
+    ASSERT_TRUE(statistics2.has_perf());
+
+    if (statistics1.get().perf().timestamp() !=
+        statistics2.perf().timestamp()) {
+      break;
+    }
+
+    os::sleep(Milliseconds(250));
+    waited += Milliseconds(250);
+  } while (waited < Seconds(2));
+
+  sleep(2);
+
+  EXPECT_NE(statistics1.get().perf().timestamp(),
+            statistics2.perf().timestamp());
+
+  EXPECT_TRUE(statistics2.perf().has_cycles());
+  EXPECT_LE(0u, statistics2.perf().cycles());
+
+  EXPECT_TRUE(statistics2.perf().has_task_clock());
+  EXPECT_LE(0.0, statistics2.perf().task_clock());
+
+  AWAIT_READY(isolator.get()->cleanup(containerId));
+
+  delete isolator.get();
+}
+#endif // __linux__

http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/tests/mesos.cpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.cpp b/src/tests/mesos.cpp
index e6d807c..98a7c38 100644
--- a/src/tests/mesos.cpp
+++ b/src/tests/mesos.cpp
@@ -408,6 +408,7 @@ void ContainerizerTest<slave::MesosContainerizer>::SetUp()
   subsystems.insert("cpuacct");
   subsystems.insert("memory");
   subsystems.insert("freezer");
+  subsystems.insert("perf_event");
 
   if (cgroups::enabled() && os::user() == "root") {
     foreach (const string& subsystem, subsystems) {

http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 9725e6a..7044327 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -3218,3 +3218,153 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, ResourceStatistics)
 
   delete containerizer2.get();
 }
+
+#ifdef __linux__
+// Test that the perf event isolator can be enabled on a new slave.
+// Previously created containers will not report perf statistics but
+// newly created containers will.
+TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PerfRollForward)
+{
+  Try<PID<Master> > master = this->StartMaster();
+  ASSERT_SOME(master);
+
+  // Start a slave using a containerizer without a perf event
+  // isolator.
+  slave::Flags flags = this->CreateSlaveFlags();
+  flags.isolation = "cgroups/cpu,cgroups/mem";
+
+  Try<MesosContainerizer*> containerizer1 =
+    MesosContainerizer::create(flags, true);
+  ASSERT_SOME(containerizer1);
+
+  Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+
+  // Scheduler expectations.
+  EXPECT_CALL(sched, statusUpdate(_, _))
+    .WillRepeatedly(Return());
+
+  // Enable checkpointing for the framework.
+  FrameworkInfo frameworkInfo;
+  frameworkInfo.CopyFrom(DEFAULT_FRAMEWORK_INFO);
+  frameworkInfo.set_checkpoint(true);
+
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(_, _, _));
+
+  Future<vector<Offer> > offers1;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers1))
+    .WillRepeatedly(Return());      // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers1);
+  EXPECT_NE(0u, offers1.get().size());
+
+  SlaveID slaveId = offers1.get()[0].slave_id();
+
+  TaskInfo task1 = createTask(
+      slaveId, Resources::parse("cpus:0.5;mem:128").get(), "sleep 1000");
+  vector<TaskInfo> tasks1;
+  tasks1.push_back(task1);
+
+  // Message expectations.
+  Future<Message> registerExecutor =
+    FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
+
+  driver.launchTasks(offers1.get()[0].id(), tasks1);
+
+  AWAIT_READY(registerExecutor);
+
+  Future<hashset<ContainerID> > containers = containerizer1.get()->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(1u, containers.get().size());
+
+  ContainerID containerId1 = *(containers.get().begin());
+
+  Future<ResourceStatistics> usage = containerizer1.get()->usage(containerId1);
+  AWAIT_READY(usage);
+
+  // There should not be any perf statistics.
+  EXPECT_FALSE(usage.get().has_perf());
+
+  this->Stop(slave.get());
+  delete containerizer1.get();
+
+  // Set up so we can wait until the new slave updates the container's
+  // resources (this occurs after the executor has re-registered).
+  Future<Nothing> update =
+    FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);
+
+  // Start a slave using a containerizer with a perf event isolator.
+  flags.isolation = "cgroups/cpu,cgroups/mem,cgroups/perf_event";
+  flags.perf_events = "cycles,task-clock";
+  flags.perf_duration = Milliseconds(250);
+  flags.perf_interval = Milliseconds(500);
+
+  Try<MesosContainerizer*> containerizer2 =
+    MesosContainerizer::create(flags, true);
+  ASSERT_SOME(containerizer2);
+
+  Future<vector<Offer> > offers2;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers2))
+    .WillRepeatedly(Return());        // Ignore subsequent offers.
+
+  slave = this->StartSlave(containerizer2.get(), flags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(offers2);
+  EXPECT_NE(0u, offers2.get().size());
+
+  // Wait until the containerizer is updated.
+  AWAIT_READY(update);
+
+  // The first container should not report perf statistics.
+  usage = containerizer2.get()->usage(containerId1);
+  AWAIT_READY(usage);
+
+  EXPECT_FALSE(usage.get().has_perf());
+
+  // Start a new container which will start reporting perf statistics.
+  TaskInfo task2 = createTask(offers2.get()[0], "sleep 1000");
+  vector<TaskInfo> tasks2;
+  tasks2.push_back(task2);
+
+  // Message expectations.
+  registerExecutor =
+    FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
+
+  driver.launchTasks(offers2.get()[0].id(), tasks2);
+
+  AWAIT_READY(registerExecutor);
+
+  containers = containerizer2.get()->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(2u, containers.get().size());
+  EXPECT_TRUE(containers.get().contains(containerId1));
+
+  ContainerID containerId2;
+  foreach (const ContainerID containerId, containers.get()) {
+    if (containerId != containerId1) {
+      containerId2.CopyFrom(containerId);
+    }
+  }
+
+  usage = containerizer2.get()->usage(containerId2);
+  AWAIT_READY(usage);
+
+  EXPECT_TRUE(usage.get().has_perf());
+
+  driver.stop();
+  driver.join();
+
+  this->Shutdown();
+  delete containerizer2.get();
+}
+#endif // __linux__


[4/5] git commit: Add a cgroup perf_event isolator.

Posted by id...@apache.org.
Add a cgroup perf_event isolator.

This isolator only creates the perf_event cgroup and places the executor
into the cgroup; no performance statistics are gathered/exported.

Review: https://reviews.apache.org/r/20963


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/2e15bc93
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/2e15bc93
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/2e15bc93

Branch: refs/heads/master
Commit: 2e15bc939f4eaae8becfc76255fb20bcbbb27146
Parents: 16345a7
Author: Ian Downes <id...@twitter.com>
Authored: Fri Apr 4 16:27:39 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:08 2014 -0700

----------------------------------------------------------------------
 src/Makefile.am                                 |   2 +
 .../isolators/cgroups/perf_event.cpp            | 272 +++++++++++++++++++
 .../isolators/cgroups/perf_event.hpp            | 100 +++++++
 3 files changed, 374 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/2e15bc93/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index c91b438..5d3196d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -289,6 +289,7 @@ if OS_LINUX
   libmesos_no_3rdparty_la_SOURCES += linux/cgroups.cpp
   libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/cpushare.cpp
   libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/mem.cpp
+  libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/perf_event.cpp
   libmesos_no_3rdparty_la_SOURCES += slave/containerizer/linux_launcher.cpp
   libmesos_no_3rdparty_la_SOURCES += linux/fs.cpp
 else
@@ -355,6 +356,7 @@ libmesos_no_3rdparty_la_SOURCES +=					\
 	slave/containerizer/isolator.hpp				\
 	slave/containerizer/isolators/cgroups/cpushare.hpp		\
 	slave/containerizer/isolators/cgroups/mem.hpp			\
+	slave/containerizer/isolators/cgroups/perf_event.hpp		\
 	slave/containerizer/isolators/posix.hpp				\
 	slave/containerizer/launcher.hpp				\
 	slave/containerizer/mesos_containerizer.hpp			\

http://git-wip-us.apache.org/repos/asf/mesos/blob/2e15bc93/src/slave/containerizer/isolators/cgroups/perf_event.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.cpp b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
new file mode 100644
index 0000000..d9c8b25
--- /dev/null
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
@@ -0,0 +1,272 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+
+#include <vector>
+
+#include <mesos/resources.hpp>
+#include <mesos/values.hpp>
+
+#include <process/collect.hpp>
+#include <process/defer.hpp>
+#include <process/pid.hpp>
+
+#include <stout/bytes.hpp>
+#include <stout/check.hpp>
+#include <stout/error.hpp>
+#include <stout/foreach.hpp>
+#include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
+#include <stout/lambda.hpp>
+#include <stout/nothing.hpp>
+#include <stout/stringify.hpp>
+#include <stout/try.hpp>
+
+#include "common/type_utils.hpp"
+
+#include "linux/cgroups.hpp"
+
+#include "slave/containerizer/isolators/cgroups/perf_event.hpp"
+
+using namespace process;
+
+using std::list;
+using std::ostringstream;
+using std::string;
+using std::vector;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+CgroupsPerfEventIsolatorProcess::CgroupsPerfEventIsolatorProcess(
+    const Flags& _flags,
+    const string& _hierarchy)
+  : flags(_flags), hierarchy(_hierarchy) {}
+
+
+CgroupsPerfEventIsolatorProcess::~CgroupsPerfEventIsolatorProcess() {}
+
+
+Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
+{
+  Try<string> hierarchy = cgroups::prepare(
+      flags.cgroups_hierarchy, "perf_event", flags.cgroups_root);
+
+  if (hierarchy.isError()) {
+    return Error("Failed to create perf_event cgroup: " + hierarchy.error());
+  }
+
+  process::Owned<IsolatorProcess> process(
+      new CgroupsPerfEventIsolatorProcess(flags, hierarchy.get()));
+
+  return new Isolator(process);
+}
+
+
+Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
+    const list<state::RunState>& states)
+{
+  hashset<string> cgroups;
+
+  foreach (const state::RunState& state, states) {
+    if (state.id.isNone()) {
+      foreachvalue (Info* info, infos) {
+        delete info;
+      }
+      infos.clear();
+      return Failure("ContainerID is required to recover");
+    }
+
+    const ContainerID& containerId = state.id.get();
+
+    Info* info = new Info(
+        containerId, path::join(flags.cgroups_root, containerId.value()));
+    CHECK_NOTNULL(info);
+
+    infos[containerId] = info;
+    cgroups.insert(info->cgroup);
+
+    Try<bool> exists = cgroups::exists(hierarchy, info->cgroup);
+    if (exists.isError()) {
+      delete info;
+      foreachvalue (Info* info, infos) {
+        delete info;
+      }
+      infos.clear();
+      return Failure("Failed to check cgroup for container '" +
+                     stringify(containerId) + "'");
+    }
+
+    if (!exists.get()) {
+      VLOG(1) << "Couldn't find cgroup for container " << containerId;
+      // This may occur if the executor is exiting and the isolator has
+      // destroyed the cgroup but the slave dies before noticing this. This
+      // will be detected when the containerizer tries to monitor the
+      // executor's pid.
+      // NOTE: This could also occur if this isolator is now enabled for a
+      // container that was started without this isolator. For this particular
+      // isolator it is okay to continue running this container without its
+      // perf_event cgroup existing because we don't ever query it and the
+      // destroy will succeed immediately.
+    }
+  }
+
+  Try<vector<string> > orphans = cgroups::get(
+      hierarchy, flags.cgroups_root);
+  if (orphans.isError()) {
+    foreachvalue (Info* info, infos) {
+      delete info;
+    }
+    infos.clear();
+    return Failure(orphans.error());
+  }
+
+  foreach (const string& orphan, orphans.get()) {
+    if (!cgroups.contains(orphan)) {
+      LOG(INFO) << "Removing orphaned cgroup '" << orphan << "'";
+      cgroups::destroy(hierarchy, orphan);
+    }
+  }
+
+  return Nothing();
+}
+
+
+Future<Option<CommandInfo> > CgroupsPerfEventIsolatorProcess::prepare(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo)
+{
+  if (infos.contains(containerId)) {
+    return Failure("Container has already been prepared");
+  }
+
+  Info* info = new Info(
+      containerId, path::join(flags.cgroups_root, containerId.value()));
+
+  infos[containerId] = CHECK_NOTNULL(info);
+
+  // Create a cgroup for this container.
+  Try<bool> exists = cgroups::exists(hierarchy, info->cgroup);
+
+  if (exists.isError()) {
+    return Failure("Failed to prepare isolator: " + exists.error());
+  }
+
+  if (exists.get()) {
+    return Failure("Failed to prepare isolator: cgroup already exists");
+  }
+
+  if (!exists.get()) {
+    Try<Nothing> create = cgroups::create(hierarchy, info->cgroup);
+    if (create.isError()) {
+      return Failure("Failed to prepare isolator: " + create.error());
+    }
+  }
+
+  return None();
+}
+
+
+Future<Nothing> CgroupsPerfEventIsolatorProcess::isolate(
+    const ContainerID& containerId,
+    pid_t pid)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  CHECK(info->pid.isNone());
+  info->pid = pid;
+
+  Try<Nothing> assign = cgroups::assign(hierarchy, info->cgroup, pid);
+  if (assign.isError()) {
+    return Failure("Failed to assign container '" +
+                   stringify(info->containerId) + "' to its own cgroup '" +
+                   path::join(hierarchy, info->cgroup) +
+                   "' : " + assign.error());
+  }
+
+  return Nothing();
+}
+
+
+Future<Limitation> CgroupsPerfEventIsolatorProcess::watch(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  CHECK_NOTNULL(infos[containerId]);
+
+  return infos[containerId]->limitation.future();
+}
+
+
+Future<Nothing> CgroupsPerfEventIsolatorProcess::update(
+    const ContainerID& containerId,
+    const Resources& resources)
+{
+  // Nothing to update.
+  return Nothing();
+}
+
+
+Future<ResourceStatistics> CgroupsPerfEventIsolatorProcess::usage(
+    const ContainerID& containerId)
+{
+  // No resource statistics provided by this isolator.
+  return ResourceStatistics();
+}
+
+
+Future<Nothing> CgroupsPerfEventIsolatorProcess::cleanup(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  return cgroups::destroy(hierarchy, info->cgroup)
+    .then(defer(PID<CgroupsPerfEventIsolatorProcess>(this),
+                &CgroupsPerfEventIsolatorProcess::_cleanup,
+                containerId));
+}
+
+
+Future<Nothing> CgroupsPerfEventIsolatorProcess::_cleanup(
+    const ContainerID& containerId)
+{
+  CHECK(infos.contains(containerId));
+
+  delete infos[containerId];
+  infos.erase(containerId);
+
+  return Nothing();
+}
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/2e15bc93/src/slave/containerizer/isolators/cgroups/perf_event.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.hpp b/src/slave/containerizer/isolators/cgroups/perf_event.hpp
new file mode 100644
index 0000000..2db7b3e
--- /dev/null
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.hpp
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PERF_EVENT_ISOLATOR_HPP__
+#define __PERF_EVENT_ISOLATOR_HPP__
+
+#include <mesos/resources.hpp>
+
+#include <process/future.hpp>
+
+#include <stout/hashmap.hpp>
+#include <stout/try.hpp>
+
+#include "slave/containerizer/isolator.hpp"
+
+#include "slave/flags.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+class CgroupsPerfEventIsolatorProcess : public IsolatorProcess
+{
+public:
+  static Try<Isolator*> create(const Flags& flags);
+
+  virtual ~CgroupsPerfEventIsolatorProcess();
+
+  virtual process::Future<Nothing> recover(
+      const std::list<state::RunState>& states);
+
+  virtual process::Future<Option<CommandInfo> > prepare(
+      const ContainerID& containerId,
+      const ExecutorInfo& executorInfo);
+
+  virtual process::Future<Nothing> isolate(
+      const ContainerID& containerId,
+      pid_t pid);
+
+  virtual process::Future<Limitation> watch(
+      const ContainerID& containerId);
+
+  virtual process::Future<Nothing> update(  // No-op for this isolator.
+      const ContainerID& containerId,
+      const Resources& resources);
+
+  virtual process::Future<ResourceStatistics> usage(  // Default statistics.
+      const ContainerID& containerId);
+
+  virtual process::Future<Nothing> cleanup(
+      const ContainerID& containerId);
+
+private:
+  CgroupsPerfEventIsolatorProcess(  // NOTE(review): presumably via create().
+      const Flags& flags,
+      const std::string& hierarchy);
+
+  virtual process::Future<Nothing> _cleanup(const ContainerID& containerId);
+
+  struct Info  // Per-container state, keyed by ContainerID in 'infos'.
+  {
+    Info(const ContainerID& _containerId, const std::string& _cgroup)
+      : containerId(_containerId), cgroup(_cgroup) {}
+
+    const ContainerID containerId;
+    const std::string cgroup;  // Destroyed under 'hierarchy' by cleanup().
+    Option<pid_t> pid;
+
+    process::Promise<Limitation> limitation;  // watch() returns its future.
+  };
+
+  const Flags flags;
+
+  // The path to the cgroups subsystem hierarchy root.
+  const std::string hierarchy;
+
+  hashmap<ContainerID, Info*> infos;  // Entries are deleted in _cleanup().
+};
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __PERF_EVENT_ISOLATOR_HPP__
+


[2/5] git commit: Introduce a PerfStatistics protobuf.

Posted by id...@apache.org.
Introduce a PerfStatistics protobuf.

Review: https://reviews.apache.org/r/21442


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/160e9e0c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/160e9e0c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/160e9e0c

Branch: refs/heads/master
Commit: 160e9e0ccaf320c1881036f351d4a08a2542442d
Parents: 2e15bc9
Author: Ian Downes <id...@twitter.com>
Authored: Wed May 14 10:15:12 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:08 2014 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto | 78 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/160e9e0c/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 8c6933d..709b8b1 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -405,6 +405,9 @@ message ResourceStatistics {
 
   // TODO(bmahler): Add disk usage.
   // TODO(bmahler): Add network usage?
+
+  // Perf statistics.
+  optional PerfStatistics perf = 13;
 }
 
 
@@ -437,6 +440,81 @@ message ResourceUsage {
 
 
 /**
+ * Describes a sample of events from "perf stat". Only available on
+ * Linux.
+ *
+ * NOTE: Each optional field matches the name of a perf event (see
+ * "perf list") with the following changes:
+ * 1. Names are downcased.
+ * 2. Hyphens ('-') are replaced with underscores ('_').
+ * 3. Events with alternate names use the name "perf stat" returns,
+ *    e.g., for the event "cycles OR cpu-cycles" perf always returns
+ *    cycles.
+ */
+message PerfStatistics {
+  required double timestamp = 1; // Start of sample interval, in seconds since the Epoch.
+  required double duration = 2;  // Duration of sample interval, in seconds.
+
+  // Hardware event.
+  optional uint64 cycles = 3;
+  optional uint64 stalled_cycles_frontend = 4;
+  optional uint64 stalled_cycles_backend = 5;
+  optional uint64 instructions = 6;
+  optional uint64 cache_references = 7;
+  optional uint64 cache_misses = 8;
+  optional uint64 branches = 9;
+  optional uint64 branch_misses = 10;
+  optional uint64 bus_cycles = 11;
+  optional uint64 ref_cycles = 12;
+
+  // Software event.
+  optional double cpu_clock = 13;  // NOTE(review): units not stated; confirm.
+  optional double task_clock = 14; // NOTE(review): units not stated; confirm.
+  optional uint64 page_faults = 15;
+  optional uint64 minor_faults = 16;
+  optional uint64 major_faults = 17;
+  optional uint64 context_switches = 18;
+  optional uint64 cpu_migrations = 19;
+  optional uint64 alignment_faults = 20;
+  optional uint64 emulation_faults = 21;
+
+  // Hardware cache event.
+  optional uint64 l1_dcache_loads = 22;
+  optional uint64 l1_dcache_load_misses = 23;
+  optional uint64 l1_dcache_stores = 24;
+  optional uint64 l1_dcache_store_misses = 25;
+  optional uint64 l1_dcache_prefetches = 26;
+  optional uint64 l1_dcache_prefetch_misses = 27;
+  optional uint64 l1_icache_loads = 28;
+  optional uint64 l1_icache_load_misses = 29;
+  optional uint64 l1_icache_prefetches = 30;
+  optional uint64 l1_icache_prefetch_misses = 31;
+  optional uint64 llc_loads = 32;
+  optional uint64 llc_load_misses = 33;
+  optional uint64 llc_stores = 34;
+  optional uint64 llc_store_misses = 35;
+  optional uint64 llc_prefetches = 36;
+  optional uint64 llc_prefetch_misses = 37;
+  optional uint64 dtlb_loads = 38;
+  optional uint64 dtlb_load_misses = 39;
+  optional uint64 dtlb_stores = 40;
+  optional uint64 dtlb_store_misses = 41;
+  optional uint64 dtlb_prefetches = 42;
+  optional uint64 dtlb_prefetch_misses = 43;
+  optional uint64 itlb_loads = 44;
+  optional uint64 itlb_load_misses = 45;
+  optional uint64 branch_loads = 46;
+  optional uint64 branch_load_misses = 47;
+  optional uint64 node_loads = 48;
+  optional uint64 node_load_misses = 49;
+  optional uint64 node_stores = 50;
+  optional uint64 node_store_misses = 51;
+  optional uint64 node_prefetches = 52;
+  optional uint64 node_prefetch_misses = 53;
+}
+
+
+/**
  * Describes a request for resources that can be used by a framework
  * to proactively influence the allocator.  If 'slave_id' is provided
  * then this request is assumed to only apply to resources on that


[3/5] git commit: Make unknown container not a Failure for Isolator::cleanup.

Posted by id...@apache.org.
Make unknown container not a Failure for Isolator::cleanup.

Review: https://reviews.apache.org/r/22049


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/16345a7b
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/16345a7b
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/16345a7b

Branch: refs/heads/master
Commit: 16345a7be08b48873e263ca9f9867f4b2f298f27
Parents: 4c83662
Author: Ian Downes <id...@twitter.com>
Authored: Thu May 29 13:23:04 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:08 2014 -0700

----------------------------------------------------------------------
 src/slave/containerizer/isolators/cgroups/cpushare.cpp | 5 ++++-
 src/slave/containerizer/isolators/cgroups/mem.cpp      | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/16345a7b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/cpushare.cpp b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
index 3d253af..eb8933f 100644
--- a/src/slave/containerizer/isolators/cgroups/cpushare.cpp
+++ b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
@@ -432,8 +432,11 @@ Future<ResourceStatistics> CgroupsCpushareIsolatorProcess::usage(
 Future<Nothing> CgroupsCpushareIsolatorProcess::cleanup(
     const ContainerID& containerId)
 {
+  // Multiple calls may occur during test clean up.
   if (!infos.contains(containerId)) {
-    return Failure("Unknown container");
+    VLOG(1) << "Ignoring cleanup request for unknown container: "
+            << containerId;
+    return Nothing();
   }
 
   Info* info = CHECK_NOTNULL(infos[containerId]);

http://git-wip-us.apache.org/repos/asf/mesos/blob/16345a7b/src/slave/containerizer/isolators/cgroups/mem.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/mem.cpp b/src/slave/containerizer/isolators/cgroups/mem.cpp
index 60013d4..73b926f 100644
--- a/src/slave/containerizer/isolators/cgroups/mem.cpp
+++ b/src/slave/containerizer/isolators/cgroups/mem.cpp
@@ -367,8 +367,11 @@ Future<ResourceStatistics> CgroupsMemIsolatorProcess::usage(
 Future<Nothing> CgroupsMemIsolatorProcess::cleanup(
     const ContainerID& containerId)
 {
+  // Multiple calls may occur during test clean up.
   if (!infos.contains(containerId)) {
-    return Failure("Unknown container");
+    VLOG(1) << "Ignoring cleanup request for unknown container: "
+            << containerId;
+    return Nothing();
   }
 
   Info* info = CHECK_NOTNULL(infos[containerId]);