Posted to commits@mesos.apache.org by id...@apache.org on 2014/06/14 00:32:35 UTC

[5/5] git commit: Add sampling support to the perf_event isolator.

Add sampling support to the perf_event isolator.

Review: https://reviews.apache.org/r/21451


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d74de8c5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d74de8c5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d74de8c5

Branch: refs/heads/master
Commit: d74de8c579f74b089c3116cf0b11571b1051f735
Parents: be0ba0d
Author: Ian Downes <id...@twitter.com>
Authored: Wed May 14 10:33:21 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:09 2014 -0700

----------------------------------------------------------------------
 .../isolators/cgroups/perf_event.cpp            | 244 +++++++++++++++----
 .../isolators/cgroups/perf_event.hpp            |  33 ++-
 src/slave/containerizer/mesos_containerizer.cpp |   2 +
 src/slave/flags.hpp                             |  27 ++
 src/tests/isolator_tests.cpp                    |  72 ++++++
 src/tests/mesos.cpp                             |   1 +
 src/tests/slave_recovery_tests.cpp              | 150 ++++++++++++
 7 files changed, 481 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
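
In summary, this change adds three slave flags (perf_events, perf_interval,
perf_duration; see src/slave/flags.hpp below) and registers a new
"cgroups/perf_event" isolation key in mesos_containerizer.cpp. A hypothetical
slave invocation enabling the isolator might look like the following (the
flag values are illustrative; the defaults are perf_interval=60secs and
perf_duration=10secs):

    mesos-slave --isolation=cgroups/cpu,cgroups/mem,cgroups/perf_event \
                --perf_events=cycles,task-clock \
                --perf_duration=10secs \
                --perf_interval=60secs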


http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/containerizer/isolators/cgroups/perf_event.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.cpp b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
index d9c8b25..1bd5dfa 100644
--- a/src/slave/containerizer/isolators/cgroups/perf_event.cpp
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
@@ -19,13 +19,20 @@
 #include <stdint.h>
 
 #include <vector>
+#include <set>
+
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/message.h>
 
 #include <mesos/resources.hpp>
 #include <mesos/values.hpp>
 
 #include <process/collect.hpp>
 #include <process/defer.hpp>
+#include <process/delay.hpp>
+#include <process/io.hpp>
 #include <process/pid.hpp>
+#include <process/subprocess.hpp>
 
 #include <stout/bytes.hpp>
 #include <stout/check.hpp>
@@ -35,11 +42,10 @@
 #include <stout/hashset.hpp>
 #include <stout/lambda.hpp>
 #include <stout/nothing.hpp>
+#include <stout/os.hpp>
 #include <stout/stringify.hpp>
 #include <stout/try.hpp>
 
-#include "common/type_utils.hpp"
-
 #include "linux/cgroups.hpp"
 
 #include "slave/containerizer/isolators/cgroups/perf_event.hpp"
@@ -47,7 +53,7 @@
 using namespace process;
 
 using std::list;
-using std::ostringstream;
+using std::set;
 using std::string;
 using std::vector;
 
@@ -55,17 +61,33 @@ namespace mesos {
 namespace internal {
 namespace slave {
 
-CgroupsPerfEventIsolatorProcess::CgroupsPerfEventIsolatorProcess(
-    const Flags& _flags,
-    const string& _hierarchy)
-  : flags(_flags), hierarchy(_hierarchy) {}
+Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
+{
+  LOG(INFO) << "Creating PerfEvent isolator";
+
+  if (flags.perf_duration > flags.perf_interval) {
+    return Error("Sampling perf for duration (" +
+                 stringify(flags.perf_duration) +
+                 ") > interval (" +
+                 stringify(flags.perf_interval) +
+                 ") is not supported.");
+  }
 
+  if (flags.perf_events.isNone()) {
+    return Error("No perf events specified.");
+  }
 
-CgroupsPerfEventIsolatorProcess::~CgroupsPerfEventIsolatorProcess() {}
+  set<string> events;
+  foreach (const string& event,
+           strings::tokenize(flags.perf_events.get(), ",")) {
+    events.insert(event);
+  }
 
+  if (!perf::valid(events)) {
+    return Error("Failed to create PerfEvent isolator, invalid events: " +
+                 stringify(events));
+  }
 
-Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
-{
   Try<string> hierarchy = cgroups::prepare(
       flags.cgroups_hierarchy, "perf_event", flags.cgroups_root);
 
@@ -73,6 +95,10 @@ Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
     return Error("Failed to create perf_event cgroup: " + hierarchy.error());
   }
 
+  LOG(INFO) << "PerfEvent isolator will profile for " << flags.perf_duration
+            << " every " << flags.perf_interval
+            << " for events: " << stringify(events);
+
   process::Owned<IsolatorProcess> process(
       new CgroupsPerfEventIsolatorProcess(flags, hierarchy.get()));
 
@@ -80,6 +106,31 @@ Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
 }
 
 
+CgroupsPerfEventIsolatorProcess::CgroupsPerfEventIsolatorProcess(
+    const Flags& _flags,
+    const string& _hierarchy)
+  : flags(_flags),
+    hierarchy(_hierarchy)
+{
+  CHECK_SOME(flags.perf_events);
+
+  foreach (const string& event,
+           strings::tokenize(flags.perf_events.get(), ",")) {
+    events.insert(event);
+  }
+}
+
+
+CgroupsPerfEventIsolatorProcess::~CgroupsPerfEventIsolatorProcess() {}
+
+
+void CgroupsPerfEventIsolatorProcess::initialize()
+{
+  // Start sampling.
+  sample();
+}
+
+
 Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
     const list<state::RunState>& states)
 {
@@ -95,41 +146,39 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
     }
 
     const ContainerID& containerId = state.id.get();
+    const string cgroup = path::join(flags.cgroups_root, containerId.value());
 
-    Info* info = new Info(
-        containerId, path::join(flags.cgroups_root, containerId.value()));
-    CHECK_NOTNULL(info);
-
-    infos[containerId] = info;
-    cgroups.insert(info->cgroup);
-
-    Try<bool> exists = cgroups::exists(hierarchy, info->cgroup);
+    Try<bool> exists = cgroups::exists(hierarchy, cgroup);
     if (exists.isError()) {
-      delete info;
       foreachvalue (Info* info, infos) {
         delete info;
       }
+
       infos.clear();
-      return Failure("Failed to check cgroup for container '" +
-                     stringify(containerId) + "'");
+      return Failure("Failed to check cgroup " + cgroup +
+                     " for container '" + stringify(containerId) + "'");
     }
 
     if (!exists.get()) {
-      VLOG(1) << "Couldn't find cgroup for container " << containerId;
       // This may occur if the executor is exiting and the isolator has
       // destroyed the cgroup but the slave dies before noticing this. This
       // will be detected when the containerizer tries to monitor the
       // executor's pid.
       // NOTE: This could also occur if this isolator is now enabled for a
-      // container that was started without this isolator. For this particular
-      // isolator it is okay to continue running this container without its
-      // perf_event cgroup existing because we don't ever query it and the
-      // destroy will succeed immediately.
+      // container that was started without this isolator. For this
+      // particular isolator it is acceptable to continue running this
+      // container without a perf_event cgroup because we don't ever
+      // query it and the destroy will succeed immediately.
+      VLOG(1) << "Couldn't find perf event cgroup for container " << containerId
+              << ", perf statistics will not be available";
+      continue;
     }
+
+    infos[containerId] = new Info(containerId, cgroup);
+    cgroups.insert(cgroup);
   }
 
-  Try<vector<string> > orphans = cgroups::get(
-      hierarchy, flags.cgroups_root);
+  Try<vector<string> > orphans = cgroups::get(hierarchy, flags.cgroups_root);
   if (orphans.isError()) {
     foreachvalue (Info* info, infos) {
       delete info;
@@ -139,6 +188,13 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
   }
 
   foreach (const string& orphan, orphans.get()) {
+    // Ignore the slave cgroup (see the --slave_subsystems flag).
+    // TODO(idownes): Remove this when the cgroups layout is updated,
+    // see MESOS-1185.
+    if (orphan == path::join(flags.cgroups_root, "slave")) {
+      continue;
+    }
+
     if (!cgroups.contains(orphan)) {
       LOG(INFO) << "Removing orphaned cgroup '" << orphan << "'";
       cgroups::destroy(hierarchy, orphan);
@@ -157,8 +213,11 @@ Future<Option<CommandInfo> > CgroupsPerfEventIsolatorProcess::prepare(
     return Failure("Container has already been prepared");
   }
 
+  LOG(INFO) << "Preparing perf event cgroup for " << containerId;
+
   Info* info = new Info(
-      containerId, path::join(flags.cgroups_root, containerId.value()));
+      containerId,
+      path::join(flags.cgroups_root, containerId.value()));
 
   infos[containerId] = CHECK_NOTNULL(info);
 
@@ -194,9 +253,6 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::isolate(
 
   Info* info = CHECK_NOTNULL(infos[containerId]);
 
-  CHECK(info->pid.isNone());
-  info->pid = pid;
-
   Try<Nothing> assign = cgroups::assign(hierarchy, info->cgroup, pid);
   if (assign.isError()) {
     return Failure("Failed to assign container '" +
@@ -212,13 +268,8 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::isolate(
 Future<Limitation> CgroupsPerfEventIsolatorProcess::watch(
     const ContainerID& containerId)
 {
-  if (!infos.contains(containerId)) {
-    return Failure("Unknown container");
-  }
-
-  CHECK_NOTNULL(infos[containerId]);
-
-  return infos[containerId]->limitation.future();
+  // No resources are limited.
+  return Future<Limitation>();
 }
 
 
@@ -234,20 +285,36 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::update(
 Future<ResourceStatistics> CgroupsPerfEventIsolatorProcess::usage(
     const ContainerID& containerId)
 {
-  // No resource statistics provided by this isolator.
-  return ResourceStatistics();
+  if (!infos.contains(containerId)) {
+    // Return an empty ResourceStatistics, i.e., without
+    // PerfStatistics, if we don't know about this container.
+    return ResourceStatistics();
+  }
+
+  CHECK_NOTNULL(infos[containerId]);
+
+  ResourceStatistics statistics;
+  statistics.mutable_perf()->CopyFrom(infos[containerId]->statistics);
+
+  return statistics;
 }
 
 
 Future<Nothing> CgroupsPerfEventIsolatorProcess::cleanup(
     const ContainerID& containerId)
 {
+  // Tolerate clean up attempts for unknown containers which may arise from
+  // repeated clean up attempts (during test cleanup).
   if (!infos.contains(containerId)) {
-    return Failure("Unknown container");
+    VLOG(1) << "Ignoring cleanup request for unknown container: "
+            << containerId;
+    return Nothing();
   }
 
   Info* info = CHECK_NOTNULL(infos[containerId]);
 
+  info->destroying = true;
+
   return cgroups::destroy(hierarchy, info->cgroup)
     .then(defer(PID<CgroupsPerfEventIsolatorProcess>(this),
                 &CgroupsPerfEventIsolatorProcess::_cleanup,
@@ -258,7 +325,9 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::cleanup(
 Future<Nothing> CgroupsPerfEventIsolatorProcess::_cleanup(
     const ContainerID& containerId)
 {
-  CHECK(infos.contains(containerId));
+  if (!infos.contains(containerId)) {
+    return Nothing();
+  }
 
   delete infos[containerId];
   infos.erase(containerId);
@@ -267,6 +336,94 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::_cleanup(
 }
 
 
+Future<hashmap<string, PerfStatistics> > discardSample(
+    Future<hashmap<string, PerfStatistics> > future,
+    const Duration& duration,
+    const Duration& timeout)
+{
+  LOG(ERROR) << "Perf sample of " << stringify(duration)
+             << " failed to complete within " << stringify(timeout)
+             << "; sampling will be halted";
+
+  future.discard();
+
+  return future;
+}
+
+
+void CgroupsPerfEventIsolatorProcess::sample()
+{
+  set<string> cgroups;
+  foreachvalue (Info* info, infos) {
+    CHECK_NOTNULL(info);
+
+    if (info->destroying) {
+      // Skip cgroups if destroy has started because it's asynchronous
+      // and "perf stat" will fail if the cgroup has been destroyed
+      // by the time we actually run perf.
+      continue;
+    }
+
+    cgroups.insert(info->cgroup);
+  }
+
+  if (cgroups.size() > 0) {
+    // The timeout includes an allowance of twice the process::reap
+    // interval (currently one second) to ensure we see the perf
+    // process exit. If the sample is not ready after the timeout
+    // something very unexpected has occurred so we discard it and
+    // halt all sampling.
+    Duration timeout = flags.perf_duration + Seconds(2);
+
+    perf::sample(events, cgroups, flags.perf_duration)
+      .after(timeout,
+             lambda::bind(&discardSample,
+                          lambda::_1,
+                          flags.perf_duration,
+                          timeout))
+      .onAny(defer(PID<CgroupsPerfEventIsolatorProcess>(this),
+                   &CgroupsPerfEventIsolatorProcess::_sample,
+                   Clock::now() + flags.perf_interval,
+                   lambda::_1));
+  } else {
+    // No cgroups to sample for now so just schedule the next sample.
+    delay(flags.perf_interval,
+          PID<CgroupsPerfEventIsolatorProcess>(this),
+          &CgroupsPerfEventIsolatorProcess::sample);
+  }
+}
+
+
+void CgroupsPerfEventIsolatorProcess::_sample(
+    const Time& next,
+    const Future<hashmap<string, PerfStatistics> >& statistics)
+{
+  if (!statistics.isReady()) {
+    // Failure can occur for many reasons but all are unexpected and
+    // indicate something is not right so we'll stop sampling.
+    LOG(ERROR) << "Failed to get perf sample, sampling will be halted: "
+               << (statistics.isFailed() ? statistics.failure() : "discarded");
+    return;
+  }
+
+  foreachvalue (Info* info, infos) {
+    CHECK_NOTNULL(info);
+
+    if (!statistics.get().contains(info->cgroup)) {
+      // This must be a newly added cgroup and isn't in this sample;
+      // it should be included in the next sample.
+      continue;
+    }
+
+    info->statistics = statistics.get().get(info->cgroup).get();
+  }
+
+  // Schedule sample for the next time.
+  delay(next - Clock::now(),
+        PID<CgroupsPerfEventIsolatorProcess>(this),
+        &CgroupsPerfEventIsolatorProcess::sample);
+}
+
 } // namespace slave {
 } // namespace internal {
 } // namespace mesos {

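Note: perf::sample() (declared in linux/perf.hpp, which is not part of this
diff) is what actually runs perf against the containers' cgroups for
flags.perf_duration. Conceptually it is in the spirit of the following
command line, shown only as a sketch of the mechanism (the cgroup path is a
placeholder), not the literal invocation the helper constructs:

    perf stat --all-cpus --event=cycles,task-clock \
        --cgroup=mesos/<containerId> -- sleep 10
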
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/containerizer/isolators/cgroups/perf_event.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.hpp b/src/slave/containerizer/isolators/cgroups/perf_event.hpp
index 2db7b3e..4ceb07a 100644
--- a/src/slave/containerizer/isolators/cgroups/perf_event.hpp
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.hpp
@@ -19,9 +19,14 @@
 #ifndef __PERF_EVENT_ISOLATOR_HPP__
 #define __PERF_EVENT_ISOLATOR_HPP__
 
+#include <set>
+
 #include <mesos/resources.hpp>
 
 #include <process/future.hpp>
+#include <process/time.hpp>
 
 #include <stout/hashmap.hpp>
 #include <stout/try.hpp>
+
+#include "linux/perf.hpp"
@@ -65,29 +70,48 @@ public:
   virtual process::Future<Nothing> cleanup(
       const ContainerID& containerId);
 
+protected:
+  virtual void initialize();
+
 private:
   CgroupsPerfEventIsolatorProcess(
       const Flags& flags,
       const std::string& hierarchy);
 
+  void sample();
+
+  void _sample(
+      const process::Time& next,
+      const process::Future<hashmap<std::string, PerfStatistics> >& statistics);
+
   virtual process::Future<Nothing> _cleanup(const ContainerID& containerId);
 
   struct Info
   {
     Info(const ContainerID& _containerId, const std::string& _cgroup)
-      : containerId(_containerId), cgroup(_cgroup) {}
+      : containerId(_containerId), cgroup(_cgroup), destroying(false)
+    {
+      // Ensure the initial statistics include the required fields.
+      // Note the duration is set to zero to indicate no sampling has
+      // taken place. This empty sample will be returned from usage()
+      // until the first true sample is obtained.
+      statistics.set_timestamp(process::Clock::now().secs());
+      statistics.set_duration(Seconds(0).secs());
+    }
 
     const ContainerID containerId;
     const std::string cgroup;
-    Option<pid_t> pid;
-
-    process::Promise<Limitation> limitation;
+    PerfStatistics statistics;
+    // Mark a container when we start destruction so we stop sampling it.
+    bool destroying;
   };
 
   const Flags flags;
 
   // The path to the cgroups subsystem hierarchy root.
   const std::string hierarchy;
+  // Set of events to sample.
+  std::set<std::string> events;
 
   hashmap<ContainerID, Info*> infos;
 };
@@ -97,4 +121,3 @@ private:
 } // namespace mesos {
 
 #endif // __PERF_EVENT_ISOLATOR_HPP__
-

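Because Info's constructor (above) initializes statistics with a zero
duration, a consumer of usage() can distinguish "no perf sample has completed
yet" from a real sample. A sketch of such a check (field accessors per the
PerfStatistics protobuf):

    ResourceStatistics statistics = ...;  // As returned by usage().
    if (statistics.has_perf() && statistics.perf().duration() == 0.0) {
      // No perf sample has completed yet; event counts are not yet
      // meaningful.
    }
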
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/containerizer/mesos_containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos_containerizer.cpp b/src/slave/containerizer/mesos_containerizer.cpp
index b5e29da..d6df9a1 100644
--- a/src/slave/containerizer/mesos_containerizer.cpp
+++ b/src/slave/containerizer/mesos_containerizer.cpp
@@ -45,6 +45,7 @@
 #ifdef __linux__
 #include "slave/containerizer/isolators/cgroups/cpushare.hpp"
 #include "slave/containerizer/isolators/cgroups/mem.hpp"
+#include "slave/containerizer/isolators/cgroups/perf_event.hpp"
 #endif // __linux__
 
 using std::list;
@@ -131,6 +132,7 @@ Try<MesosContainerizer*> MesosContainerizer::create(
 #ifdef __linux__
   creators["cgroups/cpu"] = &CgroupsCpushareIsolatorProcess::create;
   creators["cgroups/mem"] = &CgroupsMemIsolatorProcess::create;
+  creators["cgroups/perf_event"] = &CgroupsPerfEventIsolatorProcess::create;
 #endif // __linux__
 
   vector<Owned<Isolator> > isolators;

http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/flags.hpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 15e5b64..3b8ba08 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -213,6 +213,30 @@ public:
         "Present functionality is intended for resource monitoring and\n"
         "no cgroup limits are set, they are inherited from the root mesos\n"
         "cgroup.");
+
+    add(&Flags::perf_events,
+        "perf_events",
+        "List of command-separated perf events to sample for each container\n"
+        "when using the perf_event isolator. Default is none.\n"
+        "Run command 'perf list' to see all events. Event names are\n"
+        "sanitized by downcasing and replacing hyphens with underscores\n"
+        "when reported in the PerfStatistics protobuf, e.g., cpu-cycles\n"
+        "becomes cpu_cycles; see the PerfStatistics protobuf for all names.");
+
+    add(&Flags::perf_interval,
+        "perf_interval",
+        "Interval between the start of perf stat samples. Perf samples are\n"
+        "obtained periodically according to perf_interval and the most\n"
+        "recently obtained sample is returned rather than sampling on\n"
+        "demand. For this reason, perf_interval is independent of the\n"
+        "resource monitoring interval",
+        Seconds(60));
+
+    add(&Flags::perf_duration,
+        "perf_duration",
+        "Duration of a perf stat sample. The duration must be less\n"
+        "that the perf_interval.",
+        Seconds(10));
 #endif
 
     add(&Flags::credential,
@@ -260,6 +284,9 @@ public:
   Option<std::string> cgroups_subsystems;
   bool cgroups_enable_cfs;
   Option<std::string> slave_subsystems;
+  Option<std::string> perf_events;
+  Duration perf_interval;
+  Duration perf_duration;
 #endif
   Option<std::string> credential;
   Option<std::string> containerizer_path;

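The sanitization described in the perf_events help text amounts to downcasing
the event name and replacing hyphens with underscores. A minimal sketch of
that mapping (the actual implementation lives with the perf support code
under src/linux/ and is not part of this diff):

    #include <algorithm>
    #include <cctype>
    #include <string>

    // Sketch: map a perf event name to its PerfStatistics field name,
    // e.g., "cpu-cycles" -> "cpu_cycles".
    std::string sanitize(std::string event)
    {
      std::transform(event.begin(), event.end(), event.begin(), ::tolower);
      std::replace(event.begin(), event.end(), '-', '_');
      return event;
    }
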
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/tests/isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/isolator_tests.cpp b/src/tests/isolator_tests.cpp
index b0eff57..0bbec09 100644
--- a/src/tests/isolator_tests.cpp
+++ b/src/tests/isolator_tests.cpp
@@ -48,6 +48,7 @@
 #ifdef __linux__
 #include "slave/containerizer/isolators/cgroups/cpushare.hpp"
 #include "slave/containerizer/isolators/cgroups/mem.hpp"
+#include "slave/containerizer/isolators/cgroups/perf_event.hpp"
 #endif // __linux__
 
 #include "tests/mesos.hpp"
@@ -63,6 +64,7 @@ using mesos::internal::master::Master;
 #ifdef __linux__
 using mesos::internal::slave::CgroupsCpushareIsolatorProcess;
 using mesos::internal::slave::CgroupsMemIsolatorProcess;
+using mesos::internal::slave::CgroupsPerfEventIsolatorProcess;
 using mesos::internal::slave::LinuxLauncher;
 #endif // __linux__
 using mesos::internal::slave::Isolator;
@@ -576,3 +578,71 @@ TYPED_TEST(MemIsolatorTest, MemUsage)
   delete isolator.get();
   delete launcher.get();
 }
+
+
+#ifdef __linux__
+class PerfEventIsolatorTest : public MesosTest {};
+
+TEST_F(PerfEventIsolatorTest, ROOT_CGROUPS_Sample)
+{
+  Flags flags;
+
+  flags.perf_events = "cycles,task-clock";
+  flags.perf_duration = Milliseconds(250);
+  flags.perf_interval = Milliseconds(500);
+
+  Try<Isolator*> isolator = CgroupsPerfEventIsolatorProcess::create(flags);
+  CHECK_SOME(isolator);
+
+  ExecutorInfo executorInfo;
+
+  ContainerID containerId;
+  containerId.set_value("test");
+
+  AWAIT_READY(isolator.get()->prepare(containerId, executorInfo));
+
+  // This first sample is likely to be empty because perf hasn't
+  // completed yet but we should still have the required fields.
+  Future<ResourceStatistics> statistics1 = isolator.get()->usage(containerId);
+  AWAIT_READY(statistics1);
+  ASSERT_TRUE(statistics1.get().has_perf());
+  EXPECT_TRUE(statistics1.get().perf().has_timestamp());
+  EXPECT_TRUE(statistics1.get().perf().has_duration());
+
+  // Wait until we get the next sample. We use a generous timeout of
+  // two seconds because we currently have a one second reap interval;
+  // when running perf with perf_duration of 250ms we won't notice the
+  // exit for up to one second.
+  ResourceStatistics statistics2;
+  Duration waited = Duration::zero();
+  do {
+    Future<ResourceStatistics> statistics = isolator.get()->usage(containerId);
+    AWAIT_READY(statistics);
+
+    statistics2 = statistics.get();
+
+    ASSERT_TRUE(statistics2.has_perf());
+
+    if (statistics1.get().perf().timestamp() !=
+        statistics2.perf().timestamp()) {
+      break;
+    }
+
+    os::sleep(Milliseconds(250));
+    waited += Milliseconds(250);
+  } while (waited < Seconds(2));
+
+  EXPECT_NE(statistics1.get().perf().timestamp(),
+            statistics2.perf().timestamp());
+
+  EXPECT_TRUE(statistics2.perf().has_cycles());
+  EXPECT_LE(0u, statistics2.perf().cycles());
+
+  EXPECT_TRUE(statistics2.perf().has_task_clock());
+  EXPECT_LE(0.0, statistics2.perf().task_clock());
+
+  AWAIT_READY(isolator.get()->cleanup(containerId));
+
+  delete isolator.get();
+}
+#endif // __linux__

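The ROOT_CGROUPS_ prefix on the new test means it only runs as root on a
machine with cgroups (and perf) available. Assuming the standard test runner,
it could be invoked with something like:

    sudo ./bin/mesos-tests.sh \
        --gtest_filter=PerfEventIsolatorTest.ROOT_CGROUPS_Sample
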
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/tests/mesos.cpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.cpp b/src/tests/mesos.cpp
index e6d807c..98a7c38 100644
--- a/src/tests/mesos.cpp
+++ b/src/tests/mesos.cpp
@@ -408,6 +408,7 @@ void ContainerizerTest<slave::MesosContainerizer>::SetUp()
   subsystems.insert("cpuacct");
   subsystems.insert("memory");
   subsystems.insert("freezer");
+  subsystems.insert("perf_event");
 
   if (cgroups::enabled() && os::user() == "root") {
     foreach (const string& subsystem, subsystems) {

http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 9725e6a..7044327 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -3218,3 +3218,153 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, ResourceStatistics)
 
   delete containerizer2.get();
 }
+
+#ifdef __linux__
+// Test that the perf event isolator can be enabled on a new slave.
+// Previously created containers will not report perf statistics but
+// newly created containers will.
+TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PerfRollForward)
+{
+  Try<PID<Master> > master = this->StartMaster();
+  ASSERT_SOME(master);
+
+  // Start a slave using a containerizer without a perf event
+  // isolator.
+  slave::Flags flags = this->CreateSlaveFlags();
+  flags.isolation = "cgroups/cpu,cgroups/mem";
+
+  Try<MesosContainerizer*> containerizer1 =
+    MesosContainerizer::create(flags, true);
+  ASSERT_SOME(containerizer1);
+
+  Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+
+  // Scheduler expectations.
+  EXPECT_CALL(sched, statusUpdate(_, _))
+    .WillRepeatedly(Return());
+
+  // Enable checkpointing for the framework.
+  FrameworkInfo frameworkInfo;
+  frameworkInfo.CopyFrom(DEFAULT_FRAMEWORK_INFO);
+  frameworkInfo.set_checkpoint(true);
+
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(_, _, _));
+
+  Future<vector<Offer> > offers1;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers1))
+    .WillRepeatedly(Return());      // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers1);
+  EXPECT_NE(0u, offers1.get().size());
+
+  SlaveID slaveId = offers1.get()[0].slave_id();
+
+  TaskInfo task1 = createTask(
+      slaveId, Resources::parse("cpus:0.5;mem:128").get(), "sleep 1000");
+  vector<TaskInfo> tasks1;
+  tasks1.push_back(task1);
+
+  // Message expectations.
+  Future<Message> registerExecutor =
+    FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
+
+  driver.launchTasks(offers1.get()[0].id(), tasks1);
+
+  AWAIT_READY(registerExecutor);
+
+  Future<hashset<ContainerID> > containers = containerizer1.get()->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(1u, containers.get().size());
+
+  ContainerID containerId1 = *(containers.get().begin());
+
+  Future<ResourceStatistics> usage = containerizer1.get()->usage(containerId1);
+  AWAIT_READY(usage);
+
+  // There should not be any perf statistics.
+  EXPECT_FALSE(usage.get().has_perf());
+
+  this->Stop(slave.get());
+  delete containerizer1.get();
+
+  // Set up so we can wait until the new slave updates the container's
+  // resources (this occurs after the executor has re-registered).
+  Future<Nothing> update =
+    FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);
+
+  // Start a slave using a containerizer with a perf event isolator.
+  flags.isolation = "cgroups/cpu,cgroups/mem,cgroups/perf_event";
+  flags.perf_events = "cycles,task-clock";
+  flags.perf_duration = Milliseconds(250);
+  flags.perf_interval = Milliseconds(500);
+
+  Try<MesosContainerizer*> containerizer2 =
+    MesosContainerizer::create(flags, true);
+  ASSERT_SOME(containerizer2);
+
+  Future<vector<Offer> > offers2;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers2))
+    .WillRepeatedly(Return());        // Ignore subsequent offers.
+
+  slave = this->StartSlave(containerizer2.get(), flags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(offers2);
+  EXPECT_NE(0u, offers2.get().size());
+
+  // Wait until the containerizer is updated.
+  AWAIT_READY(update);
+
+  // The first container should not report perf statistics.
+  usage = containerizer2.get()->usage(containerId1);
+  AWAIT_READY(usage);
+
+  EXPECT_FALSE(usage.get().has_perf());
+
+  // Start a new container which will start reporting perf statistics.
+  TaskInfo task2 = createTask(offers2.get()[0], "sleep 1000");
+  vector<TaskInfo> tasks2;
+  tasks2.push_back(task2);
+
+  // Message expectations.
+  registerExecutor =
+    FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
+
+  driver.launchTasks(offers2.get()[0].id(), tasks2);
+
+  AWAIT_READY(registerExecutor);
+
+  containers = containerizer2.get()->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(2u, containers.get().size());
+  EXPECT_TRUE(containers.get().contains(containerId1));
+
+  ContainerID containerId2;
+  foreach (const ContainerID& containerId, containers.get()) {
+    if (containerId != containerId1) {
+      containerId2.CopyFrom(containerId);
+    }
+  }
+
+  usage = containerizer2.get()->usage(containerId2);
+  AWAIT_READY(usage);
+
+  EXPECT_TRUE(usage.get().has_perf());
+
+  driver.stop();
+  driver.join();
+
+  this->Shutdown();
+  delete containerizer2.get();
+}
+#endif // __linux__