You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by id...@apache.org on 2014/06/14 00:32:31 UTC
[1/5] git commit: Support for running "perf stat".
Repository: mesos
Updated Branches:
refs/heads/master 4c83662e1 -> d74de8c57
Support for running "perf stat".
Perf can be run against either a set of pids or a set of perf_event cgroups.
Review: https://reviews.apache.org/r/21443
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/be0ba0db
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/be0ba0db
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/be0ba0db
Branch: refs/heads/master
Commit: be0ba0db76e7fb5b54b25b09d154fdfdd6f2a741
Parents: 160e9e0
Author: Ian Downes <id...@twitter.com>
Authored: Wed May 14 14:33:58 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:08 2014 -0700
----------------------------------------------------------------------
src/Makefile.am | 3 +
src/linux/perf.cpp | 401 +++++++++++++++++++++++++++++++++++++++
src/linux/perf.hpp | 78 ++++++++
src/tests/cgroups_tests.cpp | 82 ++++++++
src/tests/perf_tests.cpp | 146 ++++++++++++++
5 files changed, 710 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/be0ba0db/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index 5d3196d..3e623cc 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -287,6 +287,7 @@ nodist_scheduler_HEADERS = scheduler/scheduler.pb.h
if OS_LINUX
libmesos_no_3rdparty_la_SOURCES += linux/cgroups.cpp
+ libmesos_no_3rdparty_la_SOURCES += linux/perf.cpp
libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/cpushare.cpp
libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/mem.cpp
libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/perf_event.cpp
@@ -339,6 +340,7 @@ libmesos_no_3rdparty_la_SOURCES += \
hdfs/hdfs.hpp \
launcher/launcher.hpp \
linux/cgroups.hpp \
+ linux/perf.hpp \
linux/fs.hpp local/flags.hpp local/local.hpp \
logging/flags.hpp logging/logging.hpp \
master/allocator.hpp \
@@ -1014,6 +1016,7 @@ if OS_LINUX
mesos_tests_SOURCES += tests/cgroups_isolator_tests.cpp
mesos_tests_SOURCES += tests/cgroups_tests.cpp
mesos_tests_SOURCES += tests/fs_tests.cpp
+ mesos_tests_SOURCES += tests/perf_tests.cpp
endif
if WITH_NETWORK_ISOLATOR
http://git-wip-us.apache.org/repos/asf/mesos/blob/be0ba0db/src/linux/perf.cpp
----------------------------------------------------------------------
diff --git a/src/linux/perf.cpp b/src/linux/perf.cpp
new file mode 100644
index 0000000..c0f2e92
--- /dev/null
+++ b/src/linux/perf.cpp
@@ -0,0 +1,401 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <list>
+#include <ostream>
+#include <vector>
+
+#include <process/clock.hpp>
+#include <process/collect.hpp>
+#include <process/defer.hpp>
+#include <process/io.hpp>
+#include <process/process.hpp>
+#include <process/subprocess.hpp>
+
+#include <stout/strings.hpp>
+
+#include "linux/perf.hpp"
+
+using std::list;
+using std::ostringstream;
+using std::set;
+using std::string;
+using std::vector;
+
+using namespace process;
+
+namespace perf {
+
+// Delimiter for fields in perf stat output. It is passed to
+// `perf stat -x` when the command line is built and is used again to
+// split each output line in parse().
+const string PERF_DELIMITER = ",";
+
+// Use an empty string as the key for the parse output when sampling a
+// set of pids. No valid cgroup can be an empty string.
+const string PIDS_KEY = "";
+
+namespace internal {
+
+// Build the `perf stat` command line that samples 'events' in each of
+// the perf_event 'cgroups' for 'duration'.
+string command(
+    const set<string>& events,
+    const set<string>& cgroups,
+    const Duration& duration)
+{
+  ostringstream out;
+
+  // Fields are separated by PERF_DELIMITER; "--log-fd 1" ensures all
+  // output goes to stdout.
+  out << "perf stat -x" << PERF_DELIMITER << " -a" << " --log-fd 1";
+
+  // Emit one "--event E --cgroup C" pair for every combination of
+  // event and cgroup.
+  foreach (const string& event, events) {
+    foreach (const string& cgroup, cgroups) {
+      out << " --event " << event;
+      out << " --cgroup " << cgroup;
+    }
+  }
+
+  // The wrapped `sleep` bounds how long perf runs.
+  out << " -- sleep " << stringify(duration.secs());
+
+  return out.str();
+}
+
+
+// Convenience overload: build the command line for a single cgroup by
+// deferring to the set-of-cgroups overload.
+string command(
+    const set<string>& events,
+    const string& cgroup,
+    const Duration& duration)
+{
+  // One-element set holding a copy of 'cgroup'.
+  const set<string> single(&cgroup, &cgroup + 1);
+
+  return command(events, single, duration);
+}
+
+
+// Build the `perf stat` command line that samples 'events' for the
+// processes in 'pids' for 'duration'. Events and pids are each passed
+// as a single comma-separated list.
+string command(
+    const set<string>& events,
+    const set<pid_t>& pids,
+    const Duration& duration)
+{
+  ostringstream out;
+
+  out << "perf stat -x" << PERF_DELIMITER << " -a"
+      << " --log-fd 1" // Ensure all output goes to stdout.
+      << " --event " << strings::join(",", events)
+      << " --pid " << strings::join(",", pids)
+      << " -- sleep " << stringify(duration.secs());
+
+  return out.str();
+}
+
+
+// Normalize a perf event name by lower-casing it and turning every
+// '-' into '_'. After normalization the event name should match an
+// event field in the PerfStatistics protobuf.
+inline string normalize(const string& s)
+{
+  return strings::replace(strings::lower(s), "-", "_");
+}
+
+
+// Process that runs a single perf command, waits for it to exit,
+// parses what it wrote to stdout and fulfils a promise with the
+// resulting statistics (keyed as returned by perf::parse()). On every
+// outcome (success or failure) the process terminates itself; it is
+// also terminated when the caller discards the future.
+class PerfSampler : public Process<PerfSampler>
+{
+public:
+  // _command: the complete perf command line to launch.
+  // _duration: the sampling duration; recorded in every returned
+  //   PerfStatistics and rejected in initialize() if negative.
+  PerfSampler(const string& _command, const Duration& _duration)
+    : command(_command), duration(_duration) {}
+
+  virtual ~PerfSampler() {}
+
+  // Future for the parsed statistics. It fails with a descriptive
+  // message if perf cannot be launched, exits non-zero, or produces
+  // output that cannot be parsed.
+  Future<hashmap<string, mesos::PerfStatistics> > future()
+  {
+    return promise.future();
+  }
+
+protected:
+  virtual void initialize()
+  {
+    // Stop when no one cares.
+    promise.future().onDiscard(lambda::bind(
+        static_cast<void(*)(const UPID&, bool)>(terminate), self(), true));
+
+    if (duration < Seconds(0)) {
+      promise.fail("Perf sample duration cannot be negative: '" +
+                   stringify(duration.secs()) + "'");
+      terminate(self());
+      return;
+    }
+
+    // Used as the timestamp of the resulting statistics.
+    start = Clock::now();
+
+    sample();
+  }
+
+  virtual void finalize()
+  {
+    discard(output);
+
+    // Kill the perf process if it's still running.
+    if (perf.isSome() && perf.get().status().isPending()) {
+      kill(perf.get().pid(), SIGKILL);
+    }
+
+    promise.discard();
+  }
+
+private:
+  // Step 1: launch perf and start draining its stdout and stderr.
+  void sample()
+  {
+    Try<Subprocess> _perf = subprocess(command);
+    if (_perf.isError()) {
+      promise.fail("Failed to launch perf process: " + _perf.error());
+      terminate(self());
+      return;
+    }
+    perf = _perf.get();
+
+    Try<Nothing> nonblock = os::nonblock(perf.get().out());
+    if (nonblock.isError()) {
+      promise.fail("Failed to set nonblock on stdout for perf process: " +
+                   nonblock.error());
+      terminate(self());
+      return;
+    }
+
+    nonblock = os::nonblock(perf.get().err());
+    if (nonblock.isError()) {
+      promise.fail("Failed to set nonblock on stderr for perf process: " +
+                   nonblock.error());
+      terminate(self());
+      return;
+    }
+
+    // Start reading from stdout and stderr now. We don't use stderr
+    // but must read from it to avoid the subprocess blocking on the
+    // pipe.
+    // NOTE: stdout must stay first; __sample() parses output.front().
+    output.push_back(process::io::read(perf.get().out()));
+    output.push_back(process::io::read(perf.get().err()));
+
+    // Wait for the process to exit.
+    perf.get().status()
+      .onAny(defer(self(), &Self::_sample, lambda::_1));
+  }
+
+  // Step 2: check the exit status of perf, then wait for its output.
+  void _sample(const Future<Option<int> >& status)
+  {
+    if (!status.isReady()) {
+      // NOTE: The conditional has to be parenthesized. '+' binds
+      // tighter than '?:', so without the parentheses the message
+      // prefix is dropped and failure() is evaluated even when the
+      // future was discarded rather than failed.
+      promise.fail("Failed to get exit status of perf process: " +
+                   (status.isFailed() ? status.failure() : "discarded"));
+      terminate(self());
+      return;
+    }
+
+    // Guard against an absent exit status before dereferencing it.
+    if (status.get().isNone()) {
+      promise.fail("Failed to get exit status of perf process: unknown");
+      terminate(self());
+      return;
+    }
+
+    if (status.get().get() != 0) {
+      promise.fail("Failed to execute perf, exit status: " +
+                   stringify(WEXITSTATUS(status.get().get())));
+
+      terminate(self());
+      return;
+    }
+
+    // Wait until we collect all output.
+    collect(output).onAny(defer(self(), &Self::__sample, lambda::_1));
+  }
+
+  // Step 3: parse stdout, stamp the statistics and fulfil the promise.
+  void __sample(const Future<list<string> >& future)
+  {
+    if (!future.isReady()) {
+      // See the note in _sample() on why the parentheses are required.
+      promise.fail("Failed to collect output of perf process: " +
+                   (future.isFailed() ? future.failure() : "discarded"));
+      terminate(self());
+      return;
+    }
+
+    // Parse output from stdout.
+    Try<hashmap<string, mesos::PerfStatistics> > parse =
+      perf::parse(output.front().get());
+    if (parse.isError()) {
+      promise.fail("Failed to parse perf output: " + parse.error());
+      terminate(self());
+      return;
+    }
+
+    // Create a non-const copy from the Try<> so we can set the
+    // timestamp and duration.
+    hashmap<string, mesos::PerfStatistics> statistics = parse.get();
+    foreachvalue (mesos::PerfStatistics& s, statistics) {
+      s.set_timestamp(start.secs());
+      s.set_duration(duration.secs());
+    }
+
+    promise.set(statistics);
+    terminate(self());
+  }
+
+  const string command;   // Command line passed to subprocess().
+  const Duration duration; // Requested sampling duration.
+  Time start;              // Set in initialize() just before sampling.
+  Option<Subprocess> perf; // The perf subprocess, once launched.
+  Promise<hashmap<string, mesos::PerfStatistics> > promise;
+  list<Future<string> > output; // Reads of stdout (front) and stderr.
+};
+
+
+// Helper to select a single key from the hashmap of perf statistics.
+// Returns a failed future, instead of dereferencing an empty Option,
+// when 'statistics' has no entry for 'key' (e.g., perf produced no
+// output line for it).
+Future<mesos::PerfStatistics> select(
+    const string& key,
+    const hashmap<string, mesos::PerfStatistics>& statistics)
+{
+  const Option<mesos::PerfStatistics> selected = statistics.get(key);
+  if (selected.isNone()) {
+    return Failure("No perf statistics found for key '" + key + "'");
+  }
+
+  return selected.get();
+}
+
+} // namespace internal {
+
+
+// Sample 'events' for the single process 'pid' by deferring to the
+// set-of-pids overload.
+Future<mesos::PerfStatistics> sample(
+    const set<string>& events,
+    pid_t pid,
+    const Duration& duration)
+{
+  // One-element set holding 'pid'.
+  const set<pid_t> single(&pid, &pid + 1);
+
+  return sample(events, single, duration);
+}
+
+
+// Sample 'events' for the processes in 'pids'. A PerfSampler is spawned
+// to run perf and the entry stored under PIDS_KEY is selected from its
+// result.
+Future<mesos::PerfStatistics> sample(
+    const set<string>& events,
+    const set<pid_t>& pids,
+    const Duration& duration)
+{
+  internal::PerfSampler* sampler = new internal::PerfSampler(
+      internal::command(events, pids, duration), duration);
+
+  // Take the future before spawning.
+  // NOTE(review): 'true' presumably hands ownership of the sampler to
+  // libprocess, so it must not be touched after spawn() - confirm.
+  Future<hashmap<string, mesos::PerfStatistics> > statistics =
+    sampler->future();
+  spawn(sampler, true);
+
+  return statistics.then(
+      lambda::bind(&internal::select, PIDS_KEY, lambda::_1));
+}
+
+
+// Sample 'events' for a single perf_event 'cgroup' by deferring to the
+// set-of-cgroups overload and selecting that cgroup's entry.
+Future<mesos::PerfStatistics> sample(
+    const set<string>& events,
+    const string& cgroup,
+    const Duration& duration)
+{
+  // One-element set holding a copy of 'cgroup'.
+  const set<string> single(&cgroup, &cgroup + 1);
+
+  Future<hashmap<string, mesos::PerfStatistics> > all =
+    sample(events, single, duration);
+
+  return all.then(lambda::bind(&internal::select, cgroup, lambda::_1));
+}
+
+
+// Sample 'events' for every perf_event cgroup in 'cgroups'. A
+// PerfSampler is spawned to run perf; the returned hashmap is whatever
+// perf::parse() produced, keyed by cgroup.
+Future<hashmap<string, mesos::PerfStatistics> > sample(
+    const set<string>& events,
+    const set<string>& cgroups,
+    const Duration& duration)
+{
+  internal::PerfSampler* sampler = new internal::PerfSampler(
+      internal::command(events, cgroups, duration), duration);
+
+  // Take the future before spawning.
+  // NOTE(review): 'true' presumably hands ownership of the sampler to
+  // libprocess, so it must not be touched after spawn() - confirm.
+  Future<hashmap<string, mesos::PerfStatistics> > statistics =
+    sampler->future();
+  spawn(sampler, true);
+
+  return statistics;
+}
+
+
+// Check that `perf stat` accepts every event in 'events' by running it
+// against the trivial command `true` and testing the exit status.
+// NOTE: The event names are interpolated into a shell command without
+// any escaping.
+bool valid(const set<string>& events)
+{
+  // Log everything to stderr which is then redirected to /dev/null.
+  string command = "perf stat --log-fd 2";
+
+  foreach (const string& event, events) {
+    command += " --event " + event;
+  }
+
+  command += " true 2>/dev/null";
+
+  return os::system(command) == 0;
+}
+
+
+// Parse the output of `perf stat -x<PERF_DELIMITER>` into one
+// PerfStatistics per cgroup (or a single one under PIDS_KEY when the
+// lines carry no cgroup). Returns an Error for a malformed line, an
+// event that is not a PerfStatistics field, or an unparsable value.
+Try<hashmap<string, mesos::PerfStatistics> > parse(const string& output)
+{
+  typedef google::protobuf::FieldDescriptor FieldDescriptor;
+
+  hashmap<string, mesos::PerfStatistics> statistics;
+
+  foreach (const string& line, strings::tokenize(output, "\n")) {
+    // Expected format for an output line is either:
+    // value,event (when sampling pids)
+    // value,event,cgroup (when sampling a cgroup)
+    // assuming PERF_DELIMITER = ",".
+    const vector<string> tokens = strings::tokenize(line, PERF_DELIMITER);
+    const size_t count = tokens.size();
+    if (count != 2 && count != 3) {
+      return Error("Unexpected perf output at line: " + line);
+    }
+
+    const string& value = tokens[0];
+    const string event = internal::normalize(tokens[1]);
+    // Use the special PIDS_KEY when sampling pids.
+    const string& cgroup = (count == 3) ? tokens[2] : PIDS_KEY;
+
+    // Make sure there is an entry for this cgroup, even if the line
+    // ends up being ignored below.
+    if (!statistics.contains(cgroup)) {
+      statistics.put(cgroup, mesos::PerfStatistics());
+    }
+    mesos::PerfStatistics& entry = statistics[cgroup];
+
+    // The normalized event name selects the protobuf field to set.
+    const google::protobuf::Reflection* reflection = entry.GetReflection();
+    const FieldDescriptor* field =
+      entry.GetDescriptor()->FindFieldByName(event);
+    if (!field) {
+      return Error("Unexpected perf output at line: " + line);
+    }
+
+    if (value == "<not supported>") {
+      LOG(WARNING) << "Unsupported perf counter, ignoring: " << line;
+      continue;
+    }
+
+    // A counter that was not counted is reported as zero.
+    const bool uncounted = (value == "<not counted>");
+
+    const FieldDescriptor::Type type = field->type();
+    if (type == FieldDescriptor::TYPE_DOUBLE) {
+      Try<double> number = uncounted ? 0 : numify<double>(value);
+      if (number.isError()) {
+        return Error("Unable to parse perf value at line: " + line);
+      }
+
+      reflection->SetDouble(&entry, field, number.get());
+    } else if (type == FieldDescriptor::TYPE_UINT64) {
+      Try<uint64_t> number = uncounted ? 0 : numify<uint64_t>(value);
+      if (number.isError()) {
+        return Error("Unable to parse perf value at line: " + line);
+      }
+
+      reflection->SetUInt64(&entry, field, number.get());
+    } else {
+      return Error("Unsupported perf field type at line: " + line);
+    }
+  }
+
+  return statistics;
+}
+
+} // namespace perf {
http://git-wip-us.apache.org/repos/asf/mesos/blob/be0ba0db/src/linux/perf.hpp
----------------------------------------------------------------------
diff --git a/src/linux/perf.hpp b/src/linux/perf.hpp
new file mode 100644
index 0000000..0d510c5
--- /dev/null
+++ b/src/linux/perf.hpp
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PERF_HPP__
+#define __PERF_HPP__
+
+#include <unistd.h>
+
+#include <set>
+#include <string>
+
+#include <process/future.hpp>
+
+#include <stout/duration.hpp>
+#include <stout/hashmap.hpp>
+
+// For PerfStatistics protobuf.
+#include "mesos/mesos.hpp"
+
+namespace perf {
+
+// Sample the perf events for process pid for duration.
+process::Future<mesos::PerfStatistics> sample(
+    const std::set<std::string>& events,
+    pid_t pid,
+    const Duration& duration);
+
+
+// Sample the perf events for processes in pids for duration.
+process::Future<mesos::PerfStatistics> sample(
+    const std::set<std::string>& events,
+    const std::set<pid_t>& pids,
+    const Duration& duration);
+
+
+// Sample the perf events for process(es) in the perf_event cgroups
+// for duration. The returned hashmap is keyed by cgroup.
+// NOTE: cgroups should be relative to the perf_event subsystem mount,
+// e.g., mesos/test for /sys/fs/cgroup/perf_event/mesos/test.
+process::Future<hashmap<std::string, mesos::PerfStatistics> > sample(
+    const std::set<std::string>& events,
+    const std::set<std::string>& cgroups,
+    const Duration& duration);
+
+
+// Sample the perf events for process(es) in the perf_event cgroup
+// for duration. The cgroup is relative to the perf_event subsystem
+// mount, as for the set-of-cgroups overload.
+process::Future<mesos::PerfStatistics> sample(
+    const std::set<std::string>& events,
+    const std::string& cgroup,
+    const Duration& duration);
+
+
+// Validate a set of events are accepted by `perf stat`.
+// NOTE: This runs perf through the shell and the event names are
+// passed unescaped.
+bool valid(const std::set<std::string>& events);
+
+
+// Parse the output of `perf stat`, one line per "value,event" (pids)
+// or "value,event,cgroup" (cgroups). The result is keyed by cgroup, or
+// by the empty string when the lines carry no cgroup. Returns an error
+// if a line is malformed or names an unknown event.
+// Note: Exposed for testing purposes.
+Try<hashmap<std::string, mesos::PerfStatistics> > parse(
+    const std::string& output);
+
+} // namespace perf {
+
+#endif // __PERF_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/be0ba0db/src/tests/cgroups_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/cgroups_tests.cpp b/src/tests/cgroups_tests.cpp
index 5f674cd..73510fb 100644
--- a/src/tests/cgroups_tests.cpp
+++ b/src/tests/cgroups_tests.cpp
@@ -48,6 +48,7 @@
#include <stout/strings.hpp>
#include "linux/cgroups.hpp"
+#include "linux/perf.hpp"
#include "tests/mesos.hpp" // For TEST_CGROUPS_(HIERARCHY|ROOT).
@@ -852,3 +853,84 @@ TEST_F(CgroupsAnyHierarchyWithFreezerTest, ROOT_CGROUPS_AssignThreads)
ASSERT_TRUE(future.isReady());
EXPECT_TRUE(future.get());
}
+
+
+// Test fixture for tests that need the perf_event cgroup subsystem. It
+// only forwards "perf_event" to the CgroupsAnyHierarchyTest constructor.
+// NOTE(review): presumably the base fixture prepares a hierarchy with
+// the named subsystem(s) attached - confirm against its definition.
+class CgroupsAnyHierarchyWithPerfEventTest
+ : public CgroupsAnyHierarchyTest
+{
+public:
+ CgroupsAnyHierarchyWithPerfEventTest()
+ : CgroupsAnyHierarchyTest("perf_event") {}
+};
+
+
+// Forks a child, moves it into a perf_event test cgroup and checks that
+// sampling that cgroup for one second yields non-zero values for both a
+// hardware event (cycles) and a software event (task-clock).
+TEST_F(CgroupsAnyHierarchyWithPerfEventTest, ROOT_CGROUPS_Perf)
+{
+  int pipes[2];
+  // Only used as a token to synchronize with the child; initialized so
+  // that no indeterminate value is written to the pipe.
+  int dummy = 0;
+  ASSERT_NE(-1, ::pipe(pipes));
+
+  std::string hierarchy = path::join(baseHierarchy, "perf_event");
+  ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
+
+  pid_t pid = ::fork();
+  ASSERT_NE(-1, pid);
+
+  if (pid == 0) {
+    // In child process.
+    ::close(pipes[1]);
+
+    // Wait until parent has assigned us to the cgroup.
+    ssize_t len;
+    while ((len = ::read(pipes[0], &dummy, sizeof(dummy))) == -1 &&
+           errno == EINTR);
+
+    // Do not use gtest assertions in the child: a failed ASSERT_*
+    // returns from the test body and the child would carry on as a
+    // second copy of the test runner.
+    if (len != (ssize_t) sizeof(dummy)) {
+      ABORT("Failed to synchronize with parent");
+    }
+    ::close(pipes[0]);
+
+    while (true) { sleep(1); }
+
+    ABORT("Child should not reach here");
+  }
+
+  // In parent.
+  ::close(pipes[0]);
+
+  // Put child into the test cgroup.
+  ASSERT_SOME(cgroups::assign(hierarchy, TEST_CGROUPS_ROOT, pid));
+
+  // Tell the child it has been assigned.
+  ssize_t len;
+  while ((len = ::write(pipes[1], &dummy, sizeof(dummy))) == -1 &&
+         errno == EINTR);
+  ASSERT_EQ((ssize_t) sizeof(dummy), len);
+  ::close(pipes[1]);
+
+  std::set<std::string> events;
+  // Hardware event.
+  events.insert("cycles");
+  // Software event.
+  events.insert("task-clock");
+
+  Future<mesos::PerfStatistics> statistics =
+    perf::sample(events, TEST_CGROUPS_ROOT, Seconds(1));
+  AWAIT_READY(statistics);
+
+  ASSERT_TRUE(statistics.get().has_cycles());
+  EXPECT_LT(0u, statistics.get().cycles());
+
+  ASSERT_TRUE(statistics.get().has_task_clock());
+  EXPECT_LT(0.0, statistics.get().task_clock());
+
+  // Kill the child process.
+  ASSERT_NE(-1, ::kill(pid, SIGKILL));
+
+  // Wait for the child process. Wait on 'pid' specifically so that an
+  // unrelated child of the test process is never reaped by mistake.
+  int status;
+  EXPECT_NE(-1, ::waitpid(pid, &status, 0));
+  ASSERT_TRUE(WIFSIGNALED(status));
+  EXPECT_EQ(SIGKILL, WTERMSIG(status));
+
+  // Destroy the cgroup.
+  Future<bool> destroy = cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT);
+  AWAIT_READY(destroy);
+  EXPECT_TRUE(destroy.get());
+}
http://git-wip-us.apache.org/repos/asf/mesos/blob/be0ba0db/src/tests/perf_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/perf_tests.cpp b/src/tests/perf_tests.cpp
new file mode 100644
index 0000000..7d6dd24
--- /dev/null
+++ b/src/tests/perf_tests.cpp
@@ -0,0 +1,146 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <set>
+
+#include <gmock/gmock.h>
+
+#include <process/clock.hpp>
+#include <process/gtest.hpp>
+
+#include <stout/gtest.hpp>
+#include <stout/stringify.hpp>
+
+#include "linux/perf.hpp"
+
+using std::set;
+using std::string;
+
+using namespace process;
+
+// Empty fixture that groups the perf tests; it adds no set up or tear
+// down of its own.
+class PerfTest : public ::testing::Test {};
+
+
+// perf::valid() accepts a set of known events and rejects the set as
+// soon as it contains an unknown one.
+TEST_F(PerfTest, ROOT_Events)
+{
+  // Valid events.
+  const string supported[] = { "cycles", "task-clock" };
+  set<string> events(supported, supported + 2);
+  EXPECT_TRUE(perf::valid(events));
+
+  // Add an invalid event.
+  const string unsupported = "this-is-an-invalid-event";
+  events.insert(unsupported);
+  EXPECT_FALSE(perf::valid(events));
+}
+
+
+// Exercises perf::parse() on canned output: pid-style and cgroup-style
+// lines, the "<not supported>" and "<not counted>" markers, and inputs
+// that must be rejected.
+// NOTE: Uses ASSERT_SOME rather than CHECK_SOME so that a parse failure
+// fails this test instead of aborting the whole test binary.
+TEST_F(PerfTest, Parse)
+{
+  // uint64 and floats should be parsed.
+  Try<hashmap<string, mesos::PerfStatistics> > parse =
+    perf::parse("123,cycles\n0.123,task-clock");
+  ASSERT_SOME(parse);
+
+  // Lines without a cgroup are stored under the empty key.
+  ASSERT_TRUE(parse.get().contains(""));
+  mesos::PerfStatistics statistics = parse.get().get("").get();
+
+  ASSERT_TRUE(statistics.has_cycles());
+  EXPECT_EQ(123u, statistics.cycles());
+  ASSERT_TRUE(statistics.has_task_clock());
+  EXPECT_EQ(0.123, statistics.task_clock());
+
+  // Parse multiple cgroups.
+  parse = perf::parse("123,cycles,cgroup1\n"
+                      "456,cycles,cgroup2\n"
+                      "0.456,task-clock,cgroup2\n"
+                      "0.123,task-clock,cgroup1");
+  ASSERT_SOME(parse);
+  EXPECT_FALSE(parse.get().contains(""));
+
+  ASSERT_TRUE(parse.get().contains("cgroup1"));
+  statistics = parse.get().get("cgroup1").get();
+
+  ASSERT_TRUE(statistics.has_cycles());
+  EXPECT_EQ(123u, statistics.cycles());
+  ASSERT_TRUE(statistics.has_task_clock());
+  EXPECT_EQ(0.123, statistics.task_clock());
+
+  ASSERT_TRUE(parse.get().contains("cgroup2"));
+  statistics = parse.get().get("cgroup2").get();
+
+  ASSERT_TRUE(statistics.has_cycles());
+  EXPECT_EQ(456u, statistics.cycles());
+  EXPECT_TRUE(statistics.has_task_clock());
+  EXPECT_EQ(0.456, statistics.task_clock());
+
+  // Statistics reporting <not supported> should not appear.
+  parse = perf::parse("<not supported>,cycles");
+  ASSERT_SOME(parse);
+
+  ASSERT_TRUE(parse.get().contains(""));
+  statistics = parse.get().get("").get();
+  EXPECT_FALSE(statistics.has_cycles());
+
+  // Statistics reporting <not counted> should be zero.
+  parse = perf::parse("<not counted>,cycles\n<not counted>,task-clock");
+  ASSERT_SOME(parse);
+
+  ASSERT_TRUE(parse.get().contains(""));
+  statistics = parse.get().get("").get();
+
+  EXPECT_TRUE(statistics.has_cycles());
+  EXPECT_EQ(0u, statistics.cycles());
+  EXPECT_TRUE(statistics.has_task_clock());
+  EXPECT_EQ(0.0, statistics.task_clock());
+
+  // Check parsing fails.
+  parse = perf::parse("1,cycles\ngarbage");
+  EXPECT_ERROR(parse);
+
+  parse = perf::parse("1,unknown-field");
+  EXPECT_ERROR(parse);
+}
+
+
+// Samples pid 1 for one second and checks the timestamp, the duration
+// and that both requested counters are present and non-zero.
+TEST_F(PerfTest, ROOT_SampleInit)
+{
+  // One hardware event (cycles) and one software event (task-clock).
+  const string requested[] = { "cycles", "task-clock" };
+  const set<string> events(requested, requested + 2);
+
+  // Sample init/launchd/systemd (pid 1).
+  Future<mesos::PerfStatistics> statistics =
+    perf::sample(events, 1, Seconds(1));
+  AWAIT_READY(statistics);
+
+  const mesos::PerfStatistics& sample = statistics.get();
+
+  // Check the sample timestamp is within the last 5 seconds. This is generous
+  // because there's the process reap delay in addition to the sampling
+  // duration.
+  ASSERT_TRUE(sample.has_timestamp());
+  const double age = Clock::now().secs() - sample.timestamp();
+  EXPECT_GT(Seconds(5).secs(), age);
+  EXPECT_EQ(Seconds(1).secs(), sample.duration());
+
+  ASSERT_TRUE(sample.has_cycles());
+  EXPECT_LT(0u, sample.cycles());
+
+  ASSERT_TRUE(sample.has_task_clock());
+  EXPECT_LT(0.0, sample.task_clock());
+}
[5/5] git commit: Add sampling support to the perf_event isolator.
Posted by id...@apache.org.
Add sampling support to the perf_event isolator.
Review: https://reviews.apache.org/r/21451
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d74de8c5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d74de8c5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d74de8c5
Branch: refs/heads/master
Commit: d74de8c579f74b089c3116cf0b11571b1051f735
Parents: be0ba0d
Author: Ian Downes <id...@twitter.com>
Authored: Wed May 14 10:33:21 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:09 2014 -0700
----------------------------------------------------------------------
.../isolators/cgroups/perf_event.cpp | 244 +++++++++++++++----
.../isolators/cgroups/perf_event.hpp | 33 ++-
src/slave/containerizer/mesos_containerizer.cpp | 2 +
src/slave/flags.hpp | 27 ++
src/tests/isolator_tests.cpp | 72 ++++++
src/tests/mesos.cpp | 1 +
src/tests/slave_recovery_tests.cpp | 150 ++++++++++++
7 files changed, 481 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/containerizer/isolators/cgroups/perf_event.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.cpp b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
index d9c8b25..1bd5dfa 100644
--- a/src/slave/containerizer/isolators/cgroups/perf_event.cpp
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
@@ -19,13 +19,20 @@
#include <stdint.h>
#include <vector>
+#include <set>
+
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/message.h>
#include <mesos/resources.hpp>
#include <mesos/values.hpp>
#include <process/collect.hpp>
#include <process/defer.hpp>
+#include <process/delay.hpp>
+#include <process/io.hpp>
#include <process/pid.hpp>
+#include <process/subprocess.hpp>
#include <stout/bytes.hpp>
#include <stout/check.hpp>
@@ -35,11 +42,10 @@
#include <stout/hashset.hpp>
#include <stout/lambda.hpp>
#include <stout/nothing.hpp>
+#include <stout/os.hpp>
#include <stout/stringify.hpp>
#include <stout/try.hpp>
-#include "common/type_utils.hpp"
-
#include "linux/cgroups.hpp"
#include "slave/containerizer/isolators/cgroups/perf_event.hpp"
@@ -47,7 +53,7 @@
using namespace process;
using std::list;
-using std::ostringstream;
+using std::set;
using std::string;
using std::vector;
@@ -55,17 +61,33 @@ namespace mesos {
namespace internal {
namespace slave {
-CgroupsPerfEventIsolatorProcess::CgroupsPerfEventIsolatorProcess(
- const Flags& _flags,
- const string& _hierarchy)
- : flags(_flags), hierarchy(_hierarchy) {}
+Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
+{
+ LOG(INFO) << "Creating PerfEvent isolator";
+
+ if (flags.perf_duration > flags.perf_interval) {
+ return Error("Sampling perf for duration (" +
+ stringify(flags.perf_duration) +
+ ") > interval (" +
+ stringify(flags.perf_interval) +
+ ") is not supported.");
+ }
+ if (!flags.perf_events.isSome()) {
+ return Error("No perf events specified.");
+ }
-CgroupsPerfEventIsolatorProcess::~CgroupsPerfEventIsolatorProcess() {}
+ set<string> events;
+ foreach (const string& event,
+ strings::tokenize(flags.perf_events.get(), ",")) {
+ events.insert(event);
+ }
+ if (!perf::valid(events)) {
+ return Error("Failed to create PerfEvent isolator, invalid events: " +
+ stringify(events));
+ }
-Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
-{
Try<string> hierarchy = cgroups::prepare(
flags.cgroups_hierarchy, "perf_event", flags.cgroups_root);
@@ -73,6 +95,10 @@ Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
return Error("Failed to create perf_event cgroup: " + hierarchy.error());
}
+ LOG(INFO) << "PerfEvent isolator will profile for " << flags.perf_duration
+ << " every " << flags.perf_interval
+ << " for events: " << stringify(events);
+
process::Owned<IsolatorProcess> process(
new CgroupsPerfEventIsolatorProcess(flags, hierarchy.get()));
@@ -80,6 +106,31 @@ Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
}
+// Stores the flags and the perf_event hierarchy, and fills 'events'
+// with the comma-separated entries of flags.perf_events.
+CgroupsPerfEventIsolatorProcess::CgroupsPerfEventIsolatorProcess(
+ const Flags& _flags,
+ const string& _hierarchy)
+ : flags(_flags),
+ hierarchy(_hierarchy)
+{
+ // create() returns an Error when no perf events are specified, so
+ // the flag is expected to be set by the time we get here.
+ CHECK_SOME(flags.perf_events);
+
+ foreach (const string& event,
+ strings::tokenize(flags.perf_events.get(), ",")) {
+ events.insert(event);
+ }
+}
+
+
+CgroupsPerfEventIsolatorProcess::~CgroupsPerfEventIsolatorProcess() {}
+
+
+// Starts sampling by calling sample(), which is defined outside this
+// excerpt.
+// NOTE(review): presumably invoked by libprocess once the isolator
+// process has been spawned - confirm.
+void CgroupsPerfEventIsolatorProcess::initialize()
+{
+ // Start sampling.
+ sample();
+}
+
+
Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
const list<state::RunState>& states)
{
@@ -95,41 +146,39 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
}
const ContainerID& containerId = state.id.get();
+ const string cgroup = path::join(flags.cgroups_root, containerId.value());
- Info* info = new Info(
- containerId, path::join(flags.cgroups_root, containerId.value()));
- CHECK_NOTNULL(info);
-
- infos[containerId] = info;
- cgroups.insert(info->cgroup);
-
- Try<bool> exists = cgroups::exists(hierarchy, info->cgroup);
+ Try<bool> exists = cgroups::exists(hierarchy, cgroup);
if (exists.isError()) {
- delete info;
foreachvalue (Info* info, infos) {
delete info;
}
+
infos.clear();
- return Failure("Failed to check cgroup for container '" +
- stringify(containerId) + "'");
+ return Failure("Failed to check cgroup " + cgroup +
+ " for container '" + stringify(containerId) + "'");
}
if (!exists.get()) {
- VLOG(1) << "Couldn't find cgroup for container " << containerId;
// This may occur if the executor is exiting and the isolator has
// destroyed the cgroup but the slave dies before noticing this. This
// will be detected when the containerizer tries to monitor the
// executor's pid.
// NOTE: This could also occur if this isolator is now enabled for a
- // container that was started without this isolator. For this particular
- // isolator it is okay to continue running this container without its
- // perf_event cgroup existing because we don't ever query it and the
- // destroy will succeed immediately.
+ // container that was started without this isolator. For this
+ // particular isolator it is acceptable to continue running this
+ // container without a perf_event cgroup because we don't ever
+ // query it and the destroy will succeed immediately.
+ VLOG(1) << "Couldn't find perf event cgroup for container " << containerId
+ << ", perf statistics will not be available";
+ continue;
}
+
+ infos[containerId] = new Info(containerId, cgroup);
+ cgroups.insert(cgroup);
}
- Try<vector<string> > orphans = cgroups::get(
- hierarchy, flags.cgroups_root);
+ Try<vector<string> > orphans = cgroups::get(hierarchy, flags.cgroups_root);
if (orphans.isError()) {
foreachvalue (Info* info, infos) {
delete info;
@@ -139,6 +188,13 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
}
foreach (const string& orphan, orphans.get()) {
+ // Ignore the slave cgroup (see the --slave_subsystems flag).
+ // TODO(idownes): Remove this when the cgroups layout is updated,
+ // see MESOS-1185.
+ if (orphan == path::join(flags.cgroups_root, "slave")) {
+ continue;
+ }
+
if (!cgroups.contains(orphan)) {
LOG(INFO) << "Removing orphaned cgroup '" << orphan << "'";
cgroups::destroy(hierarchy, orphan);
@@ -157,8 +213,11 @@ Future<Option<CommandInfo> > CgroupsPerfEventIsolatorProcess::prepare(
return Failure("Container has already been prepared");
}
+ LOG(INFO) << "Preparing perf event cgroup for " << containerId;
+
Info* info = new Info(
- containerId, path::join(flags.cgroups_root, containerId.value()));
+ containerId,
+ path::join(flags.cgroups_root, containerId.value()));
infos[containerId] = CHECK_NOTNULL(info);
@@ -194,9 +253,6 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::isolate(
Info* info = CHECK_NOTNULL(infos[containerId]);
- CHECK(info->pid.isNone());
- info->pid = pid;
-
Try<Nothing> assign = cgroups::assign(hierarchy, info->cgroup, pid);
if (assign.isError()) {
return Failure("Failed to assign container '" +
@@ -212,13 +268,8 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::isolate(
Future<Limitation> CgroupsPerfEventIsolatorProcess::watch(
const ContainerID& containerId)
{
- if (!infos.contains(containerId)) {
- return Failure("Unknown container");
- }
-
- CHECK_NOTNULL(infos[containerId]);
-
- return infos[containerId]->limitation.future();
+ // No resources are limited.
+ return Future<Limitation>();
}
@@ -234,20 +285,36 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::update(
Future<ResourceStatistics> CgroupsPerfEventIsolatorProcess::usage(
const ContainerID& containerId)
{
- // No resource statistics provided by this isolator.
- return ResourceStatistics();
+ if (!infos.contains(containerId)) {
+ // Return an empty ResourceStatistics, i.e., without
+ // PerfStatistics, if we don't know about this container.
+ return ResourceStatistics();
+ }
+
+ CHECK_NOTNULL(infos[containerId]);
+
+ ResourceStatistics statistics;
+ statistics.mutable_perf()->CopyFrom(infos[containerId]->statistics);
+
+ return statistics;
}
Future<Nothing> CgroupsPerfEventIsolatorProcess::cleanup(
const ContainerID& containerId)
{
+ // Tolerate clean up attempts for unknown containers which may arise from
+ // repeated clean up attempts (during test cleanup).
if (!infos.contains(containerId)) {
- return Failure("Unknown container");
+ VLOG(1) << "Ignoring cleanup request for unknown container: "
+ << containerId;
+ return Nothing();
}
Info* info = CHECK_NOTNULL(infos[containerId]);
+ info->destroying = true;
+
return cgroups::destroy(hierarchy, info->cgroup)
.then(defer(PID<CgroupsPerfEventIsolatorProcess>(this),
&CgroupsPerfEventIsolatorProcess::_cleanup,
@@ -258,7 +325,10 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::cleanup(
Future<Nothing> CgroupsPerfEventIsolatorProcess::_cleanup(
const ContainerID& containerId)
{
- CHECK(infos.contains(containerId));
+ if (!infos.contains(containerId))
+ {
+ return Nothing();
+ }
delete infos[containerId];
infos.erase(containerId);
@@ -267,6 +337,94 @@ Future<Nothing> CgroupsPerfEventIsolatorProcess::_cleanup(
}
+// Timeout handler bound into the Future::after() call in sample().
+// Logs that the sample of length 'duration' did not complete within
+// 'timeout', requests a discard of the pending sample and returns that
+// same future.
+Future<hashmap<string, PerfStatistics> > discardSample(
+ Future<hashmap<string, PerfStatistics> > future,
+ const Duration& duration,
+ const Duration& timeout)
+{
+ LOG(ERROR) << "Perf sample of " << stringify(duration)
+ << " failed to complete within " << stringify(timeout)
+ << "; sampling will be halted";
+
+ future.discard();
+
+ return future;
+}
+
+
+// Starts one perf sample over the cgroups of all containers that are not
+// being destroyed, or, if there are none, simply schedules the next call
+// to sample() after 'flags.perf_interval'.
+void CgroupsPerfEventIsolatorProcess::sample()
+{
+  // Destruction is asynchronous and "perf stat" will fail if a cgroup has
+  // been destroyed by the time we actually run perf, so containers whose
+  // destroy has started are left out.
+  set<string> targets;
+  foreachvalue (Info* info, infos) {
+    CHECK_NOTNULL(info);
+
+    if (!info->destroying) {
+      targets.insert(info->cgroup);
+    }
+  }
+
+  if (targets.size() == 0) {
+    // No cgroups to sample for now so just schedule the next sample.
+    delay(flags.perf_interval,
+          PID<CgroupsPerfEventIsolatorProcess>(this),
+          &CgroupsPerfEventIsolatorProcess::sample);
+    return;
+  }
+
+  // The timeout includes an allowance of twice the process::reap
+  // interval (currently one second) to ensure we see the perf
+  // process exit. If the sample is not ready after the timeout
+  // something very unexpected has occurred so we discard it and
+  // halt all sampling.
+  Duration timeout = flags.perf_duration + Seconds(2);
+
+  // _sample() receives the time at which the following sample should
+  // start, measured from now rather than from when this one completes.
+  perf::sample(events, targets, flags.perf_duration)
+    .after(timeout,
+           lambda::bind(&discardSample,
+                        lambda::_1,
+                        flags.perf_duration,
+                        timeout))
+    .onAny(defer(PID<CgroupsPerfEventIsolatorProcess>(this),
+                 &CgroupsPerfEventIsolatorProcess::_sample,
+                 Clock::now() + flags.perf_interval,
+                 lambda::_1));
+}
+
+
+// Completion callback for a perf sample. Stores the per-cgroup results
+// in the matching Info objects and schedules the next sample for 'next'.
+// If the sample failed or was discarded, sampling is not rescheduled.
+void CgroupsPerfEventIsolatorProcess::_sample(
+    const Time& next,
+    const Future<hashmap<string, PerfStatistics> >& statistics)
+{
+  if (!statistics.isReady()) {
+    // Failure can occur for many reasons but all are unexpected and
+    // indicate something is not right so we'll stop sampling.
+    LOG(ERROR) << "Failed to get perf sample, sampling will be halted: "
+               << (statistics.isFailed() ? statistics.failure() : "discarded");
+    return;
+  }
+
+  const hashmap<string, PerfStatistics>& latest = statistics.get();
+
+  foreachvalue (Info* info, infos) {
+    CHECK_NOTNULL(info);
+
+    // A cgroup missing from this sample must be newly added; it should be
+    // included in the next sample, so its current statistics are kept.
+    if (latest.contains(info->cgroup)) {
+      info->statistics = latest.get(info->cgroup).get();
+    }
+  }
+
+  // Schedule sample for the next time.
+  delay(next - Clock::now(),
+        PID<CgroupsPerfEventIsolatorProcess>(this),
+        &CgroupsPerfEventIsolatorProcess::sample);
+}
+
} // namespace slave {
} // namespace internal {
} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/containerizer/isolators/cgroups/perf_event.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.hpp b/src/slave/containerizer/isolators/cgroups/perf_event.hpp
index 2db7b3e..4ceb07a 100644
--- a/src/slave/containerizer/isolators/cgroups/perf_event.hpp
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.hpp
@@ -19,9 +19,14 @@
#ifndef __PERF_EVENT_ISOLATOR_HPP__
#define __PERF_EVENT_ISOLATOR_HPP__
+#include <set>
+
+#include "linux/perf.hpp"
+
#include <mesos/resources.hpp>
#include <process/future.hpp>
+#include <process/time.hpp>
#include <stout/hashmap.hpp>
#include <stout/try.hpp>
@@ -65,29 +70,48 @@ public:
virtual process::Future<Nothing> cleanup(
const ContainerID& containerId);
+protected:
+ virtual void initialize();
+
private:
CgroupsPerfEventIsolatorProcess(
const Flags& flags,
const std::string& hierarchy);
+ void sample();
+
+ void _sample(
+ const process::Time& next,
+ const process::Future<hashmap<std::string, PerfStatistics> >& statistics);
+
virtual process::Future<Nothing> _cleanup(const ContainerID& containerId);
struct Info
{
Info(const ContainerID& _containerId, const std::string& _cgroup)
- : containerId(_containerId), cgroup(_cgroup) {}
+ : containerId(_containerId), cgroup(_cgroup), destroying(false)
+ {
+ // Ensure the initial statistics include the required fields.
+ // Note the duration is set to zero to indicate no sampling has
+ // taken place. This empty sample will be returned from usage()
+ // until the first true sample is obtained.
+ statistics.set_timestamp(process::Clock::now().secs());
+ statistics.set_duration(Seconds(0).secs());
+ }
const ContainerID containerId;
const std::string cgroup;
- Option<pid_t> pid;
-
- process::Promise<Limitation> limitation;
+ PerfStatistics statistics;
+ // Mark a container when we start destruction so we stop sampling it.
+ bool destroying;
};
const Flags flags;
// The path to the cgroups subsystem hierarchy root.
const std::string hierarchy;
+ // Set of events to sample.
+ std::set<std::string> events;
hashmap<ContainerID, Info*> infos;
};
@@ -97,4 +121,3 @@ private:
} // namespace mesos {
#endif // __PERF_EVENT_ISOLATOR_HPP__
-
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/containerizer/mesos_containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos_containerizer.cpp b/src/slave/containerizer/mesos_containerizer.cpp
index b5e29da..d6df9a1 100644
--- a/src/slave/containerizer/mesos_containerizer.cpp
+++ b/src/slave/containerizer/mesos_containerizer.cpp
@@ -45,6 +45,7 @@
#ifdef __linux__
#include "slave/containerizer/isolators/cgroups/cpushare.hpp"
#include "slave/containerizer/isolators/cgroups/mem.hpp"
+#include "slave/containerizer/isolators/cgroups/perf_event.hpp"
#endif // __linux__
using std::list;
@@ -131,6 +132,7 @@ Try<MesosContainerizer*> MesosContainerizer::create(
#ifdef __linux__
creators["cgroups/cpu"] = &CgroupsCpushareIsolatorProcess::create;
creators["cgroups/mem"] = &CgroupsMemIsolatorProcess::create;
+ creators["cgroups/perf_event"] = &CgroupsPerfEventIsolatorProcess::create;
#endif // __linux__
vector<Owned<Isolator> > isolators;
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/slave/flags.hpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 15e5b64..3b8ba08 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -213,6 +213,30 @@ public:
"Present functionality is intended for resource monitoring and\n"
"no cgroup limits are set, they are inherited from the root mesos\n"
"cgroup.");
+
+    // Events passed to the perf_event isolator; unset by default.
+    add(&Flags::perf_events,
+        "perf_events",
+        "List of comma-separated perf events to sample for each container\n"
+        "when using the perf_event isolator. Default is none.\n"
+        "Run command 'perf list' to see all events. Event names are\n"
+        "sanitized by downcasing and replacing hyphens with underscores\n"
+        "when reported in the PerfStatistics protobuf, e.g., cpu-cycles\n"
+        "becomes cpu_cycles; see the PerfStatistics protobuf for all names.");
+
+    // Time between the starts of consecutive samples.
+    add(&Flags::perf_interval,
+        "perf_interval",
+        "Interval between the start of perf stat samples. Perf samples are\n"
+        "obtained periodically according to perf_interval and the most\n"
+        "recently obtained sample is returned rather than sampling on\n"
+        "demand. For this reason, perf_interval is independent of the\n"
+        "resource monitoring interval.",
+        Seconds(60));
+
+    // Length of each individual sample.
+    add(&Flags::perf_duration,
+        "perf_duration",
+        "Duration of a perf stat sample. The duration must be less\n"
+        "than the perf_interval.",
+        Seconds(10));
#endif
add(&Flags::credential,
@@ -260,6 +284,9 @@ public:
Option<std::string> cgroups_subsystems;
bool cgroups_enable_cfs;
Option<std::string> slave_subsystems;
+ Option<std::string> perf_events;
+ Duration perf_interval;
+ Duration perf_duration;
#endif
Option<std::string> credential;
Option<std::string> containerizer_path;
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/tests/isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/isolator_tests.cpp b/src/tests/isolator_tests.cpp
index b0eff57..0bbec09 100644
--- a/src/tests/isolator_tests.cpp
+++ b/src/tests/isolator_tests.cpp
@@ -48,6 +48,7 @@
#ifdef __linux__
#include "slave/containerizer/isolators/cgroups/cpushare.hpp"
#include "slave/containerizer/isolators/cgroups/mem.hpp"
+#include "slave/containerizer/isolators/cgroups/perf_event.hpp"
#endif // __linux__
#include "tests/mesos.hpp"
@@ -63,6 +64,7 @@ using mesos::internal::master::Master;
#ifdef __linux__
using mesos::internal::slave::CgroupsCpushareIsolatorProcess;
using mesos::internal::slave::CgroupsMemIsolatorProcess;
+using mesos::internal::slave::CgroupsPerfEventIsolatorProcess;
using mesos::internal::slave::LinuxLauncher;
#endif // __linux__
using mesos::internal::slave::Isolator;
@@ -576,3 +578,73 @@ TYPED_TEST(MemIsolatorTest, MemUsage)
delete isolator.get();
delete launcher.get();
}
+
+
+#ifdef __linux__
+// Fixture for tests of the cgroups perf_event isolator.
+class PerfEventIsolatorTest : public MesosTest {};
+
+// Checks that usage() always carries a PerfStatistics with the required
+// timestamp and duration fields, and that a later sample with a new
+// timestamp reports the requested 'cycles' and 'task-clock' events.
+TEST_F(PerfEventIsolatorTest, ROOT_CGROUPS_Sample)
+{
+ Flags flags;
+
+ // Short duration and interval so the test gets a real sample quickly.
+ flags.perf_events = "cycles,task-clock";
+ flags.perf_duration = Milliseconds(250);
+ flags.perf_interval = Milliseconds(500);
+
+ // NOTE(review): CHECK_SOME aborts the whole test binary on failure
+ // rather than failing only this test -- confirm that is intended.
+ Try<Isolator*> isolator = CgroupsPerfEventIsolatorProcess::create(flags);
+ CHECK_SOME(isolator);
+
+ ExecutorInfo executorInfo;
+
+ ContainerID containerId;
+ containerId.set_value("test");
+
+ AWAIT_READY(isolator.get()->prepare(containerId, executorInfo));
+
+ // This first sample is likely to be empty because perf hasn't
+ // completed yet but we should still have the required fields.
+ Future<ResourceStatistics> statistics1 = isolator.get()->usage(containerId);
+ AWAIT_READY(statistics1);
+ ASSERT_TRUE(statistics1.get().has_perf());
+ EXPECT_TRUE(statistics1.get().perf().has_timestamp());
+ EXPECT_TRUE(statistics1.get().perf().has_duration());
+
+ // Wait until we get the next sample. We use a generous timeout of
+ // two seconds because we currently have a one second reap interval;
+ // when running perf with perf_duration of 250ms we won't notice the
+ // exit for up to one second.
+ ResourceStatistics statistics2;
+ Duration waited = Duration::zero();
+ do {
+ Future<ResourceStatistics> statistics = isolator.get()->usage(containerId);
+ AWAIT_READY(statistics);
+
+ statistics2 = statistics.get();
+
+ ASSERT_TRUE(statistics2.has_perf());
+
+ // A changed timestamp means a new sample has replaced the initial one.
+ if (statistics1.get().perf().timestamp() !=
+ statistics2.perf().timestamp()) {
+ break;
+ }
+
+ os::sleep(Milliseconds(250));
+ waited += Milliseconds(250);
+ } while (waited < Seconds(2));
+
+ // NOTE(review): statistics2 is not refreshed after this point, so this
+ // unconditional two second sleep does not affect the assertions below;
+ // it looks removable -- confirm it is not needed for another reason.
+ sleep(2);
+
+ EXPECT_NE(statistics1.get().perf().timestamp(),
+ statistics2.perf().timestamp());
+
+ EXPECT_TRUE(statistics2.perf().has_cycles());
+ EXPECT_LE(0u, statistics2.perf().cycles());
+
+ EXPECT_TRUE(statistics2.perf().has_task_clock());
+ EXPECT_LE(0.0, statistics2.perf().task_clock());
+
+ AWAIT_READY(isolator.get()->cleanup(containerId));
+
+ delete isolator.get();
+}
+#endif // __linux__
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/tests/mesos.cpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.cpp b/src/tests/mesos.cpp
index e6d807c..98a7c38 100644
--- a/src/tests/mesos.cpp
+++ b/src/tests/mesos.cpp
@@ -408,6 +408,7 @@ void ContainerizerTest<slave::MesosContainerizer>::SetUp()
subsystems.insert("cpuacct");
subsystems.insert("memory");
subsystems.insert("freezer");
+ subsystems.insert("perf_event");
if (cgroups::enabled() && os::user() == "root") {
foreach (const string& subsystem, subsystems) {
http://git-wip-us.apache.org/repos/asf/mesos/blob/d74de8c5/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 9725e6a..7044327 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -3218,3 +3218,153 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, ResourceStatistics)
delete containerizer2.get();
}
+
+#ifdef __linux__
+// Test that the perf event isolator can be enabled on a new slave.
+// Previously created containers will not report perf statistics but
+// newly created containers will.
+TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PerfRollForward)
+{
+  Try<PID<Master> > master = this->StartMaster();
+  ASSERT_SOME(master);
+
+  // Start a slave using a containerizer without a perf event
+  // isolator.
+  slave::Flags flags = this->CreateSlaveFlags();
+  flags.isolation = "cgroups/cpu,cgroups/mem";
+
+  Try<MesosContainerizer*> containerizer1 =
+    MesosContainerizer::create(flags, true);
+  ASSERT_SOME(containerizer1);
+
+  Try<PID<Slave> > slave = this->StartSlave(containerizer1.get(), flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+
+  // Scheduler expectations.
+  EXPECT_CALL(sched, statusUpdate(_, _))
+    .WillRepeatedly(Return());
+
+  // Enable checkpointing for the framework.
+  FrameworkInfo frameworkInfo;
+  frameworkInfo.CopyFrom(DEFAULT_FRAMEWORK_INFO);
+  frameworkInfo.set_checkpoint(true);
+
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get(), DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(_, _, _));
+
+  Future<vector<Offer> > offers1;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers1))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers1);
+  EXPECT_NE(0u, offers1.get().size());
+
+  SlaveID slaveId = offers1.get()[0].slave_id();
+
+  // Use only part of the offer for the first task.
+  TaskInfo task1 = createTask(
+      slaveId, Resources::parse("cpus:0.5;mem:128").get(), "sleep 1000");
+  vector<TaskInfo> tasks1;
+  tasks1.push_back(task1);
+
+  // Message expectations.
+  Future<Message> registerExecutor =
+    FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
+
+  driver.launchTasks(offers1.get()[0].id(), tasks1);
+
+  AWAIT_READY(registerExecutor);
+
+  Future<hashset<ContainerID> > containers = containerizer1.get()->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(1u, containers.get().size());
+
+  ContainerID containerId1 = *(containers.get().begin());
+
+  Future<ResourceStatistics> usage = containerizer1.get()->usage(containerId1);
+  AWAIT_READY(usage);
+
+  // There should not be any perf statistics.
+  EXPECT_FALSE(usage.get().has_perf());
+
+  this->Stop(slave.get());
+  delete containerizer1.get();
+
+  // Set up so we can wait until the new slave updates the container's
+  // resources (this occurs after the executor has re-registered).
+  Future<Nothing> update =
+    FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);
+
+  // Start a slave using a containerizer with a perf event isolator.
+  flags.isolation = "cgroups/cpu,cgroups/mem,cgroups/perf_event";
+  flags.perf_events = "cycles,task-clock";
+  flags.perf_duration = Milliseconds(250);
+  flags.perf_interval = Milliseconds(500);
+
+  Try<MesosContainerizer*> containerizer2 =
+    MesosContainerizer::create(flags, true);
+  ASSERT_SOME(containerizer2);
+
+  Future<vector<Offer> > offers2;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers2))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  slave = this->StartSlave(containerizer2.get(), flags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(offers2);
+  EXPECT_NE(0u, offers2.get().size());
+
+  // Wait until the containerizer is updated.
+  AWAIT_READY(update);
+
+  // The first container should not report perf statistics.
+  usage = containerizer2.get()->usage(containerId1);
+  AWAIT_READY(usage);
+
+  EXPECT_FALSE(usage.get().has_perf());
+
+  // Start a new container which will start reporting perf statistics.
+  TaskInfo task2 = createTask(offers2.get()[0], "sleep 1000");
+  vector<TaskInfo> tasks2;
+  tasks2.push_back(task2);
+
+  // Message expectations.
+  registerExecutor =
+    FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
+
+  driver.launchTasks(offers2.get()[0].id(), tasks2);
+
+  AWAIT_READY(registerExecutor);
+
+  containers = containerizer2.get()->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(2u, containers.get().size());
+  EXPECT_TRUE(containers.get().contains(containerId1));
+
+  // The second container is whichever one is not the first. Iterate by
+  // const reference so no ContainerID is copied per iteration.
+  ContainerID containerId2;
+  foreach (const ContainerID& containerId, containers.get()) {
+    if (containerId != containerId1) {
+      containerId2.CopyFrom(containerId);
+    }
+  }
+
+  usage = containerizer2.get()->usage(containerId2);
+  AWAIT_READY(usage);
+
+  EXPECT_TRUE(usage.get().has_perf());
+
+  driver.stop();
+  driver.join();
+
+  this->Shutdown();
+  delete containerizer2.get();
+}
+#endif // __linux__
[4/5] git commit: Add a cgroup perf_event isolator.
Posted by id...@apache.org.
Add a cgroup perf_event isolator.
This isolator only creates the perf_event cgroup and places the executor
into the cgroup; no performance statistics are gathered/exported.
Review: https://reviews.apache.org/r/20963
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/2e15bc93
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/2e15bc93
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/2e15bc93
Branch: refs/heads/master
Commit: 2e15bc939f4eaae8becfc76255fb20bcbbb27146
Parents: 16345a7
Author: Ian Downes <id...@twitter.com>
Authored: Fri Apr 4 16:27:39 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:08 2014 -0700
----------------------------------------------------------------------
src/Makefile.am | 2 +
.../isolators/cgroups/perf_event.cpp | 272 +++++++++++++++++++
.../isolators/cgroups/perf_event.hpp | 100 +++++++
3 files changed, 374 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/2e15bc93/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index c91b438..5d3196d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -289,6 +289,7 @@ if OS_LINUX
libmesos_no_3rdparty_la_SOURCES += linux/cgroups.cpp
libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/cpushare.cpp
libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/mem.cpp
+ libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/cgroups/perf_event.cpp
libmesos_no_3rdparty_la_SOURCES += slave/containerizer/linux_launcher.cpp
libmesos_no_3rdparty_la_SOURCES += linux/fs.cpp
else
@@ -355,6 +356,7 @@ libmesos_no_3rdparty_la_SOURCES += \
slave/containerizer/isolator.hpp \
slave/containerizer/isolators/cgroups/cpushare.hpp \
slave/containerizer/isolators/cgroups/mem.hpp \
+ slave/containerizer/isolators/cgroups/perf_event.hpp \
slave/containerizer/isolators/posix.hpp \
slave/containerizer/launcher.hpp \
slave/containerizer/mesos_containerizer.hpp \
http://git-wip-us.apache.org/repos/asf/mesos/blob/2e15bc93/src/slave/containerizer/isolators/cgroups/perf_event.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.cpp b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
new file mode 100644
index 0000000..d9c8b25
--- /dev/null
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.cpp
@@ -0,0 +1,272 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+
+#include <vector>
+
+#include <mesos/resources.hpp>
+#include <mesos/values.hpp>
+
+#include <process/collect.hpp>
+#include <process/defer.hpp>
+#include <process/pid.hpp>
+
+#include <stout/bytes.hpp>
+#include <stout/check.hpp>
+#include <stout/error.hpp>
+#include <stout/foreach.hpp>
+#include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
+#include <stout/lambda.hpp>
+#include <stout/nothing.hpp>
+#include <stout/stringify.hpp>
+#include <stout/try.hpp>
+
+#include "common/type_utils.hpp"
+
+#include "linux/cgroups.hpp"
+
+#include "slave/containerizer/isolators/cgroups/perf_event.hpp"
+
+using namespace process;
+
+using std::list;
+using std::ostringstream;
+using std::string;
+using std::vector;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Stores copies of the slave flags and of the perf_event hierarchy path.
+// Called from create(), which obtains the hierarchy from
+// cgroups::prepare().
+CgroupsPerfEventIsolatorProcess::CgroupsPerfEventIsolatorProcess(
+ const Flags& _flags,
+ const string& _hierarchy)
+ : flags(_flags), hierarchy(_hierarchy) {}
+
+
+// Frees any Info objects still tracked in 'infos'. They are allocated
+// with 'new' in recover() and prepare() and are otherwise only released
+// by _cleanup(), so containers that were never cleaned up would leak.
+CgroupsPerfEventIsolatorProcess::~CgroupsPerfEventIsolatorProcess()
+{
+  foreachvalue (Info* info, infos) {
+    delete info;
+  }
+  infos.clear();
+}
+
+
+// Factory: asks cgroups::prepare() for the perf_event hierarchy and, on
+// success, returns a new Isolator wrapping a process bound to it.
+// Returns an Error if the hierarchy cannot be prepared.
+Try<Isolator*> CgroupsPerfEventIsolatorProcess::create(const Flags& flags)
+{
+  Try<string> prepared = cgroups::prepare(
+      flags.cgroups_hierarchy, "perf_event", flags.cgroups_root);
+
+  if (prepared.isError()) {
+    return Error("Failed to create perf_event cgroup: " + prepared.error());
+  }
+
+  process::Owned<IsolatorProcess> isolatorProcess(
+      new CgroupsPerfEventIsolatorProcess(flags, prepared.get()));
+
+  return new Isolator(isolatorProcess);
+}
+
+
+// Rebuilds 'infos' from the given run states and requests destruction of
+// every cgroup under 'flags.cgroups_root' that belongs to no recovered
+// container.
+//
+// Returns a Failure, after freeing every Info created so far, if a run
+// state has no ContainerID, if checking for a container's cgroup fails,
+// or if the cgroups under the root cannot be listed.
+Future<Nothing> CgroupsPerfEventIsolatorProcess::recover(
+    const list<state::RunState>& states)
+{
+  hashset<string> cgroups;
+
+  foreach (const state::RunState& state, states) {
+    if (state.id.isNone()) {
+      foreachvalue (Info* info, infos) {
+        delete info;
+      }
+      infos.clear();
+      return Failure("ContainerID is required to recover");
+    }
+
+    const ContainerID& containerId = state.id.get();
+
+    Info* info = new Info(
+        containerId, path::join(flags.cgroups_root, containerId.value()));
+    CHECK_NOTNULL(info);
+
+    infos[containerId] = info;
+    cgroups.insert(info->cgroup);
+
+    Try<bool> exists = cgroups::exists(hierarchy, info->cgroup);
+    if (exists.isError()) {
+      // 'info' is already stored in 'infos', so the loop below frees it.
+      // Deleting it separately as well would be a double delete.
+      foreachvalue (Info* info, infos) {
+        delete info;
+      }
+      infos.clear();
+      return Failure("Failed to check cgroup for container '" +
+                     stringify(containerId) + "'");
+    }
+
+    if (!exists.get()) {
+      VLOG(1) << "Couldn't find cgroup for container " << containerId;
+      // This may occur if the executor is exiting and the isolator has
+      // destroyed the cgroup but the slave dies before noticing this. This
+      // will be detected when the containerizer tries to monitor the
+      // executor's pid.
+      // NOTE: This could also occur if this isolator is now enabled for a
+      // container that was started without this isolator. For this particular
+      // isolator it is okay to continue running this container without its
+      // perf_event cgroup existing because we don't ever query it and the
+      // destroy will succeed immediately.
+    }
+  }
+
+  Try<vector<string> > orphans = cgroups::get(
+      hierarchy, flags.cgroups_root);
+  if (orphans.isError()) {
+    foreachvalue (Info* info, infos) {
+      delete info;
+    }
+    infos.clear();
+    return Failure(orphans.error());
+  }
+
+  // The future returned by cgroups::destroy is not waited on here.
+  foreach (const string& orphan, orphans.get()) {
+    if (!cgroups.contains(orphan)) {
+      LOG(INFO) << "Removing orphaned cgroup '" << orphan << "'";
+      cgroups::destroy(hierarchy, orphan);
+    }
+  }
+
+  return Nothing();
+}
+
+
+// Registers the container and creates its perf_event cgroup.
+//
+// Fails if the container was already prepared, if the existence check
+// fails, if the cgroup already exists, or if it cannot be created.
+// Returns None on success: this isolator supplies no CommandInfo.
+//
+// NOTE: the Info is stored in 'infos' before the cgroup checks, so it
+// stays registered when one of the later failures is returned.
+Future<Option<CommandInfo> > CgroupsPerfEventIsolatorProcess::prepare(
+    const ContainerID& containerId,
+    const ExecutorInfo& executorInfo)
+{
+  if (infos.contains(containerId)) {
+    return Failure("Container has already been prepared");
+  }
+
+  Info* info = new Info(
+      containerId, path::join(flags.cgroups_root, containerId.value()));
+
+  infos[containerId] = CHECK_NOTNULL(info);
+
+  // The cgroup for this container must not exist yet.
+  Try<bool> present = cgroups::exists(hierarchy, info->cgroup);
+
+  if (present.isError()) {
+    return Failure("Failed to prepare isolator: " + present.error());
+  }
+
+  if (present.get()) {
+    return Failure("Failed to prepare isolator: cgroup already exists");
+  }
+
+  // Create a cgroup for this container.
+  Try<Nothing> created = cgroups::create(hierarchy, info->cgroup);
+  if (created.isError()) {
+    return Failure("Failed to prepare isolator: " + created.error());
+  }
+
+  return None();
+}
+
+
+// Records 'pid' for the container and assigns it to the container's
+// perf_event cgroup. Fails for an unknown container or if the assignment
+// fails; CHECK-fails if a pid was already recorded for the container.
+Future<Nothing> CgroupsPerfEventIsolatorProcess::isolate(
+    const ContainerID& containerId,
+    pid_t pid)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  CHECK(info->pid.isNone());
+  info->pid = pid;
+
+  Try<Nothing> assigned = cgroups::assign(hierarchy, info->cgroup, pid);
+  if (!assigned.isError()) {
+    return Nothing();
+  }
+
+  return Failure("Failed to assign container '" +
+                 stringify(info->containerId) + "' to its own cgroup '" +
+                 path::join(hierarchy, info->cgroup) +
+                 "' : " + assigned.error());
+}
+
+
+// Returns the future of the container's limitation promise, or a Failure
+// if the container is unknown.
+Future<Limitation> CgroupsPerfEventIsolatorProcess::watch(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* info = CHECK_NOTNULL(infos[containerId]);
+
+  return info->limitation.future();
+}
+
+
+// No-op: both arguments are ignored and Nothing is returned immediately,
+// including for containers this isolator does not know about.
+Future<Nothing> CgroupsPerfEventIsolatorProcess::update(
+ const ContainerID& containerId,
+ const Resources& resources)
+{
+ // Nothing to update.
+ return Nothing();
+}
+
+
+// Returns a default-constructed ResourceStatistics for any container;
+// 'containerId' is not consulted.
+Future<ResourceStatistics> CgroupsPerfEventIsolatorProcess::usage(
+ const ContainerID& containerId)
+{
+ // No resource statistics provided by this isolator.
+ return ResourceStatistics();
+}
+
+
+// Destroys the container's cgroup and then, via a deferred call to
+// _cleanup(), drops the container's bookkeeping. Fails for an unknown
+// container.
+Future<Nothing> CgroupsPerfEventIsolatorProcess::cleanup(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  Info* entry = CHECK_NOTNULL(infos[containerId]);
+
+  // _cleanup() is chained with then(), so it only runs if the destroy
+  // completes successfully.
+  Future<bool> destroyed = cgroups::destroy(hierarchy, entry->cgroup);
+
+  return destroyed
+    .then(defer(PID<CgroupsPerfEventIsolatorProcess>(this),
+                &CgroupsPerfEventIsolatorProcess::_cleanup,
+                containerId));
+}
+
+
+// Continuation of cleanup(): removes the container's entry from 'infos'
+// and frees its Info. CHECK-fails if the container is not tracked.
+Future<Nothing> CgroupsPerfEventIsolatorProcess::_cleanup(
+    const ContainerID& containerId)
+{
+  CHECK(infos.contains(containerId));
+
+  Info* finished = infos[containerId];
+  infos.erase(containerId);
+  delete finished;
+
+  return Nothing();
+}
+
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/2e15bc93/src/slave/containerizer/isolators/cgroups/perf_event.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/perf_event.hpp b/src/slave/containerizer/isolators/cgroups/perf_event.hpp
new file mode 100644
index 0000000..2db7b3e
--- /dev/null
+++ b/src/slave/containerizer/isolators/cgroups/perf_event.hpp
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PERF_EVENT_ISOLATOR_HPP__
+#define __PERF_EVENT_ISOLATOR_HPP__
+
+#include <mesos/resources.hpp>
+
+#include <process/future.hpp>
+
+#include <stout/hashmap.hpp>
+#include <stout/try.hpp>
+
+#include "slave/containerizer/isolator.hpp"
+
+#include "slave/flags.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Isolator that creates a perf_event cgroup per container and assigns the
+// container's pid to it. It tracks one heap-allocated Info per container
+// and reports no resource statistics.
+class CgroupsPerfEventIsolatorProcess : public IsolatorProcess
+{
+public:
+ // Prepares the perf_event hierarchy and returns a new Isolator wrapping
+ // this process, or an Error if the hierarchy cannot be prepared.
+ static Try<Isolator*> create(const Flags& flags);
+
+ virtual ~CgroupsPerfEventIsolatorProcess();
+
+ // Rebuilds per-container state from 'states' and requests removal of
+ // cgroups under the root that belong to no recovered container.
+ virtual process::Future<Nothing> recover(
+ const std::list<state::RunState>& states);
+
+ // Registers the container and creates its cgroup; fails if the
+ // container was already prepared or its cgroup already exists.
+ virtual process::Future<Option<CommandInfo> > prepare(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo);
+
+ // Assigns 'pid' to the container's cgroup.
+ virtual process::Future<Nothing> isolate(
+ const ContainerID& containerId,
+ pid_t pid);
+
+ // Returns the future of the container's limitation promise.
+ virtual process::Future<Limitation> watch(
+ const ContainerID& containerId);
+
+ // No-op for this isolator.
+ virtual process::Future<Nothing> update(
+ const ContainerID& containerId,
+ const Resources& resources);
+
+ // Returns an empty ResourceStatistics.
+ virtual process::Future<ResourceStatistics> usage(
+ const ContainerID& containerId);
+
+ // Destroys the container's cgroup, then drops its Info via _cleanup().
+ virtual process::Future<Nothing> cleanup(
+ const ContainerID& containerId);
+
+private:
+ // Private: instances are created through create().
+ CgroupsPerfEventIsolatorProcess(
+ const Flags& flags,
+ const std::string& hierarchy);
+
+ // Continuation of cleanup(): erases and frees the container's Info.
+ virtual process::Future<Nothing> _cleanup(const ContainerID& containerId);
+
+ // Per-container bookkeeping.
+ struct Info
+ {
+ Info(const ContainerID& _containerId, const std::string& _cgroup)
+ : containerId(_containerId), cgroup(_cgroup) {}
+
+ const ContainerID containerId;
+ // Cgroup path: flags.cgroups_root joined with the container id value.
+ const std::string cgroup;
+ // Set by isolate(); None until then.
+ Option<pid_t> pid;
+
+ // Its future is what watch() returns.
+ process::Promise<Limitation> limitation;
+ };
+
+ const Flags flags;
+
+ // The path to the cgroups subsystem hierarchy root.
+ const std::string hierarchy;
+
+ // Owned Info objects keyed by container; allocated with 'new' in
+ // recover() and prepare(), freed in _cleanup().
+ hashmap<ContainerID, Info*> infos;
+};
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __PERF_EVENT_ISOLATOR_HPP__
+
[2/5] git commit: Introduce a PerfStatistics protobuf.
Posted by id...@apache.org.
Introduce a PerfStatistics protobuf.
Review: https://reviews.apache.org/r/21442
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/160e9e0c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/160e9e0c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/160e9e0c
Branch: refs/heads/master
Commit: 160e9e0ccaf320c1881036f351d4a08a2542442d
Parents: 2e15bc9
Author: Ian Downes <id...@twitter.com>
Authored: Wed May 14 10:15:12 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:08 2014 -0700
----------------------------------------------------------------------
include/mesos/mesos.proto | 78 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 78 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/160e9e0c/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 8c6933d..709b8b1 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -405,6 +405,9 @@ message ResourceStatistics {
// TODO(bmahler): Add disk usage.
// TODO(bmahler): Add network usage?
+
+ // Perf statistics.
+ optional PerfStatistics perf = 13;
}
@@ -437,6 +440,81 @@ message ResourceUsage {
/**
+ * Describes a sample of events from "perf stat". Only available on
+ * Linux.
+ *
+ * NOTE: Each optional field matches the name of a perf event (see
+ * "perf list") with the following changes:
+ * 1. Names are lowercased.
+ * 2. Hyphens ('-') are replaced with underscores ('_').
+ * 3. Events with alternate names use the name "perf stat" returns,
+ * e.g., for the event "cycles OR cpu-cycles" perf always returns
+ * cycles.
+ */
+message PerfStatistics { // Only timestamp and duration are required; every event field is optional.
+ required double timestamp = 1; // Start of sample interval, in seconds since the Epoch.
+ required double duration = 2; // Duration of sample interval, in seconds.
+
+ // Hardware events.
+ optional uint64 cycles = 3;
+ optional uint64 stalled_cycles_frontend = 4;
+ optional uint64 stalled_cycles_backend = 5;
+ optional uint64 instructions = 6;
+ optional uint64 cache_references = 7;
+ optional uint64 cache_misses = 8;
+ optional uint64 branches = 9;
+ optional uint64 branch_misses = 10;
+ optional uint64 bus_cycles = 11;
+ optional uint64 ref_cycles = 12;
+
+ // Software events.
+ optional double cpu_clock = 13; // NOTE(review): double, unlike the integer counters; presumably a time value - confirm units.
+ optional double task_clock = 14; // NOTE(review): double, unlike the integer counters; presumably a time value - confirm units.
+ optional uint64 page_faults = 15;
+ optional uint64 minor_faults = 16;
+ optional uint64 major_faults = 17;
+ optional uint64 context_switches = 18;
+ optional uint64 cpu_migrations = 19;
+ optional uint64 alignment_faults = 20;
+ optional uint64 emulation_faults = 21;
+
+ // Hardware cache events.
+ optional uint64 l1_dcache_loads = 22;
+ optional uint64 l1_dcache_load_misses = 23;
+ optional uint64 l1_dcache_stores = 24;
+ optional uint64 l1_dcache_store_misses = 25;
+ optional uint64 l1_dcache_prefetches = 26;
+ optional uint64 l1_dcache_prefetch_misses = 27;
+ optional uint64 l1_icache_loads = 28;
+ optional uint64 l1_icache_load_misses = 29;
+ optional uint64 l1_icache_prefetches = 30;
+ optional uint64 l1_icache_prefetch_misses = 31;
+ optional uint64 llc_loads = 32;
+ optional uint64 llc_load_misses = 33;
+ optional uint64 llc_stores = 34;
+ optional uint64 llc_store_misses = 35;
+ optional uint64 llc_prefetches = 36;
+ optional uint64 llc_prefetch_misses = 37;
+ optional uint64 dtlb_loads = 38;
+ optional uint64 dtlb_load_misses = 39;
+ optional uint64 dtlb_stores = 40;
+ optional uint64 dtlb_store_misses = 41;
+ optional uint64 dtlb_prefetches = 42;
+ optional uint64 dtlb_prefetch_misses = 43;
+ optional uint64 itlb_loads = 44;
+ optional uint64 itlb_load_misses = 45;
+ optional uint64 branch_loads = 46;
+ optional uint64 branch_load_misses = 47;
+ optional uint64 node_loads = 48;
+ optional uint64 node_load_misses = 49;
+ optional uint64 node_stores = 50;
+ optional uint64 node_store_misses = 51;
+ optional uint64 node_prefetches = 52;
+ optional uint64 node_prefetch_misses = 53;
+}
+
+
+/**
* Describes a request for resources that can be used by a framework
* to proactively influence the allocator. If 'slave_id' is provided
* then this request is assumed to only apply to resources on that
[3/5] git commit: Make unknown container not a Failure for
Isolator::cleanup.
Posted by id...@apache.org.
Make unknown container not a Failure for Isolator::cleanup.
Review: https://reviews.apache.org/r/22049
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/16345a7b
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/16345a7b
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/16345a7b
Branch: refs/heads/master
Commit: 16345a7be08b48873e263ca9f9867f4b2f298f27
Parents: 4c83662
Author: Ian Downes <id...@twitter.com>
Authored: Thu May 29 13:23:04 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Fri Jun 13 15:32:08 2014 -0700
----------------------------------------------------------------------
src/slave/containerizer/isolators/cgroups/cpushare.cpp | 5 ++++-
src/slave/containerizer/isolators/cgroups/mem.cpp | 5 ++++-
2 files changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/16345a7b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/cpushare.cpp b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
index 3d253af..eb8933f 100644
--- a/src/slave/containerizer/isolators/cgroups/cpushare.cpp
+++ b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
@@ -432,8 +432,11 @@ Future<ResourceStatistics> CgroupsCpushareIsolatorProcess::usage(
Future<Nothing> CgroupsCpushareIsolatorProcess::cleanup(
const ContainerID& containerId)
{
+ // Multiple calls may occur during test clean up.
if (!infos.contains(containerId)) {
- return Failure("Unknown container");
+ VLOG(1) << "Ignoring cleanup request for unknown container: "
+ << containerId;
+ return Nothing();
}
Info* info = CHECK_NOTNULL(infos[containerId]);
http://git-wip-us.apache.org/repos/asf/mesos/blob/16345a7b/src/slave/containerizer/isolators/cgroups/mem.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/mem.cpp b/src/slave/containerizer/isolators/cgroups/mem.cpp
index 60013d4..73b926f 100644
--- a/src/slave/containerizer/isolators/cgroups/mem.cpp
+++ b/src/slave/containerizer/isolators/cgroups/mem.cpp
@@ -367,8 +367,11 @@ Future<ResourceStatistics> CgroupsMemIsolatorProcess::usage(
Future<Nothing> CgroupsMemIsolatorProcess::cleanup(
const ContainerID& containerId)
{
+ // Multiple calls may occur during test clean up.
if (!infos.contains(containerId)) {
- return Failure("Unknown container");
+ VLOG(1) << "Ignoring cleanup request for unknown container: "
+ << containerId;
+ return Nothing();
}
Info* info = CHECK_NOTNULL(infos[containerId]);