You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2013/09/04 09:09:33 UTC

[6/6] git commit: Exposing cpu.stat statistics in the monitoring endpoint.

Exposing cpu.stat statistics in the monitoring endpoint.

From: Christina Delimitrou <ch...@gmail.com>
Review: https://reviews.apache.org/r/13868


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/4954b75f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/4954b75f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/4954b75f

Branch: refs/heads/master
Commit: 4954b75f6fbe2d23fb3e11489441036bd3676017
Parents: 189cefe
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Tue Sep 3 23:12:55 2013 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed Sep 4 00:09:09 2013 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto      |  5 +++++
 src/slave/cgroups_isolator.cpp | 23 +++++++++++++++++++++
 src/slave/constants.cpp        |  2 +-
 src/slave/monitor.cpp          | 41 +++++++++++++++++++++++++++++++++++++
 src/tests/monitor_tests.cpp    | 13 ++++++++++--
 5 files changed, 81 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/4954b75f/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index bbf1d31..8f845cc 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -274,6 +274,11 @@ message ResourceStatistics {
   // Number of CPUs allocated.
   required double cpus_limit = 4;
 
+  // CPU throttling statistics from cpu.stat (to diagnose contention).
+  optional uint32 cpus_nr_periods = 7;
+  optional uint32 cpus_nr_throttled = 8;
+  optional double cpus_throttled_time_secs = 9;
+
   // Memory Usage Information:
   optional uint64 mem_rss_bytes = 5; // Resident Set Size.
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/4954b75f/src/slave/cgroups_isolator.cpp
----------------------------------------------------------------------
diff --git a/src/slave/cgroups_isolator.cpp b/src/slave/cgroups_isolator.cpp
index ec9f8ec..a1f5b32 100644
--- a/src/slave/cgroups_isolator.cpp
+++ b/src/slave/cgroups_isolator.cpp
@@ -782,6 +782,29 @@ Future<ResourceStatistics> CgroupsIsolator::usage(
     result.set_mem_rss_bytes(stat.get()["rss"]);
   }
 
+  // Add the cpu.stat information.
+  stat = cgroups::stat(hierarchy, info->name(), "cpu.stat");
+
+  if (stat.isError()) {
+    return Future<ResourceStatistics>::failed(
+        "Failed to read cpu.stat: " + stat.error());
+  }
+
+  if (stat.get().contains("nr_periods")) {
+    result.set_cpus_nr_periods(
+        (uint32_t) stat.get()["nr_periods"]);
+  }
+
+  if (stat.get().contains("nr_throttled")) {
+    result.set_cpus_nr_throttled(
+        (uint32_t) stat.get()["nr_throttled"]);
+  }
+
+  if (stat.get().contains("throttled_time")) {
+    result.set_cpus_throttled_time_secs(
+        Nanoseconds(stat.get()["throttled_time"]).secs());
+  }
+
   return result;
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/4954b75f/src/slave/constants.cpp
----------------------------------------------------------------------
diff --git a/src/slave/constants.cpp b/src/slave/constants.cpp
index 8c74c00..5573d39 100644
--- a/src/slave/constants.cpp
+++ b/src/slave/constants.cpp
@@ -30,7 +30,7 @@ const Duration GC_DELAY = Weeks(1);
 const double GC_DISK_HEADROOM = 0.1;
 const Duration DISK_WATCH_INTERVAL = Minutes(1);
 const Duration RECOVERY_TIMEOUT = Minutes(15);
-const Duration RESOURCE_MONITORING_INTERVAL = Seconds(5);
+const Duration RESOURCE_MONITORING_INTERVAL = Seconds(1);
 const uint32_t MAX_COMPLETED_FRAMEWORKS = 50;
 const uint32_t MAX_COMPLETED_EXECUTORS_PER_FRAMEWORK = 150;
 const uint32_t MAX_COMPLETED_TASKS_PER_EXECUTOR = 200;

http://git-wip-us.apache.org/repos/asf/mesos/blob/4954b75f/src/slave/monitor.cpp
----------------------------------------------------------------------
diff --git a/src/slave/monitor.cpp b/src/slave/monitor.cpp
index 8e1eb35..9cb6256 100644
--- a/src/slave/monitor.cpp
+++ b/src/slave/monitor.cpp
@@ -52,12 +52,16 @@ using process::wait; // Necessary on some OS's to disambiguate.
 // These match the names in the ResourceStatistics protobuf.
 // TODO(bmahler): Later, when we have a richer monitoring story,
 // we will want to publish these outside of this file.
+// TODO(cdel): Check if we need any more of the cgroup stats.
 const std::string CPUS_TIME_SECS        = "cpus_time_secs";
 const std::string CPUS_USER_TIME_SECS   = "cpus_user_time_secs";
 const std::string CPUS_SYSTEM_TIME_SECS = "cpus_system_time_secs";
 const std::string CPUS_LIMIT            = "cpus_limit";
 const std::string MEM_RSS_BYTES         = "mem_rss_bytes";
 const std::string MEM_LIMIT_BYTES       = "mem_limit_bytes";
+const std::string CPUS_NR_PERIODS       = "cpus_nr_periods";
+const std::string CPUS_NR_THROTTLED     = "cpus_nr_throttled";
+const std::string CPUS_THROTTLED_TIME_SECS = "cpus_throttled_time_secs";
 
 // TODO(bmahler): Deprecated statistical names, these will be removed!
 const std::string CPU_TIME   = "cpu_time";
@@ -126,6 +130,9 @@ Future<Nothing> ResourceMonitorProcess::unwatch(
   ::statistics->archive("monitor", prefix + CPUS_LIMIT);
   ::statistics->archive("monitor", prefix + MEM_RSS_BYTES);
   ::statistics->archive("monitor", prefix + MEM_LIMIT_BYTES);
+  ::statistics->archive("monitor", prefix + CPUS_NR_PERIODS);
+  ::statistics->archive("monitor", prefix + CPUS_NR_THROTTLED);
+  ::statistics->archive("monitor", prefix + CPUS_THROTTLED_TIME_SECS);
 
   if (!watches.contains(frameworkId) ||
       !watches[frameworkId].contains(executorId)) {
@@ -248,6 +255,23 @@ void publish(
       prefix + MEM_LIMIT_BYTES,
       statistics.mem_limit_bytes(),
       time);
+
+  // Publish cpu.stat statistics.
+  ::statistics->set(
+      "monitor",
+      prefix + CPUS_NR_PERIODS,
+      statistics.cpus_nr_periods(),
+      time);
+  ::statistics->set(
+      "monitor",
+      prefix + CPUS_NR_THROTTLED,
+      statistics.cpus_nr_throttled(),
+      time);
+  ::statistics->set(
+      "monitor",
+      prefix + CPUS_THROTTLED_TIME_SECS,
+      statistics.cpus_throttled_time_secs(),
+      time);
 }
 
 
@@ -286,6 +310,9 @@ Future<http::Response> _statisticsJSON(
       usage.values[CPUS_LIMIT] = 0;
       usage.values[MEM_RSS_BYTES] = 0;
       usage.values[MEM_LIMIT_BYTES] = 0;
+      usage.values[CPUS_NR_PERIODS] = 0;
+      usage.values[CPUS_NR_THROTTLED] = 0;
+      usage.values[CPUS_THROTTLED_TIME_SECS] = 0;
 
       // Set the cpu usage data if present.
       if (statistics.count(prefix + CPUS_USER_TIME_SECS) > 0) {
@@ -310,6 +337,20 @@ Future<http::Response> _statisticsJSON(
           statistics.find(prefix + MEM_LIMIT_BYTES)->second;
       }
 
+      // Set the cpu.stat data if present.
+      if (statistics.count(prefix + CPUS_NR_PERIODS) > 0) {
+        usage.values[CPUS_NR_PERIODS] =
+          statistics.find(prefix + CPUS_NR_PERIODS)->second;
+      }
+      if (statistics.count(prefix + CPUS_NR_THROTTLED) > 0) {
+        usage.values[CPUS_NR_THROTTLED] =
+          statistics.find(prefix + CPUS_NR_THROTTLED)->second;
+      }
+      if (statistics.count(prefix + CPUS_THROTTLED_TIME_SECS) > 0) {
+        usage.values[CPUS_THROTTLED_TIME_SECS] =
+          statistics.find(prefix + CPUS_THROTTLED_TIME_SECS)->second;
+      }
+
       JSON::Object entry;
       entry.values["framework_id"] = frameworkId.value();
       entry.values["executor_id"] = executorId.value();

http://git-wip-us.apache.org/repos/asf/mesos/blob/4954b75f/src/tests/monitor_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/monitor_tests.cpp b/src/tests/monitor_tests.cpp
index 3142416..3d3f8af 100644
--- a/src/tests/monitor_tests.cpp
+++ b/src/tests/monitor_tests.cpp
@@ -75,15 +75,18 @@ TEST(MonitorTest, WatchUnwatch)
   ResourceStatistics initialStatistics;
   initialStatistics.set_cpus_user_time_secs(0);
   initialStatistics.set_cpus_system_time_secs(0);
-  initialStatistics.set_cpus_limit(1.0);
+  initialStatistics.set_cpus_limit(2.5);
   initialStatistics.set_mem_rss_bytes(0);
   initialStatistics.set_mem_limit_bytes(2048);
   initialStatistics.set_timestamp(Clock::now().secs());
 
   ResourceStatistics statistics;
+  statistics.set_cpus_nr_periods(100);
+  statistics.set_cpus_nr_throttled(2);
   statistics.set_cpus_user_time_secs(4);
   statistics.set_cpus_system_time_secs(1);
-  statistics.set_cpus_limit(1.0);
+  statistics.set_cpus_throttled_time_secs(0.5);
+  statistics.set_cpus_limit(2.5);
   statistics.set_mem_rss_bytes(1024);
   statistics.set_mem_limit_bytes(2048);
   statistics.set_timestamp(
@@ -183,14 +186,20 @@ TEST(MonitorTest, WatchUnwatch)
               "\"source\":\"source\","
               "\"statistics\":{"
                   "\"cpus_limit\":%g,"
+                  "\"cpus_nr_periods\":%d,"
+                  "\"cpus_nr_throttled\":%d,"
                   "\"cpus_system_time_secs\":%g,"
+                  "\"cpus_throttled_time_secs\":%g,"
                   "\"cpus_user_time_secs\":%g,"
                   "\"mem_limit_bytes\":%lu,"
                   "\"mem_rss_bytes\":%lu"
               "}"
           "}]",
           statistics.cpus_limit(),
+          statistics.cpus_nr_periods(),
+          statistics.cpus_nr_throttled(),
           statistics.cpus_system_time_secs(),
+          statistics.cpus_throttled_time_secs(),
           statistics.cpus_user_time_secs(),
           statistics.mem_limit_bytes(),
           statistics.mem_rss_bytes()).get(),