You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2014/05/14 21:13:33 UTC

git commit: Ported some of the master stats to the new metrics library.

Repository: mesos
Updated Branches:
  refs/heads/master 2ac0c1695 -> 3ad032008


Ported some of the master stats to the new metrics library.

Review: https://reviews.apache.org/r/19504


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3ad03200
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3ad03200
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3ad03200

Branch: refs/heads/master
Commit: 3ad032008c0b8014600608ac3b002680382ba5e0
Parents: 2ac0c16
Author: Dominic Hamon <dh...@twopensource.com>
Authored: Wed May 14 11:42:39 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed May 14 12:13:21 2014 -0700

----------------------------------------------------------------------
 src/master/http.cpp        |   1 +
 src/master/master.cpp      | 163 ++++++++++++++++++++++++++++++++++++++++
 src/master/master.hpp      |  97 +++++++++++-------------
 src/tests/master_tests.cpp |  21 ++++++
 4 files changed, 229 insertions(+), 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/3ad03200/src/master/http.cpp
----------------------------------------------------------------------
diff --git a/src/master/http.cpp b/src/master/http.cpp
index f54c841..fe74d5b 100644
--- a/src/master/http.cpp
+++ b/src/master/http.cpp
@@ -377,6 +377,7 @@ Future<Response> Master::Http::stats(const Request& request)
   }
   object.values["active_tasks_gauge"] = active_tasks;
 
+  // TODO(dhamon): Port these to metrics library.
   // Get total and used (note, not offered) resources in order to
   // compute capacity of scalar resources.
   Resources totalResources;

http://git-wip-us.apache.org/repos/asf/mesos/blob/3ad03200/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index d545367..2f0e902 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -30,6 +30,8 @@
 #include <process/id.hpp>
 #include <process/run.hpp>
 
+#include <process/metrics/metrics.hpp>
+
 #include <stout/check.hpp>
 #include <stout/lambda.hpp>
 #include <stout/memory.hpp>
@@ -2024,6 +2026,7 @@ void Master::schedulerMessage(
       << " of framework " << frameworkId
       << " because the framework cannot be found";
     stats.invalidFrameworkMessages++;
+    metrics.invalid_framework_to_executor_messages++;
     return;
   }
 
@@ -2034,6 +2037,7 @@ void Master::schedulerMessage(
       << " because it is not from the registered framework "
       << framework->pid;
     stats.invalidFrameworkMessages++;
+    metrics.invalid_framework_to_executor_messages++;
     return;
   }
 
@@ -2043,6 +2047,7 @@ void Master::schedulerMessage(
                  << frameworkId << " to slave " << slaveId
                  << " because slave is not activated";
     stats.invalidFrameworkMessages++;
+    metrics.invalid_framework_to_executor_messages++;
     return;
   }
 
@@ -2051,6 +2056,7 @@ void Master::schedulerMessage(
                  << frameworkId << " to slave " << *slave
                  << " because slave is disconnected";
     stats.invalidFrameworkMessages++;
+    metrics.invalid_framework_to_executor_messages++;
     return;
   }
 
@@ -2065,6 +2071,7 @@ void Master::schedulerMessage(
   send(slave->pid, message);
 
   stats.validFrameworkMessages++;
+  metrics.valid_framework_to_executor_messages++;
 }
 
 
@@ -2417,6 +2424,7 @@ void Master::statusUpdate(const StatusUpdate& update, const UPID& pid)
     send(pid, message);
 
     stats.invalidStatusUpdates++;
+    metrics.invalid_status_updates++;
     return;
   }
 
@@ -2425,6 +2433,7 @@ void Master::statusUpdate(const StatusUpdate& update, const UPID& pid)
                  << " from unknown slave " << pid
                  << " with id " << update.slave_id();
     stats.invalidStatusUpdates++;
+    metrics.invalid_status_updates++;
     return;
   }
 
@@ -2436,6 +2445,7 @@ void Master::statusUpdate(const StatusUpdate& update, const UPID& pid)
     LOG(WARNING) << "Ignoring status update " << update
                  << " from slave " << *slave << ": " << _forward.error();
     stats.invalidStatusUpdates++;
+    metrics.invalid_status_updates++;
     return;
   }
 
@@ -2446,6 +2456,7 @@ void Master::statusUpdate(const StatusUpdate& update, const UPID& pid)
     LOG(WARNING) << "Could not lookup task for status update " << update
                  << " from slave " << *slave;
     stats.invalidStatusUpdates++;
+    metrics.invalid_status_updates++;
     return;
   }
 
@@ -2466,6 +2477,7 @@ void Master::statusUpdate(const StatusUpdate& update, const UPID& pid)
 
   stats.tasks[status.state()]++;
   stats.validStatusUpdates++;
+  metrics.valid_status_updates++;
 }
 
 
@@ -3654,6 +3666,157 @@ SlaveID Master::newSlaveId()
   return slaveId;
 }
 
+
+double Master::_active_slaves() // Gauge handler for "master/active_slaves".
+{
+  double count = 0.0;
+  foreachvalue (Slave* slave, slaves.activated) {
+    if (!slave->disconnected) { // Count only slaves not flagged disconnected.
+      count++;
+    }
+  }
+  return count;
+}
+
+
+double Master::_inactive_slaves() // Handler for "master/inactive_slaves".
+{
+  double count = 0.0;
+  foreachvalue (Slave* slave, slaves.activated) {
+    if (slave->disconnected) { // Activated slaves flagged as disconnected.
+      count++;
+    }
+  }
+  return count;
+}
+
+
+// TODO(dhamon): Consider moving to master/metrics.cpp|hpp.
+Master::Metrics::Metrics(const Master& master)
+  : uptime_secs( // Each gauge pairs a metric name with a deferred handler.
+        "master/uptime_secs",
+        defer(master, &Master::_uptime_secs)),
+    elected(
+        "master/elected",
+        defer(master, &Master::_elected)),
+    active_slaves(
+        "master/active_slaves",
+        defer(master, &Master::_active_slaves)),
+    inactive_slaves(
+        "master/inactive_slaves",
+        defer(master, &Master::_inactive_slaves)),
+    active_frameworks(
+        "master/active_frameworks",
+        defer(master, &Master::_active_frameworks)),
+    inactive_frameworks(
+        "master/inactive_frameworks",
+        defer(master, &Master::_inactive_frameworks)),
+    outstanding_offers(
+        "master/outstanding_offers",
+        defer(master, &Master::_outstanding_offers)),
+    dropped_messages( // The name-only metrics below take no handler.
+        "master/dropped_messages"),
+    framework_registration_messages(
+        "master/framework_registration_messages"),
+    framework_reregistration_messages(
+        "master/framework_reregistration_messages"),
+    slave_registration_messages(
+        "master/slave_registration_messages"),
+    slave_reregistration_messages(
+        "master/slave_reregistration_messages"),
+    valid_framework_to_executor_messages(
+        "master/valid_framework_to_executor_messages"),
+    invalid_framework_to_executor_messages(
+        "master/invalid_framework_to_executor_messages"),
+    valid_status_updates(
+        "master/valid_status_updates"),
+    invalid_status_updates(
+        "master/invalid_status_updates"),
+    recovery_slave_removals(
+        "master/recovery_slave_removals"),
+    event_queue_size( // Also bound to a Master handler via defer.
+        "master/event_queue_size",
+        defer(master, &Master::_event_queue_size)),
+    slave_registrations(
+        "master/slave_registrations"),
+    slave_reregistrations(
+        "master/slave_reregistrations"),
+    slave_removals(
+        "master/slave_removals")
+{ // Body passes every metric initialized above to process::metrics::add.
+  // TODO(dhamon): Check return values of 'add'.
+  process::metrics::add(uptime_secs);
+  process::metrics::add(elected);
+
+  process::metrics::add(active_slaves);
+  process::metrics::add(inactive_slaves);
+
+  process::metrics::add(active_frameworks);
+  process::metrics::add(inactive_frameworks);
+
+  process::metrics::add(outstanding_offers);
+
+  process::metrics::add(dropped_messages);
+
+  process::metrics::add(framework_registration_messages);
+  process::metrics::add(framework_reregistration_messages);
+
+  process::metrics::add(slave_registration_messages);
+  process::metrics::add(slave_reregistration_messages);
+
+  process::metrics::add(valid_framework_to_executor_messages);
+  process::metrics::add(invalid_framework_to_executor_messages);
+
+  process::metrics::add(valid_status_updates);
+  process::metrics::add(invalid_status_updates);
+
+  process::metrics::add(recovery_slave_removals);
+
+  process::metrics::add(event_queue_size);
+
+  process::metrics::add(slave_registrations);
+  process::metrics::add(slave_reregistrations);
+  process::metrics::add(slave_removals);
+}
+
+
+Master::Metrics::~Metrics()
+{ // Passes each metric the constructor added to process::metrics::remove.
+  // TODO(dhamon): Check return values of 'remove'.
+  process::metrics::remove(uptime_secs);
+  process::metrics::remove(elected);
+
+  process::metrics::remove(active_slaves);
+  process::metrics::remove(inactive_slaves);
+
+  process::metrics::remove(active_frameworks);
+  process::metrics::remove(inactive_frameworks);
+
+  process::metrics::remove(outstanding_offers);
+
+  process::metrics::remove(dropped_messages);
+
+  process::metrics::remove(framework_registration_messages);
+  process::metrics::remove(framework_reregistration_messages);
+
+  process::metrics::remove(slave_registration_messages);
+  process::metrics::remove(slave_reregistration_messages);
+
+  process::metrics::remove(valid_framework_to_executor_messages);
+  process::metrics::remove(invalid_framework_to_executor_messages);
+
+  process::metrics::remove(valid_status_updates);
+  process::metrics::remove(invalid_status_updates);
+
+  process::metrics::remove(recovery_slave_removals);
+
+  process::metrics::remove(event_queue_size);
+
+  process::metrics::remove(slave_registrations);
+  process::metrics::remove(slave_reregistrations);
+  process::metrics::remove(slave_removals);
+}
+
 } // namespace master {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/3ad03200/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 4f9ae36..12111cf 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -474,64 +474,20 @@ private:
 
   struct Metrics
   {
-    Metrics(const Master& master)
-      : dropped_messages(
-            "master/dropped_messages"),
-        framework_registration_messages(
-            "master/framework_registration_messages"),
-        framework_reregistration_messages(
-            "master/framework_reregistration_messages"),
-        slave_registration_messages(
-            "master/slave_registration_messages"),
-        slave_reregistration_messages(
-            "master/slave_reregistration_messages"),
-        recovery_slave_removals(
-            "master/recovery_slave_removals"),
-        event_queue_size(
-            "master/event_queue_size",
-            defer(master, &Master::_event_queue_size)),
-        slave_registrations(
-            "master/slave_registrations"),
-        slave_reregistrations(
-            "master/slave_reregistrations"),
-        slave_removals(
-            "master/slave_removals")
-    {
-      process::metrics::add(dropped_messages);
-
-      process::metrics::add(framework_registration_messages);
-      process::metrics::add(framework_reregistration_messages);
-
-      process::metrics::add(slave_registration_messages);
-      process::metrics::add(slave_reregistration_messages);
-
-      process::metrics::add(recovery_slave_removals);
+    Metrics(const Master& master);
 
-      process::metrics::add(event_queue_size);
-
-      process::metrics::add(slave_registrations);
-      process::metrics::add(slave_reregistrations);
-      process::metrics::add(slave_removals);
-    }
-
-    ~Metrics()
-    {
-      process::metrics::remove(dropped_messages);
+    ~Metrics();
 
-      process::metrics::remove(framework_registration_messages);
-      process::metrics::remove(framework_reregistration_messages);
+    process::metrics::Gauge uptime_secs;
+    process::metrics::Gauge elected;
 
-      process::metrics::remove(slave_registration_messages);
-      process::metrics::remove(slave_reregistration_messages);
+    process::metrics::Gauge active_slaves;
+    process::metrics::Gauge inactive_slaves;
 
-      process::metrics::remove(recovery_slave_removals);
+    process::metrics::Gauge active_frameworks;
+    process::metrics::Gauge inactive_frameworks;
 
-      process::metrics::remove(event_queue_size);
-
-      process::metrics::remove(slave_registrations);
-      process::metrics::remove(slave_reregistrations);
-      process::metrics::remove(slave_removals);
-    }
+    process::metrics::Gauge outstanding_offers;
 
     // Message counters.
     // TODO(bmahler): Add counters for other messages: kill task,
@@ -544,6 +500,12 @@ private:
     process::metrics::Counter slave_registration_messages;
     process::metrics::Counter slave_reregistration_messages;
 
+    process::metrics::Counter valid_framework_to_executor_messages;
+    process::metrics::Counter invalid_framework_to_executor_messages;
+
+    process::metrics::Counter valid_status_updates;
+    process::metrics::Counter invalid_status_updates;
+
     // Recovery counters.
     process::metrics::Counter recovery_slave_removals;
 
@@ -557,6 +519,35 @@ private:
   } metrics;
 
   // Gauge handlers.
+  double _uptime_secs() // Seconds between startTime and Clock::now().
+  {
+    return (process::Clock::now() - startTime).secs();
+  }
+
+  double _elected() // Reports elected() as a number: 1 if true, else 0.
+  {
+    return elected() ? 1 : 0;
+  }
+
+  double _active_slaves();
+
+  double _inactive_slaves();
+
+  double _active_frameworks() // Size of getActiveFrameworks().
+  {
+    return getActiveFrameworks().size();
+  }
+
+  double _inactive_frameworks() // frameworks.activated minus active count.
+  {
+    return frameworks.activated.size() - _active_frameworks();
+  }
+
+  double _outstanding_offers() // Number of entries in `offers`.
+  {
+    return offers.size();
+  }
+
   double _event_queue_size()
   {
     size_t size;

http://git-wip-us.apache.org/repos/asf/mesos/blob/3ad03200/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 7aa678a..dcda0c7 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -1409,6 +1409,17 @@ TEST_F(MasterTest, MetricsInStatsEndpoint)
 
   JSON::Object stats = parse.get();
 
+  EXPECT_EQ(1u, stats.values.count("master/uptime_secs"));
+  EXPECT_EQ(1u, stats.values.count("master/elected"));
+
+  EXPECT_EQ(1u, stats.values.count("master/active_slaves"));
+  EXPECT_EQ(1u, stats.values.count("master/inactive_slaves"));
+
+  EXPECT_EQ(1u, stats.values.count("master/active_frameworks"));
+  EXPECT_EQ(1u, stats.values.count("master/inactive_frameworks"));
+
+  EXPECT_EQ(1u, stats.values.count("master/outstanding_offers"));
+
   EXPECT_EQ(1u, stats.values.count("master/dropped_messages"));
 
   EXPECT_EQ(1u, stats.values.count("master/framework_registration_messages"));
@@ -1417,6 +1428,16 @@ TEST_F(MasterTest, MetricsInStatsEndpoint)
   EXPECT_EQ(1u, stats.values.count("master/slave_registration_messages"));
   EXPECT_EQ(1u, stats.values.count("master/slave_reregistration_messages"));
 
+  EXPECT_EQ(
+      1u,
+      stats.values.count("master/valid_framework_to_executor_messages"));
+  EXPECT_EQ(
+      1u,
+      stats.values.count("master/invalid_framework_to_executor_messages"));
+
+  EXPECT_EQ(1u, stats.values.count("master/valid_status_updates"));
+  EXPECT_EQ(1u, stats.values.count("master/invalid_status_updates"));
+
   EXPECT_EQ(1u, stats.values.count("master/recovery_slave_removals"));
 
   EXPECT_EQ(1u, stats.values.count("master/event_queue_size"));