You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2016/04/12 21:23:35 UTC
[2/5] mesos git commit: Added some metrics to the
long-lived-framework example.
Added some metrics to the long-lived-framework example.
Adds metrics to gauge the health of the framework. These include:
* uptime_secs = How long the framework has been running, in seconds.
* registered = Whether the framework is registered (1) or not (0).
* offers_received = Number of offers received; a counter used to
determine whether the framework is starved.
* tasks_launched = Number of tasks launched.
* abnormal_terminations = Number of terminal status updates that
were not `TASK_FINISHED`.
Also adds an endpoint, `/framework/counters`, that returns the list of
metrics that are of the "counter" type.
Review: https://reviews.apache.org/r/45440/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c82c12ae
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c82c12ae
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c82c12ae
Branch: refs/heads/master
Commit: c82c12aecea0e44214e0a9ac8ad02e33aa197274
Parents: debf0ac
Author: Joseph Wu <jo...@mesosphere.io>
Authored: Tue Apr 12 12:23:13 2016 -0700
Committer: Vinod Kone <vi...@gmail.com>
Committed: Tue Apr 12 12:23:13 2016 -0700
----------------------------------------------------------------------
src/examples/long_lived_framework.cpp | 136 ++++++++++++++++++++++++++++-
1 file changed, 134 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/c82c12ae/src/examples/long_lived_framework.cpp
----------------------------------------------------------------------
diff --git a/src/examples/long_lived_framework.cpp b/src/examples/long_lived_framework.cpp
index 29ee6f2..035ddda 100644
--- a/src/examples/long_lived_framework.cpp
+++ b/src/examples/long_lived_framework.cpp
@@ -22,6 +22,18 @@
#include <mesos/resources.hpp>
#include <mesos/scheduler.hpp>
+#include <process/clock.hpp>
+#include <process/defer.hpp>
+#include <process/help.hpp>
+#include <process/http.hpp>
+#include <process/process.hpp>
+#include <process/protobuf.hpp>
+#include <process/time.hpp>
+
+#include <process/metrics/counter.hpp>
+#include <process/metrics/gauge.hpp>
+#include <process/metrics/metrics.hpp>
+
#include <stout/flags.hpp>
#include <stout/foreach.hpp>
#include <stout/option.hpp>
@@ -34,6 +46,18 @@ using namespace mesos;
using std::string;
using std::vector;
+using process::AUTHENTICATION;
+using process::Clock;
+using process::defer;
+using process::DESCRIPTION;
+using process::HELP;
+using process::TLDR;
+
+using process::http::OK;
+
+using process::metrics::Gauge;
+using process::metrics::Counter;
+
// NOTE: Per-task resources are nominal because all of the resources for the
// container are provisioned when the executor is created. The executor can
@@ -57,25 +81,39 @@ public:
taskResources(Resources::parse(
"cpus:" + stringify(CPUS_PER_TASK) +
";mem:" + stringify(MEM_PER_TASK)).get()),
- tasksLaunched(0) {}
+ tasksLaunched(0),
+ metrics(*this)
+ {
+ process::spawn(metrics);
+ }
- virtual ~LongLivedScheduler() {}
+ virtual ~LongLivedScheduler()
+ {
+ process::terminate(metrics);
+ process::wait(metrics);
+ }
virtual void registered(SchedulerDriver*,
const FrameworkID&,
const MasterInfo&)
{
LOG(INFO) << "Registered!";
+
+ metrics.isRegistered = true;
}
virtual void reregistered(SchedulerDriver*, const MasterInfo& masterInfo)
{
LOG(INFO) << "Re-registered!";
+
+ metrics.isRegistered = true;
}
virtual void disconnected(SchedulerDriver* driver)
{
LOG(INFO) << "Disconnected!";
+
+ metrics.isRegistered = false;
}
virtual void resourceOffers(SchedulerDriver* driver,
@@ -83,6 +121,8 @@ public:
{
static const Resources EXECUTOR_RESOURCES = Resources(executor.resources());
+ metrics.offers_received += offers.size();
+
foreach (const Offer& offer, offers) {
if (slaveId.isNone()) {
// No active executor running in the cluster.
@@ -129,6 +169,13 @@ public:
<< "Task " << status.task_id().value()
<< " is in state " << TaskState_Name(status.state())
<< (status.has_message() ? " with message: " + status.message() : "");
+
+ if (status.state() == TASK_KILLED ||
+ status.state() == TASK_LOST ||
+ status.state() == TASK_FAILED ||
+ status.state() == TASK_ERROR) {
+ ++metrics.abnormal_terminations;
+ }
}
virtual void frameworkMessage(SchedulerDriver* driver,
@@ -193,6 +240,91 @@ private:
// Unless that slave/executor dies, this framework will not launch
// an executor on any other slave.
Option<SlaveID> slaveId;
+
+ struct Metrics : process::Process<Metrics>
+ {
+ Metrics(const LongLivedScheduler& _scheduler)
+ : ProcessBase("framework"),
+ scheduler(_scheduler),
+ isRegistered(false),
+ uptime_secs(
+ "long_lived_framework/uptime_secs",
+ defer(this, &Self::_uptime_secs)),
+ registered(
+ "long_lived_framework/registered",
+ defer(this, &Self::_registered)),
+ offers_received("long_lived_framework/offers_received"),
+ tasks_launched(
+ "long_lived_framework/tasks_launched",
+ defer(this, &Self::_tasksLaunched)),
+ abnormal_terminations("long_lived_framework/abnormal_terminations")
+ {
+ start_time = Clock::now();
+
+ process::metrics::add(uptime_secs);
+ process::metrics::add(registered);
+ process::metrics::add(offers_received);
+ process::metrics::add(tasks_launched);
+ process::metrics::add(abnormal_terminations);
+ }
+
+ virtual void initialize()
+ {
+ // Special route for metric metadata.
+ route(
+ "/counters",
+ HELP(
+ TLDR("List of counter-type metrics."),
+ DESCRIPTION("Returns 200 OK iff the request is accepted."),
+ AUTHENTICATION(false)),
+ [this](const process::http::Request& request) {
+ JSON::Array array;
+ array.values.push_back("long_lived_framework/offers_received");
+ array.values.push_back(
+ "long_lived_framework/abnormal_terminations");
+
+ return OK(array, request.url.query.get("jsonp"));
+ });
+ }
+
+ ~Metrics()
+ {
+ process::metrics::remove(uptime_secs);
+ process::metrics::remove(registered);
+ process::metrics::remove(offers_received);
+ process::metrics::remove(tasks_launched);
+ process::metrics::remove(abnormal_terminations);
+ }
+
+ const LongLivedScheduler& scheduler;
+
+ process::Time start_time;
+ double _uptime_secs()
+ {
+ return (Clock::now() - start_time).secs();
+ }
+
+ bool isRegistered;
+ double _registered()
+ {
+ return isRegistered ? 1 : 0;
+ }
+
+ double _tasksLaunched()
+ {
+ return scheduler.tasksLaunched;
+ }
+
+ process::metrics::Gauge uptime_secs;
+ process::metrics::Gauge registered;
+
+ process::metrics::Counter offers_received;
+ process::metrics::Gauge tasks_launched;
+
+ // The only expected terminal state is TASK_FINISHED.
+ // Other terminal states are considered incorrect.
+ process::metrics::Counter abnormal_terminations;
+ } metrics;
};