You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by zh...@apache.org on 2018/03/14 19:55:32 UTC
[3/3] mesos git commit: Added a gauge for how long agent recovery
takes.
Added a gauge for how long agent recovery takes.
The new metric `slave/recovery_time_secs` tells us how long the
Mesos agent needed to finish its recovery cycle. This is an important
metric on agent machines which have a lot of completed executor
sandboxes.
Note that the metric 1) will only be available after recovery has succeeded
and 2) will never change its value for the rest of the agent process lifecycle.
Review: https://reviews.apache.org/r/65954
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/026dafd3
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/026dafd3
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/026dafd3
Branch: refs/heads/master
Commit: 026dafd33cd23d41818e18e31ec271fa2c13abd2
Parents: 768d6fc
Author: Zhitao Li <zh...@gmail.com>
Authored: Tue Mar 6 17:43:48 2018 -0800
Committer: Zhitao Li <zh...@gmail.com>
Committed: Wed Mar 14 12:54:54 2018 -0700
----------------------------------------------------------------------
src/slave/metrics.cpp | 18 ++++++++++++++++++
src/slave/metrics.hpp | 3 +++
src/slave/slave.cpp | 2 ++
3 files changed, 23 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/026dafd3/src/slave/metrics.cpp
----------------------------------------------------------------------
diff --git a/src/slave/metrics.cpp b/src/slave/metrics.cpp
index 0eb2b59..44294af 100644
--- a/src/slave/metrics.cpp
+++ b/src/slave/metrics.cpp
@@ -243,6 +243,24 @@ Metrics::~Metrics()
process::metrics::remove(gauge);
}
resources_revocable_percent.clear();
+
+ if (recovery_time_secs.isSome()) {
+ process::metrics::remove(recovery_time_secs.get());
+ }
+}
+
+
+void Metrics::setRecoveryTime(const Duration& duration)
+{
+ CHECK_NONE(recovery_time_secs);
+
+ const double recovery_seconds = duration.secs();
+
+ recovery_time_secs = process::metrics::Gauge(
+ "slave/recovery_time_secs",
+ [recovery_seconds]() { return recovery_seconds;});
+
+ process::metrics::add(recovery_time_secs.get());
}
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/026dafd3/src/slave/metrics.hpp
----------------------------------------------------------------------
diff --git a/src/slave/metrics.hpp b/src/slave/metrics.hpp
index 3fc933c..b771c4b 100644
--- a/src/slave/metrics.hpp
+++ b/src/slave/metrics.hpp
@@ -35,10 +35,13 @@ struct Metrics
~Metrics();
+ void setRecoveryTime(const Duration& duration);
+
process::metrics::Gauge uptime_secs;
process::metrics::Gauge registered;
process::metrics::Counter recovery_errors;
+ Option<process::metrics::Gauge> recovery_time_secs;
process::metrics::Gauge frameworks_active;
http://git-wip-us.apache.org/repos/asf/mesos/blob/026dafd3/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 4112163..0962ea7 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -7304,6 +7304,8 @@ void Slave::__recover(const Future<Nothing>& future)
}
recoveryInfo.recovered.set(Nothing()); // Signal recovery.
+
+ metrics.setRecoveryTime(process::Clock::now() - startTime);
}