You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by jp...@apache.org on 2018/10/25 15:46:06 UTC

[mesos] 02/03: Add a flag to toggle per-framework metrics.

This is an automated email from the ASF dual-hosted git repository.

jpeach pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 90eddb08c7744337e5c43b15154851a3942001ed
Author: Jacob Janco <jj...@gmail.com>
AuthorDate: Thu Oct 25 07:59:48 2018 -0700

    Add a flag to toggle per-framework metrics.
    
    In clusters with high numbers of frameworks, it can be necessary
    to control publishing of per-framework metrics. Metrics are still
    collected for active frameworks, but will not be published if this
    flag is set to false. This allows future extraction of these
    metrics, e.g. via an API call or logs, while keeping the code
    change simple.
    
    Review: https://reviews.apache.org/r/68956/
---
 include/mesos/allocator/allocator.hpp       |  1 +
 src/master/allocator/mesos/hierarchical.cpp | 10 +++--
 src/master/allocator/mesos/hierarchical.hpp |  5 ++-
 src/master/allocator/mesos/metrics.cpp      | 28 ++++++++++--
 src/master/allocator/mesos/metrics.hpp      | 10 ++++-
 src/master/flags.cpp                        |  8 ++++
 src/master/flags.hpp                        |  1 +
 src/master/framework.cpp                    |  2 +-
 src/master/master.cpp                       |  1 +
 src/master/master.hpp                       |  2 +-
 src/master/metrics.cpp                      | 70 ++++++++++++++++++-----------
 src/master/metrics.hpp                      |  9 +++-
 12 files changed, 109 insertions(+), 38 deletions(-)

diff --git a/include/mesos/allocator/allocator.hpp b/include/mesos/allocator/allocator.hpp
index 4c4273a..2a6849b 100644
--- a/include/mesos/allocator/allocator.hpp
+++ b/include/mesos/allocator/allocator.hpp
@@ -52,6 +52,7 @@ struct Options
   Option<DomainInfo> domain = None();
   Option<std::vector<Resources>> minAllocatableResources = None();
   size_t maxCompletedFrameworks = 0;
+  bool publishPerFrameworkMetrics = true;
 };
 
 
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index eae5a12..cc8ab91 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -136,12 +136,14 @@ private:
 Framework::Framework(
     const FrameworkInfo& frameworkInfo,
     const set<string>& _suppressedRoles,
-    bool _active)
+    bool _active,
+    bool _publishPerFrameworkMetrics)
   : roles(protobuf::framework::getRoles(frameworkInfo)),
     suppressedRoles(_suppressedRoles),
     capabilities(frameworkInfo.capabilities()),
     active(_active),
-    metrics(new FrameworkMetrics(frameworkInfo)) {}
+    publishPerFrameworkMetrics(_publishPerFrameworkMetrics),
+    metrics(new FrameworkMetrics(frameworkInfo, _publishPerFrameworkMetrics)) {}
 
 
 void HierarchicalAllocatorProcess::initialize(
@@ -265,7 +267,10 @@ void HierarchicalAllocatorProcess::addFramework(
   CHECK(!frameworks.contains(frameworkId));
 
   frameworks.insert(
-      {frameworkId, Framework(frameworkInfo, suppressedRoles, active)});
+      {frameworkId, Framework(frameworkInfo,
+          suppressedRoles,
+          active,
+          options.publishPerFrameworkMetrics)});
 
   const Framework& framework = frameworks.at(frameworkId);
 
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index 405f485..a4425bc 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -81,7 +81,8 @@ struct Framework
   Framework(
       const FrameworkInfo& frameworkInfo,
       const std::set<std::string>& suppressedRoles,
-      bool active);
+      bool active,
+      bool publishPerFrameworkMetrics);
 
   std::set<std::string> roles;
 
@@ -97,6 +98,8 @@ struct Framework
 
   bool active;
 
+  bool publishPerFrameworkMetrics;
+
   process::Owned<FrameworkMetrics> metrics;
 };
 
diff --git a/src/master/allocator/mesos/metrics.cpp b/src/master/allocator/mesos/metrics.cpp
index 73e68eb..5533eb9 100644
--- a/src/master/allocator/mesos/metrics.cpp
+++ b/src/master/allocator/mesos/metrics.cpp
@@ -214,8 +214,11 @@ void Metrics::removeRole(const string& role)
 }
 
 
-FrameworkMetrics::FrameworkMetrics(const FrameworkInfo& _frameworkInfo)
-  : frameworkInfo(_frameworkInfo)
+FrameworkMetrics::FrameworkMetrics(
+    const FrameworkInfo& _frameworkInfo,
+    const bool _publishPerFrameworkMetrics)
+  : frameworkInfo(_frameworkInfo),
+    publishPerFrameworkMetrics(_publishPerFrameworkMetrics)
 {
   // TODO(greggomann): Calling `getRoles` below copies the roles from the
   // framework info, which could become expensive if the number of roles grows
@@ -263,7 +266,7 @@ void FrameworkMetrics::addSubscribedRole(const string& role)
           role + "/suppressed"));
 
   CHECK(result.second);
-  process::metrics::add(result.first->second);
+  addMetric(result.first->second);
 }
 
 
@@ -272,10 +275,27 @@ void FrameworkMetrics::removeSubscribedRole(const string& role)
   auto iter = suppressed.find(role);
 
   CHECK(iter != suppressed.end());
-  process::metrics::remove(iter->second);
+  removeMetric(iter->second);
   suppressed.erase(iter);
 }
 
+
+template <typename T>
+void FrameworkMetrics::addMetric(const T& metric) {
+  if (publishPerFrameworkMetrics) {
+    process::metrics::add(metric);
+  }
+}
+
+
+template <typename T>
+void FrameworkMetrics::removeMetric(const T& metric) {
+  if (publishPerFrameworkMetrics) {
+    process::metrics::remove(metric);
+  }
+}
+
+
 } // namespace internal {
 } // namespace allocator {
 } // namespace master {
diff --git a/src/master/allocator/mesos/metrics.hpp b/src/master/allocator/mesos/metrics.hpp
index 34cc16b..a26278a 100644
--- a/src/master/allocator/mesos/metrics.hpp
+++ b/src/master/allocator/mesos/metrics.hpp
@@ -94,7 +94,9 @@ struct Metrics
 
 struct FrameworkMetrics
 {
-  explicit FrameworkMetrics(const FrameworkInfo& _frameworkInfo);
+  FrameworkMetrics(
+      const FrameworkInfo& _frameworkInfo,
+      const bool _publishPerFrameworkMetrics);
 
   ~FrameworkMetrics();
 
@@ -106,8 +108,14 @@ struct FrameworkMetrics
   void addSubscribedRole(const std::string& role);
   void removeSubscribedRole(const std::string& role);
 
+  // Add or remove per-framework metrics.
+  template <typename T> void addMetric(const T& metric);
+  template <typename T> void removeMetric(const T& metric);
+
   const FrameworkInfo frameworkInfo;
 
+  const bool publishPerFrameworkMetrics;
+
   // Suppresion state metric (boolean 0 or 1) for each role.
   hashmap<std::string, process::metrics::PushGauge> suppressed;
 };
diff --git a/src/master/flags.cpp b/src/master/flags.cpp
index 6ad53ed..2677738 100644
--- a/src/master/flags.cpp
+++ b/src/master/flags.cpp
@@ -683,6 +683,14 @@ mesos::internal::master::Flags::Flags()
       "If true, only agents with a configured domain can register.\n",
       false);
 
+  add(&Flags::publish_per_framework_metrics,
+      "publish_per_framework_metrics",
+      "If true, an extensive set of metrics for each active framework will\n"
+      "be published. These metrics are useful for understanding cluster\n"
+      "behavior, but can be overwhelming for very large numbers of\n"
+      "frameworks.",
+      true);
+
   add(&Flags::domain,
       "domain",
       "Domain that the master belongs to. Mesos currently only supports\n"
diff --git a/src/master/flags.hpp b/src/master/flags.hpp
index 4a26015..ed2d76a 100644
--- a/src/master/flags.hpp
+++ b/src/master/flags.hpp
@@ -99,6 +99,7 @@ public:
   Duration registry_max_agent_age;
   size_t registry_max_agent_count;
   bool require_agent_domain;
+  bool publish_per_framework_metrics;
   Option<DomainInfo> domain;
 
   // The following flags are executable specific (e.g., since we only
diff --git a/src/master/framework.cpp b/src/master/framework.cpp
index 7cfe9f4..36eda9f 100644
--- a/src/master/framework.cpp
+++ b/src/master/framework.cpp
@@ -69,7 +69,7 @@ Framework::Framework(
     reregisteredTime(time),
     completedTasks(masterFlags.max_completed_tasks_per_framework),
     unreachableTasks(masterFlags.max_unreachable_tasks_per_framework),
-    metrics(_info)
+    metrics(_info, masterFlags.publish_per_framework_metrics)
 {
   CHECK(_info.has_id());
 
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 98a5bb1..704dfc0 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -764,6 +764,7 @@ void Master::initialize()
   options.domain = flags.domain;
   options.minAllocatableResources = CHECK_NOTERROR(minAllocatableResources);
   options.maxCompletedFrameworks = flags.max_completed_frameworks;
+  options.publishPerFrameworkMetrics = flags.publish_per_framework_metrics;
 
   // Initialize the allocator.
   allocator->initialize(
diff --git a/src/master/master.hpp b/src/master/master.hpp
index ea7e924..e77babf 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -2574,7 +2574,7 @@ struct Framework
   Option<process::Owned<
       Heartbeater<mesos::scheduler::Event, v1::scheduler::Event>>> heartbeater;
 
-  // This is used for per-framwork metrics.
+  // This is used for per-framework metrics.
   FrameworkMetrics metrics;
 
 private:
diff --git a/src/master/metrics.cpp b/src/master/metrics.cpp
index 56a7eef..f69ed52 100644
--- a/src/master/metrics.cpp
+++ b/src/master/metrics.cpp
@@ -552,8 +552,11 @@ void Metrics::incrementTasksStates(
 }
 
 
-FrameworkMetrics::FrameworkMetrics(const FrameworkInfo& _frameworkInfo)
+FrameworkMetrics::FrameworkMetrics(
+    const FrameworkInfo& _frameworkInfo,
+    bool _publishPerFrameworkMetrics)
   : frameworkInfo(_frameworkInfo),
+    publishPerFrameworkMetrics(_publishPerFrameworkMetrics),
     subscribed(
         getFrameworkMetricPrefix(frameworkInfo) + "subscribed"),
     calls(
@@ -571,15 +574,14 @@ FrameworkMetrics::FrameworkMetrics(const FrameworkInfo& _frameworkInfo)
     operations(
         getFrameworkMetricPrefix(frameworkInfo) + "operations")
 {
-  process::metrics::add(subscribed);
-
-  process::metrics::add(offers_sent);
-  process::metrics::add(offers_accepted);
-  process::metrics::add(offers_declined);
-  process::metrics::add(offers_rescinded);
+  addMetric(subscribed);
+  addMetric(offers_sent);
+  addMetric(offers_accepted);
+  addMetric(offers_declined);
+  addMetric(offers_rescinded);
 
   // Add metrics for scheduler calls.
-  process::metrics::add(calls);
+  addMetric(calls);
   for (int index = 0;
        index < scheduler::Call::Type_descriptor()->value_count();
        index++) {
@@ -598,11 +600,11 @@ FrameworkMetrics::FrameworkMetrics(const FrameworkInfo& _frameworkInfo)
         strings::lower(descriptor->name()));
 
     call_types.put(type, counter);
-    process::metrics::add(counter);
+    addMetric(counter);
   }
 
   // Add metrics for scheduler events.
-  process::metrics::add(events);
+  addMetric(events);
   for (int index = 0;
        index < scheduler::Event::Type_descriptor()->value_count();
        index++) {
@@ -621,7 +623,7 @@ FrameworkMetrics::FrameworkMetrics(const FrameworkInfo& _frameworkInfo)
         strings::lower(descriptor->name()));
 
     event_types.put(type, counter);
-    process::metrics::add(counter);
+    addMetric(counter);
   }
 
   // Add metrics for both active and terminal task states.
@@ -637,19 +639,19 @@ FrameworkMetrics::FrameworkMetrics(const FrameworkInfo& _frameworkInfo)
           strings::lower(descriptor->name()));
 
       terminal_task_states.put(state, counter);
-      process::metrics::add(counter);
+      addMetric(counter);
     } else {
       PushGauge gauge = PushGauge(
           getFrameworkMetricPrefix(frameworkInfo) + "tasks/active/" +
           strings::lower(TaskState_Name(state)));
 
       active_task_states.put(state, gauge);
-      process::metrics::add(gauge);
+      addMetric(gauge);
     }
   }
 
   // Add metrics for offer operations.
-  process::metrics::add(operations);
+  addMetric(operations);
   for (int index = 0;
        index < Offer::Operation::Type_descriptor()->value_count();
        index++) {
@@ -668,41 +670,41 @@ FrameworkMetrics::FrameworkMetrics(const FrameworkInfo& _frameworkInfo)
       "operations/" + strings::lower(descriptor->name()));
 
     operation_types.put(type, counter);
-    process::metrics::add(counter);
+    addMetric(counter);
   }
 }
 
 
 FrameworkMetrics::~FrameworkMetrics()
 {
-  process::metrics::remove(subscribed);
+  removeMetric(subscribed);
 
-  process::metrics::remove(calls);
+  removeMetric(calls);
   foreachvalue (const Counter& counter, call_types) {
-    process::metrics::remove(counter);
+    removeMetric(counter);
   }
 
-  process::metrics::remove(events);
+  removeMetric(events);
   foreachvalue (const Counter& counter, event_types) {
-    process::metrics::remove(counter);
+    removeMetric(counter);
   }
 
-  process::metrics::remove(offers_sent);
-  process::metrics::remove(offers_accepted);
-  process::metrics::remove(offers_declined);
-  process::metrics::remove(offers_rescinded);
+  removeMetric(offers_sent);
+  removeMetric(offers_accepted);
+  removeMetric(offers_declined);
+  removeMetric(offers_rescinded);
 
   foreachvalue (const Counter& counter, terminal_task_states) {
-    process::metrics::remove(counter);
+    removeMetric(counter);
   }
 
   foreachvalue (const PushGauge& gauge, active_task_states) {
-    process::metrics::remove(gauge);
+    removeMetric(gauge);
   }
 
-  process::metrics::remove(operations);
+  removeMetric(operations);
   foreachvalue (const Counter& counter, operation_types) {
-    process::metrics::remove(counter);
+    removeMetric(counter);
   }
 }
 
@@ -753,6 +755,22 @@ string getFrameworkMetricPrefix(const FrameworkInfo& frameworkInfo)
 }
 
 
+template <typename T>
+void FrameworkMetrics::addMetric(const T& metric)  {
+  if (publishPerFrameworkMetrics) {
+    process::metrics::add(metric);
+  }
+}
+
+
+template <typename T>
+void FrameworkMetrics::removeMetric(const T& metric)  {
+  if (publishPerFrameworkMetrics) {
+    process::metrics::remove(metric);
+  }
+}
+
+
 void FrameworkMetrics::incrementEvent(const scheduler::Event& event)
 {
   ++CHECK_NOTNONE(event_types.get(event.type()));
diff --git a/src/master/metrics.hpp b/src/master/metrics.hpp
index e1da18e..f6bb89d 100644
--- a/src/master/metrics.hpp
+++ b/src/master/metrics.hpp
@@ -220,7 +220,9 @@ struct Metrics
 
 struct FrameworkMetrics
 {
-  explicit FrameworkMetrics(const FrameworkInfo& _frameworkInfo);
+  FrameworkMetrics(
+      const FrameworkInfo& _frameworkInfo,
+      bool publishPerFrameworkMetrics);
 
   ~FrameworkMetrics();
 
@@ -247,8 +249,13 @@ struct FrameworkMetrics
 
   void incrementOperation(const Offer::Operation& operation);
 
+  template <typename T> void addMetric(const T& metric);
+  template <typename T> void removeMetric(const T& metric);
+
   const FrameworkInfo frameworkInfo;
 
+  bool publishPerFrameworkMetrics;
+
   process::metrics::PushGauge subscribed;
 
   process::metrics::Counter calls;