You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bb...@apache.org on 2019/01/24 09:40:01 UTC

[mesos] branch master updated (4c2a848 -> 7bfec08)

This is an automated email from the ASF dual-hosted git repository.

bbannier pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from 4c2a848  Fixed some typo in agent reboot tests.
     new a7d960a  Made starting and stopping on CSI plugin containers more verbose.
     new 7bfec08  Exposed subscriptions and disconnections RP manager metrics.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/resource_provider/manager.cpp             | 48 +++++++++++++++++--------
 src/resource_provider/storage/provider.cpp    | 10 ++++++
 src/tests/resource_provider_manager_tests.cpp | 51 +++++++++++++++++++++++++--
 3 files changed, 93 insertions(+), 16 deletions(-)


[mesos] 01/02: Made starting and stopping on CSI plugin containers more verbose.

Posted by bb...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bbannier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit a7d960a91d6bb213cb9b8e5054ee0741436d6aee
Author: Benjamin Bannier <be...@mesosphere.io>
AuthorDate: Thu Jan 24 10:04:51 2019 +0100

    Made starting and stopping on CSI plugin containers more verbose.
    
    This patch adds some additional logging so it becomes easier to follow
    the lifecycle of CSI plugins. The container daemon already logged some
    related information, but didn't directly call out that it was working
    with CSI plugin containers.
    
    Review: https://reviews.apache.org/r/69606/
---
 src/resource_provider/storage/provider.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/resource_provider/storage/provider.cpp b/src/resource_provider/storage/provider.cpp
index a064166..6f0c062 100644
--- a/src/resource_provider/storage/provider.cpp
+++ b/src/resource_provider/storage/provider.cpp
@@ -2017,6 +2017,11 @@ Future<csi::v0::Client> StorageLocalResourceProviderProcess::getService(
       config->resources(),
       containerInfo,
       std::function<Future<Nothing>()>(defer(self(), [=]() -> Future<Nothing> {
+        LOG(INFO)
+          << "CSI plugin container '" << containerId << "' started for plugin"
+          << " type '" << info.storage().plugin().type() << "' and "
+          << " name '" << info.storage().plugin().name() << "'";
+
         CHECK(services.at(containerId)->associate(connect(endpointPath)));
         return services.at(containerId)->future()
           .then([] { return Nothing(); });
@@ -2027,6 +2032,11 @@ Future<csi::v0::Client> StorageLocalResourceProviderProcess::getService(
         services.at(containerId)->discard();
         services.at(containerId).reset(new Promise<csi::v0::Client>());
 
+        LOG(INFO)
+          << "CSI plugin container '" << containerId << "' stopped for plugin"
+          << " type '" << info.storage().plugin().type() << "' and "
+          << " name '" << info.storage().plugin().name() << "'";
+
         if (os::exists(endpointPath)) {
           Try<Nothing> rm = os::rm(endpointPath);
           if (rm.isError()) {


[mesos] 02/02: Exposed subscriptions and disconnections RP manager metrics.

Posted by bb...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bbannier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 7bfec082b83bb898c3c45457d2967685d90b663f
Author: Benjamin Bannier <be...@mesosphere.io>
AuthorDate: Thu Jan 24 10:04:54 2019 +0100

    Exposed subscriptions and disconnections RP manager metrics.
    
    This patch adds monotonically increasing counters for subscriptions and
    disconnections of resource providers with the resource provider manager
    (`resource_provider_manager/events/subscribed` and
    `resource_provider_manager/events/disconnects`, respectively). While the
    existing gauge `resource_provider_manager/subscribed` exposed the
    current state, these added counters can be used to monitor resource
    provider state for unexpected events.
    
    Review: https://reviews.apache.org/r/69719/
---
 src/resource_provider/manager.cpp             | 48 +++++++++++++++++--------
 src/tests/resource_provider_manager_tests.cpp | 51 +++++++++++++++++++++++++--
 2 files changed, 83 insertions(+), 16 deletions(-)

diff --git a/src/resource_provider/manager.cpp b/src/resource_provider/manager.cpp
index 65852c6..2cde62a 100644
--- a/src/resource_provider/manager.cpp
+++ b/src/resource_provider/manager.cpp
@@ -33,8 +33,9 @@
 #include <process/id.hpp>
 #include <process/process.hpp>
 
-#include <process/metrics/pull_gauge.hpp>
+#include <process/metrics/counter.hpp>
 #include <process/metrics/metrics.hpp>
+#include <process/metrics/pull_gauge.hpp>
 
 #include <stout/hashmap.hpp>
 #include <stout/nothing.hpp>
@@ -92,6 +93,7 @@ using process::http::UnsupportedMediaType;
 
 using process::http::authentication::Principal;
 
+using process::metrics::Counter;
 using process::metrics::PullGauge;
 
 namespace mesos {
@@ -244,6 +246,8 @@ private:
     ~Metrics();
 
     PullGauge subscribed;
+    Counter subscriptions;
+    Counter disconnections;
   };
 
   Owned<Registrar> registrar;
@@ -820,17 +824,6 @@ void ResourceProviderManagerProcess::_subscribe(
 
   const ResourceProviderID& resourceProviderId = resourceProvider->info.id();
 
-  Event event;
-  event.set_type(Event::SUBSCRIBED);
-  event.mutable_subscribed()->mutable_provider_id()
-    ->CopyFrom(resourceProviderId);
-
-  if (!resourceProvider->http.send(event)) {
-    LOG(WARNING) << "Failed to send SUBSCRIBED event to resource provider "
-                 << resourceProviderId << ": connection closed";
-    return;
-  }
-
   resourceProvider->http.closed()
     .onAny(defer(self(), [=](const Future<Nothing>& future) {
       // Iff the remote side closes the HTTP connection, the future will be
@@ -852,6 +845,8 @@ void ResourceProviderManagerProcess::_subscribe(
       message.disconnect = std::move(disconnect);
 
       messages.put(std::move(message));
+
+      ++metrics.disconnections;
     }));
 
   if (!resourceProviders.known.contains(resourceProviderId)) {
@@ -869,12 +864,31 @@ void ResourceProviderManagerProcess::_subscribe(
   message.type = ResourceProviderMessage::Type::SUBSCRIBE;
   message.subscribe = std::move(subscribe);
 
+  // Store a pointer to the resource provider so that we can access it
+  // even after moving it into the list of subscribed providers below.
+  ResourceProvider* _resourceProvider = resourceProvider.get();
+
   // TODO(jieyu): Start heartbeat for the resource provider.
   resourceProviders.subscribed.put(
       resourceProviderId,
       std::move(resourceProvider));
 
   messages.put(std::move(message));
+  ++metrics.subscriptions;
+
+  // Only send a `SUBSCRIBED` event after we have updated our internal
+  // bookkeeping so that e.g., metrics always reflect correct
+  // subscription state.
+  Event event;
+  event.set_type(Event::SUBSCRIBED);
+  event.mutable_subscribed()->mutable_provider_id()
+    ->CopyFrom(resourceProviderId);
+
+  if (!_resourceProvider->http.send(event)) {
+    LOG(WARNING) << "Failed to send SUBSCRIBED event to resource provider "
+                 << resourceProviderId << ": connection closed";
+    return;
+  }
 }
 
 
@@ -992,16 +1006,22 @@ double ResourceProviderManagerProcess::gaugeSubscribed()
 ResourceProviderManagerProcess::Metrics::Metrics(
     const ResourceProviderManagerProcess& manager)
   : subscribed(
-      "resource_provider_manager/subscribed",
-      defer(manager, &ResourceProviderManagerProcess::gaugeSubscribed))
+        "resource_provider_manager/subscribed",
+        defer(manager, &ResourceProviderManagerProcess::gaugeSubscribed)),
+    subscriptions("resource_provider_manager/events/subscribe"),
+    disconnections("resource_provider_manager/events/disconnect")
 {
   process::metrics::add(subscribed);
+  process::metrics::add(subscriptions);
+  process::metrics::add(disconnections);
 }
 
 
 ResourceProviderManagerProcess::Metrics::~Metrics()
 {
   process::metrics::remove(subscribed);
+  process::metrics::remove(subscriptions);
+  process::metrics::remove(disconnections);
 }
 
 
diff --git a/src/tests/resource_provider_manager_tests.cpp b/src/tests/resource_provider_manager_tests.cpp
index 455ce7d..7d48f18 100644
--- a/src/tests/resource_provider_manager_tests.cpp
+++ b/src/tests/resource_provider_manager_tests.cpp
@@ -1458,6 +1458,21 @@ TEST_F(ResourceProviderManagerHttpApiTest, Metrics)
 
   AWAIT_READY(updateSlaveMessage);
 
+  JSON::Object snapshot = Metrics();
+
+  ASSERT_EQ(1u, snapshot.values.count("resource_provider_manager/subscribed"));
+  EXPECT_EQ(0, snapshot.values.at("resource_provider_manager/subscribed"));
+
+  ASSERT_EQ(
+      1u, snapshot.values.count("resource_provider_manager/events/subscribe"));
+  EXPECT_EQ(
+      0, snapshot.values.at("resource_provider_manager/events/subscribe"));
+
+  ASSERT_EQ(
+      1u, snapshot.values.count("resource_provider_manager/events/disconnect"));
+  EXPECT_EQ(
+      0, snapshot.values.at("resource_provider_manager/events/disconnect"));
+
   mesos::v1::ResourceProviderInfo resourceProviderInfo;
   resourceProviderInfo.set_type("org.apache.mesos.rp.test");
   resourceProviderInfo.set_name("test");
@@ -1473,13 +1488,45 @@ TEST_F(ResourceProviderManagerHttpApiTest, Metrics)
   EXPECT_CALL(*resourceProvider, subscribed(_))
     .WillOnce(FutureArg<0>(&subscribed));
 
-  resourceProvider->start(endpointDetector, ContentType::PROTOBUF);
+  resourceProvider->start(std::move(endpointDetector), ContentType::PROTOBUF);
 
   AWAIT_READY(subscribed);
 
-  const JSON::Object snapshot = Metrics();
+  snapshot = Metrics();
 
+  ASSERT_EQ(1u, snapshot.values.count("resource_provider_manager/subscribed"));
   EXPECT_EQ(1, snapshot.values.at("resource_provider_manager/subscribed"));
+
+  ASSERT_EQ(
+      1u, snapshot.values.count("resource_provider_manager/events/subscribe"));
+  EXPECT_EQ(
+      1, snapshot.values.at("resource_provider_manager/events/subscribe"));
+
+  ASSERT_EQ(
+      1u, snapshot.values.count("resource_provider_manager/events/disconnect"));
+  EXPECT_EQ(
+      0, snapshot.values.at("resource_provider_manager/events/disconnect"));
+
+  updateSlaveMessage = FUTURE_PROTOBUF(UpdateSlaveMessage(), _, _);
+  resourceProvider.reset();
+
+  // Make sure the resource provider manager processes the disconnection.
+  Clock::settle();
+
+  snapshot = Metrics();
+
+  ASSERT_EQ(1u, snapshot.values.count("resource_provider_manager/subscribed"));
+  EXPECT_EQ(0, snapshot.values.at("resource_provider_manager/subscribed"));
+
+  ASSERT_EQ(
+      1u, snapshot.values.count("resource_provider_manager/events/subscribe"));
+  EXPECT_EQ(
+      1, snapshot.values.at("resource_provider_manager/events/subscribe"));
+
+  ASSERT_EQ(
+      1u, snapshot.values.count("resource_provider_manager/events/disconnect"));
+  EXPECT_EQ(
+      1, snapshot.values.at("resource_provider_manager/events/disconnect"));
 }