You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2019/07/03 22:01:27 UTC

[mesos] branch 1.8.x updated (e938d6e -> 07d053f)

This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a change to branch 1.8.x
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from e938d6e  Added MESOS-9852 to the 1.8.1 CHANGELOG.
     new 0a2d366  Added 'operator-()' for set difference to stout.
     new d39b992  Extracted suppression logic in allocator for use in update framework.
     new 29ed4a3  Fixed bugs in updating framework roles in the allocator.
     new 07d053f  Added MESOS-9870 to the 1.8.1 CHANGELOG.

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 3rdparty/stout/include/stout/set.hpp        |  16 +++-
 CHANGELOG                                   |   1 +
 src/master/allocator/mesos/hierarchical.cpp | 143 ++++++++++++++--------------
 src/master/allocator/mesos/hierarchical.hpp |  10 ++
 4 files changed, 96 insertions(+), 74 deletions(-)


[mesos] 04/04: Added MESOS-9870 to the 1.8.1 CHANGELOG.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.8.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 07d053f68b75505a4386913f05d521fa5e36373d
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Wed Jul 3 17:42:46 2019 -0400

    Added MESOS-9870 to the 1.8.1 CHANGELOG.
---
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG b/CHANGELOG
index 9f3e27d..87146f9 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -16,6 +16,7 @@ Release Notes - Mesos - Version 1.8.1 (WIP)
   * [MESOS-9831] - Master should not report disconnected resource providers.
   * [MESOS-9852] - Slow memory growth in master due to deferred deletion of offer filters and timers.
   * [MESOS-9856] - REVIVE call with specified role(s) clears filters for all roles of a framework.
+  * [MESOS-9870] - Simultaneous adding/removal of a role from framework's roles and its suppressed roles crashes the master.
 
 ** Improvement
   * [MESOS-9759] - Log required quota headroom and available quota headroom in the allocator.


[mesos] 03/04: Fixed bugs in updating framework roles in the allocator.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.8.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 29ed4a3166360002a16574c1d226fd9e7d9d5e97
Author: Andrei Sekretenko <as...@mesosphere.io>
AuthorDate: Wed Jul 3 16:56:19 2019 -0400

    Fixed bugs in updating framework roles in the allocator.
    
    This patch replaces the largest part of the logic around
    (un)suppressing framework roles in the 'updateFramework()'
    method with calling 'suppressRoles()'/'unsuppressRoles()'.
    
    This fixes bugs which are triggered by simulatneous updates of
    framework's roles and its suppressed roles (see MESOS-9870), namely:
     - master crashes due to changing nonexistent role metrics
     - master crash due to activating removed frameworkSorter
     - spurious activation of a role added in suppressed state
    
    Review: https://reviews.apache.org/r/70995/
---
 src/master/allocator/mesos/hierarchical.cpp | 89 +++++++++--------------------
 1 file changed, 26 insertions(+), 63 deletions(-)

diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 993c62c..061b702 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -66,6 +66,11 @@ using process::Timeout;
 
 
 namespace mesos {
+
+// Needed to prevent shadowing of template '::operator-<std::set<T>>'
+// by non-template '::mesos::operator-'
+using ::operator-;
+
 namespace internal {
 namespace master {
 namespace allocator {
@@ -485,38 +490,26 @@ void HierarchicalAllocatorProcess::updateFramework(
 
   Framework& framework = frameworks.at(frameworkId);
 
-  set<string> oldRoles = framework.roles;
-  set<string> newRoles = protobuf::framework::getRoles(frameworkInfo);
-  set<string> oldSuppressedRoles = framework.suppressedRoles;
+  const set<string> oldRoles = framework.roles;
+  const set<string> newRoles = protobuf::framework::getRoles(frameworkInfo);
+  const set<string> oldSuppressedRoles = framework.suppressedRoles;
 
-  // TODO(xujyan): Add a stout set difference method that wraps around
-  // `std::set_difference` for this.
-  const set<string> removedRoles = [&]() {
-    set<string> result = oldRoles;
-    foreach (const string& role, newRoles) {
-      result.erase(role);
-    }
-    return result;
-  }();
+  foreach (const string& role, newRoles - oldRoles) {
+    framework.metrics->addSubscribedRole(role);
 
-  const set<string> newSuppressedRoles = [&]() {
-    set<string> result = suppressedRoles;
-    foreach (const string& role, oldSuppressedRoles) {
-      result.erase(role);
+    // NOTE: It's possible that we're already tracking this framework
+    // under the role because a framework can unsubscribe from a role
+    // while it still has resources allocated to the role.
+    if (!isFrameworkTrackedUnderRole(frameworkId, role)) {
+      trackFrameworkUnderRole(frameworkId, role);
     }
-    return result;
-  }();
-
-  foreach (const string& role, newSuppressedRoles) {
-    framework.metrics->suppressRole(role);
   }
 
-  foreach (const string& role, removedRoles | newSuppressedRoles) {
+  foreach (const string& role, oldRoles - newRoles) {
     CHECK(frameworkSorters.contains(role));
+
     frameworkSorters.at(role)->deactivate(frameworkId.value());
-  }
 
-  foreach (const string& role, removedRoles) {
     // Stop tracking the framework under this role if there are
     // no longer any resources allocated to it.
     if (frameworkSorters.at(role)->allocation(frameworkId.value()).empty()) {
@@ -528,51 +521,21 @@ void HierarchicalAllocatorProcess::updateFramework(
     }
 
     framework.metrics->removeSubscribedRole(role);
-  }
-
-  const set<string> addedRoles = [&]() {
-    set<string> result = newRoles;
-    foreach (const string& role, oldRoles) {
-      result.erase(role);
-    }
-    return result;
-  }();
-
-  const set<string> newRevivedRoles = [&]() {
-    set<string> result = oldSuppressedRoles;
-    foreach (const string& role, suppressedRoles) {
-      result.erase(role);
-    }
-    return result;
-  }();
-
-  foreach (const string& role, newRevivedRoles) {
-    framework.metrics->reviveRole(role);
-  }
-
-  foreach (const string& role, addedRoles) {
-    framework.metrics->addSubscribedRole(role);
-
-    // NOTE: It's possible that we're already tracking this framework
-    // under the role because a framework can unsubscribe from a role
-    // while it still has resources allocated to the role.
-    if (!isFrameworkTrackedUnderRole(frameworkId, role)) {
-      trackFrameworkUnderRole(frameworkId, role);
-    }
-  }
-
-  // TODO(anindya_sinha): We should activate the roles only if the
-  // framework is active (instead of always).
-  foreach (const string& role, addedRoles | newRevivedRoles) {
-    CHECK(frameworkSorters.contains(role));
-    frameworkSorters.at(role)->activate(frameworkId.value());
+    framework.suppressedRoles.erase(role);
   }
 
   framework.roles = newRoles;
-  framework.suppressedRoles = suppressedRoles;
   framework.capabilities = frameworkInfo.capabilities();
   framework.minAllocatableResources =
     unpackFrameworkOfferFilters(frameworkInfo.offer_filters());
+
+  suppressRoles(frameworkId, suppressedRoles);
+  unsuppressRoles(frameworkId, newRoles - suppressedRoles);
+
+  CHECK(framework.suppressedRoles == suppressedRoles)
+    << "After updating framework " << frameworkId
+    << " its set of suppressed roles " << stringify(framework.suppressedRoles)
+    << " differs from required " << stringify(suppressedRoles);
 }
 
 


[mesos] 01/04: Added 'operator-()' for set difference to stout.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.8.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 0a2d366547a9ee4211fdfd372f9162a63948b27a
Author: Andrei Sekretenko <as...@mesosphere.io>
AuthorDate: Mon Jul 1 15:18:50 2019 -0400

    Added 'operator-()' for set difference to stout.
    
    Review: https://reviews.apache.org/r/70974/
---
 3rdparty/stout/include/stout/set.hpp | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/3rdparty/stout/include/stout/set.hpp b/3rdparty/stout/include/stout/set.hpp
index 96238a7..764fc3c 100644
--- a/3rdparty/stout/include/stout/set.hpp
+++ b/3rdparty/stout/include/stout/set.hpp
@@ -13,7 +13,7 @@
 #ifndef __STOUT_SET_HPP__
 #define __STOUT_SET_HPP__
 
-#include <algorithm> // For std::set_intersection.
+#include <algorithm> // For std::set_intersection and std::set_difference.
 #include <set>
 
 template <typename T>
@@ -49,4 +49,18 @@ std::set<T> operator&(const std::set<T>& left, const std::set<T>& right)
   return result;
 }
 
+
+template <typename T>
+std::set<T> operator-(const std::set<T>& left, const std::set<T>& right)
+{
+  std::set<T> result;
+  std::set_difference(
+      left.begin(),
+      left.end(),
+      right.begin(),
+      right.end(),
+      std::inserter(result, result.begin()));
+  return result;
+}
+
 #endif // __STOUT_SET_HPP__


[mesos] 02/04: Extracted suppression logic in allocator for use in update framework.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.8.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit d39b99256aad43cdc7bc415834a29f280562a97a
Author: Andrei Sekretenko <as...@mesosphere.io>
AuthorDate: Wed Jul 3 16:08:33 2019 -0400

    Extracted suppression logic in allocator for use in update framework.
    
    This patch moves the logic of suppressing/unsuppressing a role set from
    the inside of 'suppressOffers()'/'reviveOffers()' into separate methods.
    Specifically, 'reviveOffers()' includes filter clearing logic that we
    don't want when unsuppressing roles during framework update. For
    'supppressOffers()', we need the empty set == all roles semantics, but
    we don't want that in the suppression logic during framework update.
    
    Longer term, the empty set == all roles semantics could be done in
    the master and we won't need the extra function to provide empty set
    == all roles logic in the allocator.
    
    This is a prerequisite for using thes methods to fix
    'updateFramework()' in a subsequent patch.
    
    Review: https://reviews.apache.org/r/70994/
---
 src/master/allocator/mesos/hierarchical.cpp | 54 +++++++++++++++++++++++------
 src/master/allocator/mesos/hierarchical.hpp | 10 ++++++
 2 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index d75ab02..993c62c 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1357,9 +1357,9 @@ void HierarchicalAllocatorProcess::recoverResources(
 }
 
 
-void HierarchicalAllocatorProcess::suppressOffers(
+void HierarchicalAllocatorProcess::suppressRoles(
     const FrameworkID& frameworkId,
-    const set<string>& roles_)
+    const set<string>& roles)
 {
   CHECK(initialized);
   CHECK(frameworks.contains(frameworkId));
@@ -1369,7 +1369,6 @@ void HierarchicalAllocatorProcess::suppressOffers(
   // Deactivating the framework in the sorter is fine as long as
   // SUPPRESS is not parameterized. When parameterization is added,
   // we have to differentiate between the cases here.
-  const set<string>& roles = roles_.empty() ? framework.roles : roles_;
 
   foreach (const string& role, roles) {
     CHECK(frameworkSorters.contains(role));
@@ -1379,12 +1378,14 @@ void HierarchicalAllocatorProcess::suppressOffers(
     framework.metrics->suppressRole(role);
   }
 
+  // TODO(bmahler): This logs roles that were already suppressed,
+  // only log roles that transitioned from unsuppressed -> suppressed.
   LOG(INFO) << "Suppressed offers for roles " << stringify(roles)
             << " of framework " << frameworkId;
 }
 
 
-void HierarchicalAllocatorProcess::reviveOffers(
+void HierarchicalAllocatorProcess::suppressOffers(
     const FrameworkID& frameworkId,
     const set<string>& roles_)
 {
@@ -1392,24 +1393,57 @@ void HierarchicalAllocatorProcess::reviveOffers(
   CHECK(frameworks.contains(frameworkId));
 
   Framework& framework = frameworks.at(frameworkId);
-  framework.inverseOfferFilters.clear();
 
   const set<string>& roles = roles_.empty() ? framework.roles : roles_;
+  suppressRoles(frameworkId, roles);
+}
+
+
+void HierarchicalAllocatorProcess::unsuppressRoles(
+    const FrameworkID& frameworkId,
+    const set<string>& roles)
+{
+  CHECK(initialized);
+  CHECK(frameworks.contains(frameworkId));
+
+  Framework& framework = frameworks.at(frameworkId);
 
-  // Activating the framework in the sorter on REVIVE is fine as long as
+  // Activating the framework in the sorter is fine as long as
   // SUPPRESS is not parameterized. When parameterization is added,
   // we may need to differentiate between the cases here.
   foreach (const string& role, roles) {
-    framework.offerFilters.erase(role);
-
     CHECK(frameworkSorters.contains(role));
-    frameworkSorters.at(role)->activate(frameworkId.value());
 
+    frameworkSorters.at(role)->activate(frameworkId.value());
     framework.suppressedRoles.erase(role);
     framework.metrics->reviveRole(role);
   }
 
-  LOG(INFO) << "Revived offers for roles " << stringify(roles)
+  // TODO(bmahler): This logs roles that were already unsuppressed,
+  // only log roles that transitioned from suppressed -> unsuppressed.
+  LOG(INFO) << "Unsuppressed offers for roles " << stringify(roles)
+            << " of framework " << frameworkId;
+}
+
+
+void HierarchicalAllocatorProcess::reviveOffers(
+    const FrameworkID& frameworkId,
+    const set<string>& roles_)
+{
+  CHECK(initialized);
+  CHECK(frameworks.contains(frameworkId));
+
+  Framework& framework = frameworks.at(frameworkId);
+  framework.inverseOfferFilters.clear();
+
+  const set<string>& roles = roles_.empty() ? framework.roles : roles_;
+  foreach (const string& role, roles) {
+    framework.offerFilters.erase(role);
+  }
+
+  unsuppressRoles(frameworkId, roles);
+
+  LOG(INFO) << "Revived roles " << stringify(roles)
             << " of framework " << frameworkId;
 
   allocate();
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index 2ba44d7..4f71682 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -648,6 +648,16 @@ private:
       const FrameworkID& frameworkId,
       const std::string& role);
 
+  // TODO(bmahler): Take a `Framework*` here (and in other functions!),
+  // it's currently not possible because `Framework` doesn't store the
+  // framework ID.
+  void suppressRoles(
+      const FrameworkID& frameworkId,
+      const std::set<std::string>& roles);
+  void unsuppressRoles(
+      const FrameworkID& frameworkId,
+      const std::set<std::string>& roles);
+
   // `trackReservations` and `untrackReservations` are helpers
   // to track role resource reservations. We need to keep
   // track of reservations to enforce role quota limit