You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2019/07/03 21:07:40 UTC

[mesos] 01/03: Fixed bugs in updating framework roles in the allocator.

This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit d96669d563e60563198c99604b7c5d62a28506aa
Author: Andrei Sekretenko <as...@mesosphere.io>
AuthorDate: Wed Jul 3 16:56:19 2019 -0400

    Fixed bugs in updating framework roles in the allocator.
    
    This patch replaces the largest part of the logic around
    (un)suppressing framework roles in the 'updateFramework()'
    method with calling 'suppressRoles()'/'unsuppressRoles()'.
    
    This fixes bugs which are triggered by simulatneous updates of
    framework's roles and its suppressed roles (see MESOS-9870), namely:
     - master crashes due to changing nonexistent role metrics
     - master crash due to activating removed frameworkSorter
     - spurious activation of a role added in suppressed state
    
    Review: https://reviews.apache.org/r/70995/
---
 src/master/allocator/mesos/hierarchical.cpp | 89 ++++++++---------------------
 1 file changed, 25 insertions(+), 64 deletions(-)

diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index c2333bc..f1fa838 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -67,6 +67,11 @@ using process::Timeout;
 
 
 namespace mesos {
+
+// Needed to prevent shadowing of template '::operator-<std::set<T>>'
+// by non-template '::mesos::operator-'
+using ::operator-;
+
 namespace internal {
 namespace master {
 namespace allocator {
@@ -481,39 +486,26 @@ void HierarchicalAllocatorProcess::updateFramework(
 
   Framework& framework = frameworks.at(frameworkId);
 
-  set<string> oldRoles = framework.roles;
-  set<string> newRoles = protobuf::framework::getRoles(frameworkInfo);
-  set<string> oldSuppressedRoles = framework.suppressedRoles;
+  const set<string> oldRoles = framework.roles;
+  const set<string> newRoles = protobuf::framework::getRoles(frameworkInfo);
+  const set<string> oldSuppressedRoles = framework.suppressedRoles;
 
-  // TODO(xujyan): Add a stout set difference method that wraps around
-  // `std::set_difference` for this.
-  const set<string> removedRoles = [&]() {
-    set<string> result = oldRoles;
-    foreach (const string& role, newRoles) {
-      result.erase(role);
-    }
-    return result;
-  }();
+  foreach (const string& role, newRoles - oldRoles) {
+    framework.metrics->addSubscribedRole(role);
 
-  const set<string> newSuppressedRoles = [&]() {
-    set<string> result = suppressedRoles;
-    foreach (const string& role, oldSuppressedRoles) {
-      result.erase(role);
+    // NOTE: It's possible that we're already tracking this framework
+    // under the role because a framework can unsubscribe from a role
+    // while it still has resources allocated to the role.
+    if (!isFrameworkTrackedUnderRole(frameworkId, role)) {
+      trackFrameworkUnderRole(frameworkId, role);
     }
-    return result;
-  }();
-
-  foreach (const string& role, newSuppressedRoles) {
-    framework.metrics->suppressRole(role);
   }
 
-  foreach (const string& role, removedRoles | newSuppressedRoles) {
+  foreach (const string& role, oldRoles - newRoles) {
     CHECK_CONTAINS(frameworkSorters, role);
 
     frameworkSorters.at(role)->deactivate(frameworkId.value());
-  }
 
-  foreach (const string& role, removedRoles) {
     // Stop tracking the framework under this role if there are
     // no longer any resources allocated to it.
     if (frameworkSorters.at(role)->allocation(frameworkId.value()).empty()) {
@@ -525,52 +517,21 @@ void HierarchicalAllocatorProcess::updateFramework(
     }
 
     framework.metrics->removeSubscribedRole(role);
-  }
-
-  const set<string> addedRoles = [&]() {
-    set<string> result = newRoles;
-    foreach (const string& role, oldRoles) {
-      result.erase(role);
-    }
-    return result;
-  }();
-
-  const set<string> newRevivedRoles = [&]() {
-    set<string> result = oldSuppressedRoles;
-    foreach (const string& role, suppressedRoles) {
-      result.erase(role);
-    }
-    return result;
-  }();
-
-  foreach (const string& role, newRevivedRoles) {
-    framework.metrics->reviveRole(role);
-  }
-
-  foreach (const string& role, addedRoles) {
-    framework.metrics->addSubscribedRole(role);
-
-    // NOTE: It's possible that we're already tracking this framework
-    // under the role because a framework can unsubscribe from a role
-    // while it still has resources allocated to the role.
-    if (!isFrameworkTrackedUnderRole(frameworkId, role)) {
-      trackFrameworkUnderRole(frameworkId, role);
-    }
-  }
-
-  // TODO(anindya_sinha): We should activate the roles only if the
-  // framework is active (instead of always).
-  foreach (const string& role, addedRoles | newRevivedRoles) {
-    CHECK_CONTAINS(frameworkSorters, role);
-
-    frameworkSorters.at(role)->activate(frameworkId.value());
+    framework.suppressedRoles.erase(role);
   }
 
   framework.roles = newRoles;
-  framework.suppressedRoles = suppressedRoles;
   framework.capabilities = frameworkInfo.capabilities();
   framework.minAllocatableResources =
     unpackFrameworkOfferFilters(frameworkInfo.offer_filters());
+
+  suppressRoles(frameworkId, suppressedRoles);
+  unsuppressRoles(frameworkId, newRoles - suppressedRoles);
+
+  CHECK(framework.suppressedRoles == suppressedRoles)
+    << "After updating framework " << frameworkId
+    << " its set of suppressed roles " << stringify(framework.suppressedRoles)
+    << " differs from required " << stringify(suppressedRoles);
 }