You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2019/07/03 21:07:40 UTC
[mesos] 01/03: Fixed bugs in updating framework roles in the
allocator.
This is an automated email from the ASF dual-hosted git repository.
bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit d96669d563e60563198c99604b7c5d62a28506aa
Author: Andrei Sekretenko <as...@mesosphere.io>
AuthorDate: Wed Jul 3 16:56:19 2019 -0400
Fixed bugs in updating framework roles in the allocator.
This patch replaces the largest part of the logic around
(un)suppressing framework roles in the 'updateFramework()'
method with calling 'suppressRoles()'/'unsuppressRoles()'.
This fixes bugs which are triggered by simulatneous updates of
framework's roles and its suppressed roles (see MESOS-9870), namely:
- master crashes due to changing nonexistent role metrics
- master crash due to activating removed frameworkSorter
- spurious activation of a role added in suppressed state
Review: https://reviews.apache.org/r/70995/
---
src/master/allocator/mesos/hierarchical.cpp | 89 ++++++++---------------------
1 file changed, 25 insertions(+), 64 deletions(-)
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index c2333bc..f1fa838 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -67,6 +67,11 @@ using process::Timeout;
namespace mesos {
+
+// Needed to prevent shadowing of template '::operator-<std::set<T>>'
+// by non-template '::mesos::operator-'
+using ::operator-;
+
namespace internal {
namespace master {
namespace allocator {
@@ -481,39 +486,26 @@ void HierarchicalAllocatorProcess::updateFramework(
Framework& framework = frameworks.at(frameworkId);
- set<string> oldRoles = framework.roles;
- set<string> newRoles = protobuf::framework::getRoles(frameworkInfo);
- set<string> oldSuppressedRoles = framework.suppressedRoles;
+ const set<string> oldRoles = framework.roles;
+ const set<string> newRoles = protobuf::framework::getRoles(frameworkInfo);
+ const set<string> oldSuppressedRoles = framework.suppressedRoles;
- // TODO(xujyan): Add a stout set difference method that wraps around
- // `std::set_difference` for this.
- const set<string> removedRoles = [&]() {
- set<string> result = oldRoles;
- foreach (const string& role, newRoles) {
- result.erase(role);
- }
- return result;
- }();
+ foreach (const string& role, newRoles - oldRoles) {
+ framework.metrics->addSubscribedRole(role);
- const set<string> newSuppressedRoles = [&]() {
- set<string> result = suppressedRoles;
- foreach (const string& role, oldSuppressedRoles) {
- result.erase(role);
+ // NOTE: It's possible that we're already tracking this framework
+ // under the role because a framework can unsubscribe from a role
+ // while it still has resources allocated to the role.
+ if (!isFrameworkTrackedUnderRole(frameworkId, role)) {
+ trackFrameworkUnderRole(frameworkId, role);
}
- return result;
- }();
-
- foreach (const string& role, newSuppressedRoles) {
- framework.metrics->suppressRole(role);
}
- foreach (const string& role, removedRoles | newSuppressedRoles) {
+ foreach (const string& role, oldRoles - newRoles) {
CHECK_CONTAINS(frameworkSorters, role);
frameworkSorters.at(role)->deactivate(frameworkId.value());
- }
- foreach (const string& role, removedRoles) {
// Stop tracking the framework under this role if there are
// no longer any resources allocated to it.
if (frameworkSorters.at(role)->allocation(frameworkId.value()).empty()) {
@@ -525,52 +517,21 @@ void HierarchicalAllocatorProcess::updateFramework(
}
framework.metrics->removeSubscribedRole(role);
- }
-
- const set<string> addedRoles = [&]() {
- set<string> result = newRoles;
- foreach (const string& role, oldRoles) {
- result.erase(role);
- }
- return result;
- }();
-
- const set<string> newRevivedRoles = [&]() {
- set<string> result = oldSuppressedRoles;
- foreach (const string& role, suppressedRoles) {
- result.erase(role);
- }
- return result;
- }();
-
- foreach (const string& role, newRevivedRoles) {
- framework.metrics->reviveRole(role);
- }
-
- foreach (const string& role, addedRoles) {
- framework.metrics->addSubscribedRole(role);
-
- // NOTE: It's possible that we're already tracking this framework
- // under the role because a framework can unsubscribe from a role
- // while it still has resources allocated to the role.
- if (!isFrameworkTrackedUnderRole(frameworkId, role)) {
- trackFrameworkUnderRole(frameworkId, role);
- }
- }
-
- // TODO(anindya_sinha): We should activate the roles only if the
- // framework is active (instead of always).
- foreach (const string& role, addedRoles | newRevivedRoles) {
- CHECK_CONTAINS(frameworkSorters, role);
-
- frameworkSorters.at(role)->activate(frameworkId.value());
+ framework.suppressedRoles.erase(role);
}
framework.roles = newRoles;
- framework.suppressedRoles = suppressedRoles;
framework.capabilities = frameworkInfo.capabilities();
framework.minAllocatableResources =
unpackFrameworkOfferFilters(frameworkInfo.offer_filters());
+
+ suppressRoles(frameworkId, suppressedRoles);
+ unsuppressRoles(frameworkId, newRoles - suppressedRoles);
+
+ CHECK(framework.suppressedRoles == suppressedRoles)
+ << "After updating framework " << frameworkId
+ << " its set of suppressed roles " << stringify(framework.suppressedRoles)
+ << " differs from required " << stringify(suppressedRoles);
}