You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by mz...@apache.org on 2019/10/07 15:46:30 UTC
[mesos] 03/07: Track per-role and per-agent allocated resources in
the allocator.
This is an automated email from the ASF dual-hosted git repository.
mzhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 83e262132c680a59aaf4a616db25f0bb2182f956
Author: Andrei Sekretenko <as...@mesosphere.io>
AuthorDate: Fri Oct 4 18:14:49 2019 -0400
Track per-role and per-agent allocated resources in the allocator.
This patch adds tracking of per-role allocated resources in the
allocator's roles tree. Also, tracking of per-agent allocated resources
is added to facilitate correct untracking of the role's allocated
resources on agent removal.
Review: https://reviews.apache.org/r/71488/
---
src/master/allocator/mesos/hierarchical.cpp | 65 ++++++++++++++++++++++++++---
src/master/allocator/mesos/hierarchical.hpp | 22 ++++++++--
2 files changed, 79 insertions(+), 8 deletions(-)
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index b36f893..29f650a 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -315,6 +315,11 @@ bool RoleTree::tryRemove(const std::string& role)
break;
}
+ CHECK(current->allocatedScalars_.empty())
+ << "An empty role " << current->role
+ << " has non-empty allocated scalar resources: "
+ << current->allocatedScalars_;
+
Role* parent = CHECK_NOTNULL(current->parent);
parent->removeChild(current);
@@ -376,6 +381,41 @@ void RoleTree::untrackReservations(const Resources& resources)
}
+void RoleTree::trackAllocated(const Resources& resources_)
+{
+ foreachpair (
+ const string& role,
+ const Resources& resources,
+ resources_.scalars().allocations()) {
+ CHECK_CONTAINS(roles_, role);
+
+ // Track it in this role and in every ancestor, up to the root.
+ for (Role* current = &(roles_.at(role)); current != nullptr;
+ current = current->parent) {
+ current->allocatedScalars_ += resources;
+ }
+ }
+}
+
+
+void RoleTree::untrackAllocated(const Resources& resources_)
+{
+ foreachpair (
+ const string& role,
+ const Resources& resources,
+ resources_.scalars().allocations()) {
+ CHECK_CONTAINS(roles_, role);
+
+ // Untrack it in this role and in every ancestor, up to the root.
+ for (Role* current = &(roles_.at(role)); current != nullptr;
+ current = current->parent) {
+ CHECK_CONTAINS(current->allocatedScalars_, resources);
+ current->allocatedScalars_ -= resources;
+ }
+ }
+}
+
+
void RoleTree::trackFramework(
const FrameworkID& frameworkId, const string& rolePath)
{
@@ -658,6 +698,8 @@ void HierarchicalAllocatorProcess::addFramework(
// The slave struct will already be aware of the allocated
// resources, so we only need to track them in the sorters.
trackAllocatedResources(slaveId, frameworkId, resources);
+
+ roleTree.trackAllocated(resources);
}
LOG(INFO) << "Added framework " << frameworkId;
@@ -898,6 +940,8 @@ void HierarchicalAllocatorProcess::addSlave(
}
trackAllocatedResources(slaveId, frameworkId, allocation);
+
+ roleTree.trackAllocated(allocation);
}
// If we have just a number of recovered agents, we cannot distinguish
@@ -935,6 +979,10 @@ void HierarchicalAllocatorProcess::removeSlave(
{
const Slave& slave = *CHECK_NOTNONE(getSlave(slaveId));
+ // untrackAllocatedResources() potentially removes allocation roles, thus
+ // we need to untrack actually allocated resources in the roles tree first.
+ roleTree.untrackAllocated(slave.totalAllocated);
+
// Untrack resources in roleTree and sorter.
foreachpair (
const FrameworkID& frameworkId,
@@ -1042,7 +1090,8 @@ void HierarchicalAllocatorProcess::addResourceProvider(
// There are two cases here:
//
// (1) The framework has already been added to the allocator.
- // In this case, we track the allocation in the sorters.
+ // In this case, we track the allocation in the sorters
+ // and the role tree.
//
// (2) The framework has not yet been added to the allocator.
// We do not track the resources allocated to this
@@ -1457,10 +1506,13 @@ HierarchicalAllocatorProcess::getInverseOfferStatuses()
return result;
}
+
void HierarchicalAllocatorProcess::transitionOfferedToAllocated(
const SlaveID& slaveId,
const Resources& resources)
{
+ CHECK_NOTNONE(getSlave(slaveId))->totalAllocated += resources;
+ roleTree.trackAllocated(resources);  // Same resources, tracked per role.
}
@@ -1477,12 +1529,15 @@ void HierarchicalAllocatorProcess::recoverResources(
return;
}
- // TODO(asekretenko): untrack allocated resources in the roles tree
- // if recovering actually used resources (isAllocated==true)
-
- Option<Framework*> framework = getFramework(frameworkId);
Option<Slave*> slave = getSlave(slaveId);
+ if (isAllocated && slave.isSome()) {
+ CHECK_CONTAINS((*slave)->totalAllocated, resources);
+ (*slave)->totalAllocated -= resources;
+ roleTree.untrackAllocated(resources);
+ }
+
+ Option<Framework*> framework = getFramework(frameworkId);
// No work to do if either the framework or the agent no longer exists.
//
// The framework may not exist if we dispatched Master::offer before we
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index 7d48bdc..da22603 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -193,6 +193,10 @@ private:
// Note that non-scalar resources, such as ports, are excluded.
ResourceQuantities reservationScalarQuantities_;
+ // Scalar resources actually allocated (i.e. used for launching tasks) to this
+ // role and any of its subroles, both reserved and unreserved, on all agents.
+ Resources allocatedScalars_;
+
hashmap<std::string, Role*> children_;
};
@@ -229,6 +233,10 @@ public:
void trackReservations(const Resources& resources);
void untrackReservations(const Resources& resources);
+ // We keep track of allocated resources which are actually used by frameworks.
+ void trackAllocated(const Resources& resources);
+ void untrackAllocated(const Resources& resources);
+
void trackFramework(
const FrameworkID& frameworkId, const std::string& role);
void untrackFramework(
@@ -280,13 +288,14 @@ public:
const protobuf::slave::Capabilities& _capabilities,
bool _activated,
const Resources& _total,
- const hashmap<FrameworkID, Resources>& _offeredOrAllocated)
+ const hashmap<FrameworkID, Resources>& _allocated)
: info(_info),
capabilities(_capabilities),
activated(_activated),
+ totalAllocated(Resources::sum(_allocated)),
total(_total),
- offeredOrAllocated(_offeredOrAllocated),
- totalOfferedOrAllocated(Resources::sum(_offeredOrAllocated)),
+ offeredOrAllocated(_allocated),
+ totalOfferedOrAllocated(Resources::sum(_allocated)),
shared(_total.shared()),
hasGpu_(_total.gpus().getOrElse(0) > 0)
{
@@ -399,6 +408,13 @@ public:
// to send out `InverseOffers`.
Option<Maintenance> maintenance;
+ // Sum of all allocated (i.e. occupied by running tasks) resources on the
+ // agent. This information is needed to untrack allocated resources when the
+ // agent is removed, because the master is not obligated to separately inform
+ // the allocator that resources of the removed agent are not offered/allocated
+ // anymore.
+ Resources totalAllocated;
+
private:
void updateAvailable()
{