You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2017/12/16 02:24:47 UTC
[3/3] mesos git commit: Fixed a bug with quota headroom that can
leave reservations unallocated.
Fixed a bug with quota headroom that can leave reservations unallocated.
Also fixed a bug where quota headroom may include revocable resources.
Review: https://reviews.apache.org/r/64636/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/4fa90924
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/4fa90924
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/4fa90924
Branch: refs/heads/master
Commit: 4fa909245c1ec8a4bc50f9867164be1f1e3653ef
Parents: 91ea75e
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Fri Dec 15 14:52:22 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Fri Dec 15 18:22:06 2017 -0800
----------------------------------------------------------------------
src/master/allocator/mesos/hierarchical.cpp | 168 +++++++++++++----------
1 file changed, 97 insertions(+), 71 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/4fa90924/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index fccd71c..2cabafe 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1819,73 +1819,96 @@ void HierarchicalAllocatorProcess::__allocate()
}
}
- // Calculate the total quantity of scalar resources (including revocable
- // and reserved) that are available for allocation in the next round. We
- // need this in order to ensure we do not over-allocate resources during
- // the second stage.
- //
- // For performance reasons (MESOS-4833), this omits information about
- // dynamic reservations or persistent volumes in the resources.
- //
- // NOTE: We use total cluster resources, and not just those based on the
- // agents participating in the current allocation (i.e. provided as an
- // argument to the `allocate()` call) so that frameworks in roles without
- // quota are not unnecessarily deprived of resources.
- Resources remainingClusterResources = roleSorter->totalScalarQuantities();
- foreachkey (const string& role, roles) {
- remainingClusterResources -= roleSorter->allocationScalarQuantities(role);
- }
-
// Frameworks in a quota'ed role may temporarily reject resources by
- // filtering or suppressing offers. Hence quotas may not be fully allocated.
- Resources unallocatedQuotaResources;
- foreachpair (const string& name, const Quota& quota, quotas) {
+ // filtering or suppressing offers. Hence, quotas may not be fully
+ // allocated and if so we need to hold back some headroom to ensure
+ // the quota can later be satisfied when needed.
+ Resources requiredHeadroom;
+ foreachpair (const string& role, const Quota& quota, quotas) {
// Compute the amount of quota that the role does not have allocated.
//
// NOTE: Revocable resources are excluded in `quotaRoleSorter`.
// NOTE: Only scalars are considered for quota.
- Resources allocated = getQuotaRoleAllocatedResources(name);
+ Resources allocated = getQuotaRoleAllocatedResources(role);
const Resources required = quota.info.guarantee();
- unallocatedQuotaResources += (required - allocated);
+ requiredHeadroom += (required - allocated);
}
- // Determine how many resources we may allocate during the next stage.
+ // The amount of headroom currently available consists of
+ // unallocated resources that can be allocated to satisfy quota.
+ // What that means is that the resources need to be:
//
- // NOTE: Resources for quota allocations are already accounted in
- // `remainingClusterResources`.
- remainingClusterResources -= unallocatedQuotaResources;
-
- // Shared resources are excluded in determination of over-allocation of
- // available resources since shared resources are always allocatable.
- remainingClusterResources = remainingClusterResources.nonShared();
-
- // To ensure we do not over-allocate resources during the second stage
- // with all frameworks, we use 2 stopping criteria:
- // * No available resources for the second stage left, i.e.
- // `remainingClusterResources` - `allocatedStage2` is empty.
- // * A potential offer will force the second stage to use more resources
- // than available, i.e. `remainingClusterResources` does not contain
- // (`allocatedStage2` + potential offer). In this case we skip this
- // agent and continue to the next one.
+ // (1) Non-revocable
+ // (2) Unreserved, or reserved to a quota role.
//
- // NOTE: Like `remainingClusterResources`, `allocatedStage2` omits
- // information about dynamic reservations and persistent volumes for
- // performance reasons. This invariant is preserved because we only add
- // resources to it that have also had this metadata stripped from them
- // (typically by using `Resources::createStrippedScalarQuantity`).
- Resources allocatedStage2;
-
- // At this point resources for quotas are allocated or accounted for.
- // Proceed with allocating the remaining free pool.
- foreach (const SlaveID& slaveId, slaveIds) {
- // If there are no resources available for the second stage, stop.
- //
- // TODO(mzhu): Breaking early here means that we may leave reservations
- // unallocated. See MESOS-8293.
- if (!allocatable(remainingClusterResources - allocatedStage2)) {
- break;
+ // TODO(bmahler): Note that we currently do not consider quota
+ // headroom on a per-role basis, which means we may not hold
+ // back sufficient headroom for a particular quota role if some
+ // other quota roles have more reservations than quota.
+ // See MESOS-8339.
+ //
+ // We will allocate resources while ensuring that the required
+ // headroom is still available. Otherwise we will not be able
+ // to satisfy quota later. Reservations to non-quota roles and
+ // revocable resources will always be included in the offers
+ // since these are not part of the available headroom for quota.
+ //
+ // available headroom = unallocated resources -
+ // unallocated non-quota role reservations -
+ // unallocated revocable resources
+
+ // NOTE: `totalScalarQuantities` omits dynamic reservation,
+ // persistent volume info, and allocation info. We additionally
+ // remove the static reservations here via `toUnreserved()`.
+ Resources availableHeadroom =
+ roleSorter->totalScalarQuantities().toUnreserved();
+
+ // Subtract allocated resources from the total.
+ foreachkey (const string& role, roles) {
+ // NOTE: `totalScalarQuantities` omits dynamic reservation,
+ // persistent volume info, and allocation info. We additionally
+ // remove the static reservations here via `toUnreserved()`.
+ availableHeadroom -=
+ roleSorter->allocationScalarQuantities(role).toUnreserved();
+ }
+
+ // Subtract non-quota role reservations.
+ foreachkey (const string& role, reservationScalarQuantities) {
+ if (quotas.contains(role)) {
+ continue; // Skip quota roles.
+ }
+
+ hashmap<SlaveID, Resources> allocations;
+ if (roleSorter->contains(role)) {
+ allocations = roleSorter->allocation(role);
+ }
+
+ Resources unallocatedNonQuotaRoleReservations =
+ reservationScalarQuantities.get(role).getOrElse(Resources());
+
+ foreachvalue (const Resources& resources, allocations) {
+ // NOTE: `totalScalarQuantities` omits dynamic reservation,
+ // persistent volume info, and allocation info. We additionally
+ // remove the static reservations here via `toUnreserved()`.
+ unallocatedNonQuotaRoleReservations -=
+ resources.reserved().createStrippedScalarQuantity().toUnreserved();
}
+ // Subtract the unallocated reservations for this non-quota
+ // role from the headroom.
+ availableHeadroom -= unallocatedNonQuotaRoleReservations;
+ }
+
+ // Subtract revocable resources.
+ foreachvalue (const Slave& slave, slaves) {
+ // NOTE: `totalScalarQuantities` omits dynamic reservation,
+ // persistent volume info, and allocation info. We additionally
+ // remove the static reservations here via `toUnreserved()`.
+ availableHeadroom -= slave.available().revocable()
+ .createStrippedScalarQuantity().toUnreserved();
+ }
+
+ foreach (const SlaveID& slaveId, slaveIds) {
foreach (const string& role, roleSorter->sort()) {
// In the second allocation stage, we only allocate
// for non-quota roles.
@@ -1982,6 +2005,20 @@ void HierarchicalAllocatorProcess::__allocate()
});
}
+ // If allocating these resources would reduce the headroom
+ // below what is required, we will hold them back.
+ const Resources headroomToAllocate = resources
+ .scalars().unreserved().nonRevocable();
+
+ bool sufficientHeadroom =
+ (availableHeadroom -
+ headroomToAllocate.createStrippedScalarQuantity())
+ .contains(requiredHeadroom);
+
+ if (!sufficientHeadroom) {
+ resources -= headroomToAllocate;
+ }
+
// If the resources are not allocatable, ignore. We cannot break
// here, because another framework under the same role could accept
// revocable resources and breaking would skip all other frameworks.
@@ -1994,21 +2031,6 @@ void HierarchicalAllocatorProcess::__allocate()
continue;
}
- // If the offer generated by `resources` would force the second
- // stage to use more than `remainingClusterResources`, move along.
- // We do not terminate early, as offers generated further in the
- // loop may be small enough to fit within `remainingClusterResources`.
- //
- // We exclude shared resources from over-allocation check because
- // shared resources are always allocatable.
- const Resources scalarQuantity =
- resources.nonShared().createStrippedScalarQuantity();
-
- if (!remainingClusterResources.contains(
- allocatedStage2 + scalarQuantity)) {
- continue;
- }
-
VLOG(2) << "Allocating " << resources << " on agent " << slaveId
<< " to role " << role << " of framework " << frameworkId;
@@ -2021,7 +2043,11 @@ void HierarchicalAllocatorProcess::__allocate()
// agent as part of quota.
offerable[frameworkId][role][slaveId] += resources;
offeredSharedResources[slaveId] += resources.shared();
- allocatedStage2 += scalarQuantity;
+
+ if (sufficientHeadroom) {
+ availableHeadroom -=
+ headroomToAllocate.createStrippedScalarQuantity();
+ }
slave.allocated += resources;