Posted to commits@mesos.apache.org by bm...@apache.org on 2018/05/03 00:00:57 UTC

[01/20] mesos git commit: Fixed an allocator test to avoid using quota on a hierarchical role.

Repository: mesos
Updated Branches:
  refs/heads/1.4.x 315d04743 -> 35fcdd778


Fixed an allocator test to avoid using quota on a hierarchical role.

Currently, hierarchical quota only supports setting quota on
top-level roles; setting quota on a nested (non-top-level) role
is not supported.
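
As an illustration, a minimal sketch (the helper below is
hypothetical, not part of this patch) of how a top-level role can
be distinguished, given that hierarchical role names are
"/"-separated (e.g. "a" is top-level, "a/b" is not):

  #include <string>

  // Hypothetical helper: a top-level role name contains no '/'.
  static bool isTopLevelRole(const std::string& role)
  {
    return role.find('/') == std::string::npos;
  }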

Review: https://reviews.apache.org/r/64321/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5adc12ec
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5adc12ec
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5adc12ec

Branch: refs/heads/1.4.x
Commit: 5adc12ec36308108f5a0129353093e0a8f6fb160
Parents: 315d047
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Mon Dec 4 19:24:33 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:43:12 2018 -0700

----------------------------------------------------------------------
 src/tests/hierarchical_allocator_tests.cpp | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5adc12ec/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index e68e39a..1f27af0 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -4588,8 +4588,7 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
 
 
 // This test ensures that resources reserved to ancestor roles can be offered
-// to their descendants. Since both quota and fair-share stages perform
-// reservation allocations, we use a quota'd role to test both cases.
+// to their descendants.
 TEST_F(HierarchicalAllocatorTest, OfferAncestorReservationsToDescendantChild)
 {
   // Pausing the clock is not necessary, but ensures that the test
@@ -4601,9 +4600,6 @@ TEST_F(HierarchicalAllocatorTest, OfferAncestorReservationsToDescendantChild)
 
   initialize();
 
-  const Quota quota = createQuota(ROLE, "cpus:1;mem:512");
-  allocator->setQuota(ROLE, quota);
-
   FrameworkInfo framework = createFrameworkInfo({ROLE});
   allocator->addFramework(framework.id(), framework, {}, true, {});
 
@@ -4617,8 +4613,7 @@ TEST_F(HierarchicalAllocatorTest, OfferAncestorReservationsToDescendantChild)
       {});
 
   {
-    // All the resources of agent1 are offered, and this
-    // should satisfy the quota of ROLE.
+    // All the resources of agent1 are offered.
     Allocation expected = Allocation(
         framework.id(),
         {{ROLE, {{agent1.id(), agent1.resources()}}}});
@@ -4639,11 +4634,11 @@ TEST_F(HierarchicalAllocatorTest, OfferAncestorReservationsToDescendantChild)
       {});
 
   {
-    // Since quota of ROLE is already satisfied by agent1,
-    // we expect only reserved resource of agent2 is offered.
+    // We expect all resources of agent2, including those reserved
+    // for the parent of ROLE, to be offered.
     Allocation expected = Allocation(
         framework.id(),
-        {{ROLE, {{agent2.id(), Resources(agent2.resources()).reserved()}}}});
+        {{ROLE, {{agent2.id(), agent2.resources()}}}});
 
     AWAIT_EXPECT_EQ(expected, allocations.get());
   }


[14/20] mesos git commit: Fixed a bug where quota headroom is under-calculated.

Posted by bm...@apache.org.
Fixed a bug where quota headroom is under-calculated.

When calculating the quota headroom, we failed to consider an
ancestor's reservations allocated to a child role. This leads to
under-calculation of the available headroom and excessive
resources being set aside as headroom. See MESOS-8604.

This patch fixes the issue by counting an ancestor's reservations
allocated to a child as allocated reservations, even though the
child itself has no reservations.
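
Illustrative arithmetic (numbers hypothetical): suppose role "a"
reserves 10 cpus, of which 4 are allocated to its child "a/b".

  // Before the fix, the 4 cpus allocated to "a/b" did not count
  // against "a"'s reservations, overstating "a"'s unallocated
  // reservations and under-calculating the headroom by 4 cpus.
  double reservations = 10.0;     // cpus reserved to role "a"
  double allocatedToChild = 4.0;  // of which allocated to "a/b"

  double buggyUnallocated = reservations;                     // 10 (wrong)
  double fixedUnallocated = reservations - allocatedToChild;  //  6 (correct)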

Review: https://reviews.apache.org/r/65806/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/740be44e
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/740be44e
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/740be44e

Branch: refs/heads/1.4.x
Commit: 740be44e7960711acbfc1933035b2f89eabef01a
Parents: d592914
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Mon Feb 26 19:25:09 2018 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:44:26 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/740be44e/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index afbc94b..e871999 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1594,30 +1594,34 @@ void HierarchicalAllocatorProcess::__allocate()
       roleSorter->allocationScalarQuantities(role).toUnreserved();
   }
 
-  // Subtract all unallocated reservations.
-  foreachkey (const string& role, reservationScalarQuantities) {
+  // Calculate total allocated reservations. Note that we need to ensure
+  // we count a reservation for "a" being allocated to "a/b", therefore
+  // we cannot simply loop over the reservations' roles.
+  Resources totalAllocatedReservationScalarQuantities;
+  foreachkey (const string& role, roles) {
     hashmap<SlaveID, Resources> allocations;
     if (quotaRoleSorter->contains(role)) {
       allocations = quotaRoleSorter->allocation(role);
     } else if (roleSorter->contains(role)) {
       allocations = roleSorter->allocation(role);
+    } else {
+      continue; // This role has no allocation.
     }
 
-    Resources unallocatedReservations =
-      reservationScalarQuantities.get(role).getOrElse(Resources());
-
     foreachvalue (const Resources& resources, allocations) {
       // NOTE: `totalScalarQuantities` omits dynamic reservation,
       // persistent volume info, and allocation info. We additionally
       // remove the static reservations here via `toUnreserved()`.
-      unallocatedReservations -=
+      totalAllocatedReservationScalarQuantities +=
         resources.reserved().createStrippedScalarQuantity().toUnreserved();
     }
-
-    // Subtract the unallocated reservations for this role from the headroom.
-    availableHeadroom -= unallocatedReservations;
   }
 
+  // Subtract total unallocated reservations.
+  availableHeadroom -=
+    Resources::sum(reservationScalarQuantities) -
+    totalAllocatedReservationScalarQuantities;
+
   // Subtract revocable resources.
   foreachvalue (const Slave& slave, slaves) {
     // NOTE: `totalScalarQuantities` omits dynamic reservation,


[13/20] mesos git commit: Fixed a quota headroom violation during quota allocation.

Posted by bm...@apache.org.
Fixed a quota headroom violation during quota allocation.

In the quota role allocation stage, if a role gets some resources
on an agent to meet its quota, it will also get all other resources
on the same agent that it does not have quota for. This may violate
the guarantees of other roles that have set quota on these resources.

We fix this issue during the quota allocation stage by ensuring that,
if a role has no quota set for a scalar resource, it will get that
resource only when two conditions are both met:

  (1) It is tentatively being allocated some other resources on the
      same agent to meet its quota, or some reservations (which may
      or may not involve quota resources); and

  (2) After allocating those resources, quota headroom is still
      above the required amount.
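
A minimal sketch of how the two conditions combine (identifiers
follow the diff below; control flow simplified):

  // `resources` holds what the role is tentatively being allocated
  // on this agent (quota resources and/or reservations), while
  // `nonQuotaResources` holds resources the role has no quota for,
  // already chopped so that the required headroom is preserved
  // (condition 2).
  if (!resources.empty()) {          // condition (1)
    resources += nonQuotaResources;
  }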

Also refactored the fine-grained quota allocation logic.

Review: https://reviews.apache.org/r/64793/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d5929146
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d5929146
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d5929146

Branch: refs/heads/1.4.x
Commit: d5929146a1841c0b69778fcd9d5bfb3a92292a26
Parents: 3f7b7f9
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Fri Dec 22 14:05:08 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:44:22 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 152 ++++++++++++-----------
 1 file changed, 80 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/d5929146/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index dade4ae..afbc94b 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1746,8 +1746,9 @@ void HierarchicalAllocatorProcess::__allocate()
         //      allocate 10 cpus to the role to keep it within its limit.
         //
         // In the case that the role needs some of the resources on this
-        // agent to make progress towards its quota, we'll *also* allocate
-        // all of the resources for which it does not have quota.
+        // agent to make progress towards its quota, or the role is being
+        // allocated some reservation(s), we'll *also* allocate all of
+        // the resources for which it does not have quota.
         //
         // E.g. The agent has 1 cpu, 1024 mem, 1024 disk, 1 gpu, 5 ports
         //      and the role has quota for 1 cpu, 1024 mem. We'll include
@@ -1755,13 +1756,9 @@ void HierarchicalAllocatorProcess::__allocate()
         //      role not having any quota guarantee for them.
         //
         // We have to do this for now because it's not possible to set
-        // quota on non-scalar resources, like ports. However, for scalar
-        // resources that can have quota set, we should ideally make
-        // sure that when we include them, we're not violating some
-        // other role's guarantee!
-        //
-        // TODO(mzhu): Check the headroom when we're including scalar
-        // resources that this role does not have quota for.
+        // quota on non-scalar resources, like ports. A scalar resource
+        // that this role has no quota for can be allocated as long
+        // as the quota headroom is not violated.
         //
         // TODO(mzhu): Since we're treating the resources with unset
         // quota as having no guarantee and no limit, these should be
@@ -1778,78 +1775,92 @@ void HierarchicalAllocatorProcess::__allocate()
         // Unreserved resources that are tentatively going to be
         // allocated towards this role's quota. These resources may
         // not get allocated due to framework filters.
-        Resources newQuotaAllocation;
-
-        if (!unsatisfiedQuota.empty()) {
-          Resources unreserved = available.nonRevocable().unreserved();
-
-          set<string> quotaResourceNames =
-            Resources(quota.info.guarantee()).names();
-
-          // When "chopping" resources, there is more than 1 "chop" that
-          // can be done to satisfy the limits. Consider the case with
-          // two disks of 1GB, one is PATH and another is MOUNT. And a role
-          // has a "disk" quota of 1GB. We could pick either of the disks here,
-          // but not both.
-          //
-          // In order to avoid repeatedly choosing the same "chop" of
-          // the resources each time we allocate, we introduce some
-          // randomness by shuffling the resources.
-          google::protobuf::RepeatedPtrField<Resource>
-            resourceVector = unreserved;
-          random_shuffle(resourceVector.begin(), resourceVector.end());
-
-          foreach (const Resource& resource, resourceVector) {
-            if (quotaResourceNames.count(resource.name()) == 0) {
-              // This resource has no quota set. Allocate it.
-              resources += resource;
-              continue;
-            }
+        // These are __quantities__ with no meta-data.
+        Resources newQuotaAllocationScalarQuantities;
 
-            // Currently, we guarantee that quota resources are scalars.
-            CHECK_EQ(resource.type(), Value::SCALAR);
+        // We put resources that this role has no quota for in
+        // `nonQuotaResources` tentatively.
+        Resources nonQuotaResources;
 
-            Option<Value::Scalar> newUnsatisfiedQuotaScalar =
-              (unsatisfiedQuota -
-                newQuotaAllocation.createStrippedScalarQuantity())
-                  .get<Value::Scalar>(resource.name());
+        Resources unreserved = available.nonRevocable().unreserved();
 
-            if (newUnsatisfiedQuotaScalar.isNone()) {
-              // Quota for this resource has been satisfied.
-              continue;
-            }
+        set<string> quotaResourceNames =
+          Resources(quota.info.guarantee()).names();
 
-            const Value::Scalar& resourceScalar = resource.scalar();
-
-            if (resourceScalar <= newUnsatisfiedQuotaScalar.get()) {
-              // We can safely allocate `resource` entirely here without
-              // breaking the quota limit.
-              resources += resource;
-              newQuotaAllocation += resource;
+        // When "chopping" resources, there is more than 1 "chop" that
+        // can be done to satisfy the limits. Consider the case with
+        // two disks of 1GB, one is PATH and another is MOUNT. And a
+        // role has a "disk" quota of 1GB. We could pick either of
+        // the disks here, but not both.
+        //
+        // In order to avoid repeatedly choosing the same "chop" of
+        // the resources each time we allocate, we introduce some
+        // randomness by shuffling the resources.
+        google::protobuf::RepeatedPtrField<Resource>
+          resourceVector = unreserved;
+        random_shuffle(resourceVector.begin(), resourceVector.end());
+
+        foreach (Resource& resource, resourceVector) {
+          if (resource.type() != Value::SCALAR) {
+            // We currently do not support quota for non-scalar resources;
+            // add it to `nonQuotaResources`. See `nonQuotaResources`
+            // regarding how these resources are allocated.
+            nonQuotaResources += resource;
+            continue;
+          }
 
-              continue;
+          if (quotaResourceNames.count(resource.name()) == 0) {
+            // When allocating a resource that this role has NO
+            // quota for, the concern is that it should not break
+            // the quota headroom.
+            //
+            // Allocation Limit = Available Headroom - Required Headroom -
+            //                    Tentative Allocation to Role
+            Resources upperLimitScalarQuantities =
+              availableHeadroom - requiredHeadroom -
+              (newQuotaAllocationScalarQuantities +
+                nonQuotaResources.createStrippedScalarQuantity());
+
+            Option<Value::Scalar> limitScalar =
+              upperLimitScalarQuantities.get<Value::Scalar>(resource.name());
+
+            if (limitScalar.isNone()) {
+              continue; // Already have a headroom deficit.
             }
 
-            // At this point, we need to chop `resource` to satisfy
-            // the quota. We chop `resource` by making a copy and
-            // shrinking its scalar value
-            Resource choppedResourceToAllocate = resource;
-            choppedResourceToAllocate.mutable_scalar()
-              ->CopyFrom(newUnsatisfiedQuotaScalar.get());
-
-            // Not all resources are choppable. Some resources such as MOUNT
-            // disk has to be offered entirely. We use resources `contains()`
-            // utility to verify this. Specifically, if a resource `contains()`
-            // a smaller version of itself, then it can be safely chopped.
-            if (!Resources(resource).contains(choppedResourceToAllocate)) {
-              continue;
+            if (Resources::shrink(&resource, limitScalar.get())) {
+              nonQuotaResources += resource;
+            }
+          } else {
+            // When allocating a resource that this role has quota
+            // for, the concern is that it should not exceed this
+            // role's unsatisfied quota.
+            Resources upperLimitScalarQuantities =
+              unsatisfiedQuota - newQuotaAllocationScalarQuantities;
+
+            Option<Value::Scalar> limitScalar =
+              upperLimitScalarQuantities.get<Value::Scalar>(resource.name());
+
+            if (limitScalar.isNone()) {
+              continue; // Quota limit already met.
             }
 
-            resources += choppedResourceToAllocate;
-            newQuotaAllocation += choppedResourceToAllocate;
+            if (Resources::shrink(&resource, limitScalar.get())) {
+              resources += resource;
+              newQuotaAllocationScalarQuantities +=
+                Resources(resource).createStrippedScalarQuantity();
+            }
           }
         }
 
+        // We include the non-quota resources (with headroom taken
+        // into account) if this role is being allocated some resources
+        // already: either some quota resources or a reservation
+        // (possibly with quota resources).
+        if (!resources.empty()) {
+          resources += nonQuotaResources;
+        }
+
         // It is safe to break here, because all frameworks under a role would
         // consider the same resources, so in case we don't have allocatable
         // resources, we don't have to check for other frameworks under the
@@ -1896,9 +1907,6 @@ void HierarchicalAllocatorProcess::__allocate()
         offerable[frameworkId][role][slaveId] += resources;
         offeredSharedResources[slaveId] += resources.shared();
 
-        Resources newQuotaAllocationScalarQuantities =
-          newQuotaAllocation.createStrippedScalarQuantity();
-
         unsatisfiedQuota -= newQuotaAllocationScalarQuantities;
 
         // Track quota headroom change.


[16/20] mesos git commit: Added MESOS-8339 to the 1.4.2 CHANGELOG.

Posted by bm...@apache.org.
Added MESOS-8339 to the 1.4.2 CHANGELOG.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a99403aa
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a99403aa
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a99403aa

Branch: refs/heads/1.4.x
Commit: a99403aa029a58ce793634c4d65b6d7e1d509ed4
Parents: 0244bbc
Author: Benjamin Mahler <bm...@apache.org>
Authored: Wed May 2 16:51:50 2018 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:54:24 2018 -0700

----------------------------------------------------------------------
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/a99403aa/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 9d956f7..a18c542 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -14,6 +14,7 @@ Release Notes - Mesos - Version 1.4.2 (WIP)
   * [MESOS-8237] - Strip (Offer|Resource).allocation_info for non-MULTI_ROLE schedulers.
   * [MESOS-8293] - Reservation may not be allocated when the role has no quota.
   * [MESOS-8297] - Built-in driver-based executors ignore kill task if the task has not been launched.
+  * [MESOS-8339] - Quota headroom may be insufficiently held when role has more reservation than quota.
   * [MESOS-8356] - Persistent volume ownership is set to root despite of sandbox owner (frameworkInfo.user) when docker executor is used.
   * [MESOS-8411] - Killing a queued task can lead to the command executor never terminating.
   * [MESOS-8480] - Mesos returns high resource usage when killing a Docker task.


[08/20] mesos git commit: Fixed a bug where insufficient headroom is held for quota.

Posted by bm...@apache.org.
Fixed a bug where insufficient headroom is held for quota.

If a role has more reservations than its quota, the current quota
headroom calculation is insufficient to guarantee quota
allocation. See MESOS-8339.

This patch fixes the issue by computing the amount of unreserved
resources that must be held back so that each quota role can have
its quota satisfied. Previously, we included a quota role's
unallocated reservations as part of the headroom since we knew
they would be held back, but we did not handle the case where a
quota role has more reservations than quota.
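
Illustrative arithmetic (numbers hypothetical): if role "r" has a
quota guarantee of 10 cpus, 4 cpus allocated, and 8 cpus of
unallocated reservations, then

  required headroom(r) = (10 - 4) - 8 = -2, clamped to 0 cpus

since `Resources` subtraction never goes negative. "r"'s remaining
quota can be satisfied entirely from its own reservations, so no
unreserved resources need to be held back for it.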

Review: https://reviews.apache.org/r/64698/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/dd1df2df
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/dd1df2df
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/dd1df2df

Branch: refs/heads/1.4.x
Commit: dd1df2dfef6fd052efd1d11b08c6ffc64c70aa17
Parents: 6ab4301
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Mon Dec 18 19:40:59 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:44:02 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 79 +++++++++++++-----------
 1 file changed, 43 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/dd1df2df/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index a2c0da6..2424484 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1741,41 +1741,51 @@ void HierarchicalAllocatorProcess::__allocate()
 
   // Frameworks in a quota'ed role may temporarily reject resources by
   // filtering or suppressing offers. Hence, quotas may not be fully
-  // allocated and if so we need to hold back some headroom to ensure
-  // the quota can later be satisfied when needed.
+  // allocated and if so we need to hold back enough unreserved
+  // resources to ensure the quota can later be satisfied when needed:
+  //
+  //   Required unreserved headroom =
+  //     sum (unsatisfied quota(r) - unallocated reservations(r))
+  //       for each quota role r
+  //
+  // Given the above, if a role has more reservations than quota,
+  // we don't need to hold back any unreserved headroom for it.
   Resources requiredHeadroom;
   foreachpair (const string& role, const Quota& quota, quotas) {
-    // Compute the amount of quota that the role does not have allocated.
-    //
     // NOTE: Revocable resources are excluded in `quotaRoleSorter`.
     // NOTE: Only scalars are considered for quota.
+    // NOTE: The following should all be quantities with no meta-data!
     Resources allocated = getQuotaRoleAllocatedResources(role);
-    const Resources required = quota.info.guarantee();
-    requiredHeadroom += (required - allocated);
+    const Resources guarantee = quota.info.guarantee();
+
+    if (allocated.contains(guarantee)) {
+      continue; // Quota already satisfied.
+    }
+
+    Resources unallocated = guarantee - allocated;
+
+    Resources unallocatedReservations =
+      reservationScalarQuantities.get(role).getOrElse(Resources()) -
+      allocatedReservationScalarQuantities.get(role).getOrElse(Resources());
+
+    requiredHeadroom += unallocated - unallocatedReservations;
   }
 
-  // The amount of headroom currently available consists of
-  // unallocated resources that can be allocated to satisfy quota.
-  // What that means is that the resources need to be:
-  //
-  //   (1) Non-revocable
-  //   (2) Unreserved, or reserved to a quota role.
+  // We will allocate resources while ensuring that the required
+  // unreserved non-revocable headroom is still available. Otherwise,
+  // we will not be able to satisfy quota later. Reservations to
+  // non-quota roles and revocable resources will always be included
+  // in the offers since these are not part of the headroom (and
+  // therefore can't be used to satisfy quota).
   //
-  // TODO(bmahler): Note that we currently do not consider quota
-  // headroom on an per-role basis, which means we may not hold
-  // back sufficient headroom for a particular quota role if some
-  // other quota roles have more reservations than quota.
-  // See MESOS-8339.
+  //   available headroom = unallocated unreserved non-revocable resources
   //
-  // We will allocate resources while ensuring that the required
-  // headroom is still available. Otherwise we will not be able
-  // to satisfy quota later. Reservations to non-quota roles and
-  // revocable resources will always be included in the offers
-  // since these are not part of the available headroom for quota.
+  // We compute this as:
   //
-  //  available headroom = unallocated resources -
-  //                       unallocated non-quota role reservations -
-  //                       unallocated revocable resources
+  //   available headroom = total resources -
+  //                        allocated resources -
+  //                        unallocated reservations -
+  //                        unallocated revocable resources
 
   // NOTE: `totalScalarQuantities` omits dynamic reservation,
   // persistent volume info, and allocation info. We additionally
@@ -1792,31 +1802,28 @@ void HierarchicalAllocatorProcess::__allocate()
       roleSorter->allocationScalarQuantities(role).toUnreserved();
   }
 
-  // Subtract non quota role reservations.
+  // Subtract all unallocated reservations.
   foreachkey (const string& role, reservationScalarQuantities) {
-    if (quotas.contains(role)) {
-      continue; // Skip quota roles.
-    }
-
     hashmap<SlaveID, Resources> allocations;
-    if (roleSorter->contains(role)) {
+    if (quotaRoleSorter->contains(role)) {
+      allocations = quotaRoleSorter->allocation(role);
+    } else if (roleSorter->contains(role)) {
       allocations = roleSorter->allocation(role);
     }
 
-    Resources unallocatedNonQuotaRoleReservations =
+    Resources unallocatedReservations =
       reservationScalarQuantities.get(role).getOrElse(Resources());
 
     foreachvalue (const Resources& resources, allocations) {
       // NOTE: `totalScalarQuantities` omits dynamic reservation,
       // persistent volume info, and allocation info. We additionally
       // remove the static reservations here via `toUnreserved()`.
-      unallocatedNonQuotaRoleReservations -=
+      unallocatedReservations -=
         resources.reserved().createStrippedScalarQuantity().toUnreserved();
     }
 
-    // Subtract the unallocated reservations for this non quota
-    // role from the headroom.
-    availableHeadroom -= unallocatedNonQuotaRoleReservations;
+    // Subtract the unallocated reservations for this role from the headroom.
+    availableHeadroom -= unallocatedReservations;
   }
 
   // Subtract revocable resources.


[04/20] mesos git commit: Updated the allocator to untrack allocations via a single code path.

Posted by bm...@apache.org.
Updated the allocator to untrack allocations via a single code path.
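
Call sites now funnel through a single helper (see the diff
below), replacing per-sorter bookkeeping previously duplicated in
removeFramework() and recoverResources():

  untrackAllocatedResources(slaveId, frameworkId, allocated);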

Review: https://reviews.apache.org/r/64357/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/69b3aa0c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/69b3aa0c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/69b3aa0c

Branch: refs/heads/1.4.x
Commit: 69b3aa0cc60f29e632d0194853a54ab37b6cd42a
Parents: cd450bc
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Thu Dec 7 13:14:10 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:43:29 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 58 ++++++++++++++++--------
 src/master/allocator/mesos/hierarchical.hpp | 12 ++++-
 2 files changed, 51 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/69b3aa0c/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 640f9ce..b6db12e 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -313,6 +313,8 @@ void HierarchicalAllocatorProcess::removeFramework(
   foreach (const string& role, framework.roles) {
     // Might not be in 'frameworkSorters[role]' because it
     // was previously deactivated and never re-added.
+    //
+    // TODO(mzhu): This check may no longer be necessary.
     if (!frameworkSorters.contains(role) ||
         !frameworkSorters.at(role)->contains(frameworkId.value())) {
       continue;
@@ -325,14 +327,7 @@ void HierarchicalAllocatorProcess::removeFramework(
     foreachpair (const SlaveID& slaveId,
                  const Resources& allocated,
                  allocation) {
-      roleSorter->unallocated(role, slaveId, allocated);
-      frameworkSorters.at(role)->remove(slaveId, allocated);
-
-      if (quotas.contains(role)) {
-        // See comment at `quotaRoleSorter` declaration
-        // regarding non-revocable.
-        quotaRoleSorter->unallocated(role, slaveId, allocated.nonRevocable());
-      }
+      untrackAllocatedResources(slaveId, frameworkId, allocated);
     }
 
     untrackFrameworkUnderRole(frameworkId, role);
@@ -1098,16 +1093,7 @@ void HierarchicalAllocatorProcess::recoverResources(
     const Owned<Sorter>& frameworkSorter = frameworkSorters.at(role);
 
     if (frameworkSorter->contains(frameworkId.value())) {
-      frameworkSorter->unallocated(frameworkId.value(), slaveId, resources);
-      frameworkSorter->remove(slaveId, resources);
-      roleSorter->unallocated(role, slaveId, resources);
-
-      if (quotas.contains(role)) {
-        // See comment at `quotaRoleSorter` declaration
-        // regarding non-revocable
-        quotaRoleSorter->unallocated(
-            role, slaveId, resources.nonRevocable());
-      }
+      untrackAllocatedResources(slaveId, frameworkId, resources);
 
       // Stop tracking the framework under this role if it's no longer
       // subscribed and no longer has resources allocated to the role.
@@ -2419,6 +2405,42 @@ void HierarchicalAllocatorProcess::trackAllocatedResources(
   }
 }
 
+
+void HierarchicalAllocatorProcess::untrackAllocatedResources(
+    const SlaveID& slaveId,
+    const FrameworkID& frameworkId,
+    const Resources& allocated)
+{
+  // TODO(mzhu): Add a `CHECK(slaves.contains(slaveId));`
+  // here once MESOS-621 is resolved. Ideally, `removeSlave()`
+  // should unallocate resources in the framework sorters.
+  // But currently, a slave is removed first via `removeSlave()`
+  // and later a call to `recoverResources()` occurs to recover
+  // the framework's resources.
+  CHECK(frameworks.contains(frameworkId));
+
+  // TODO(bmahler): Calling allocations() is expensive since it has
+  // to construct a map. Avoid this.
+  foreachpair (const string& role,
+               const Resources& allocation,
+               allocated.allocations()) {
+    CHECK(roleSorter->contains(role));
+    CHECK(frameworkSorters.contains(role));
+    CHECK(frameworkSorters.at(role)->contains(frameworkId.value()));
+
+    frameworkSorters.at(role)->unallocated(
+        frameworkId.value(), slaveId, allocation);
+    frameworkSorters.at(role)->remove(slaveId, allocation);
+
+    roleSorter->unallocated(role, slaveId, allocation);
+
+    if (quotas.contains(role)) {
+      // See comment at `quotaRoleSorter` declaration regarding non-revocable.
+      quotaRoleSorter->unallocated(role, slaveId, allocation.nonRevocable());
+    }
+  }
+}
+
 } // namespace internal {
 } // namespace allocator {
 } // namespace master {

http://git-wip-us.apache.org/repos/asf/mesos/blob/69b3aa0c/src/master/allocator/mesos/hierarchical.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index a53a952..0d30179 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -539,11 +539,21 @@ private:
   // the agent and the master are both configured with a fault domain.
   bool isRemoteSlave(const Slave& slave) const;
 
-  // Helper to track used resources on an agent.
+  // Helper to track allocated resources on an agent.
   void trackAllocatedResources(
       const SlaveID& slaveId,
       const FrameworkID& frameworkId,
       const Resources& allocated);
+
+  // Helper to untrack resources that are no longer allocated on an agent.
+  void untrackAllocatedResources(
+      const SlaveID& slaveId,
+      const FrameworkID& frameworkId,
+      const Resources& allocated);
+
+  // Helper that removes all existing offer filters for the given slave
+  // id.
+  void removeFilters(const SlaveID& slaveId);
 };
 
 


[07/20] mesos git commit: Fixed a bug with quota headroom that can leave reservations unallocated.

Posted by bm...@apache.org.
Fixed a bug with quota headroom that can leave reservations unallocated.

Also fixed a bug where quota headroom may include revocable resources.
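
A minimal sketch of the guard this patch adds (identifiers follow
the diff below): resources are held back whenever offering them
would leave less than the required headroom.

  const Resources headroomToAllocate =
    resources.scalars().unreserved().nonRevocable();

  bool sufficientHeadroom =
    (availableHeadroom - headroomToAllocate.createStrippedScalarQuantity())
      .contains(requiredHeadroom);

  if (!sufficientHeadroom) {
    resources -= headroomToAllocate;
  }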

Review: https://reviews.apache.org/r/64636/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/6ab4301c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/6ab4301c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/6ab4301c

Branch: refs/heads/1.4.x
Commit: 6ab4301cbcc0a5245eed2024690fc468779bce09
Parents: d281646
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Fri Dec 15 14:52:22 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:43:57 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 168 +++++++++++++----------
 1 file changed, 97 insertions(+), 71 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/6ab4301c/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 71a146e..a2c0da6 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1739,73 +1739,96 @@ void HierarchicalAllocatorProcess::__allocate()
     }
   }
 
-  // Calculate the total quantity of scalar resources (including revocable
-  // and reserved) that are available for allocation in the next round. We
-  // need this in order to ensure we do not over-allocate resources during
-  // the second stage.
-  //
-  // For performance reasons (MESOS-4833), this omits information about
-  // dynamic reservations or persistent volumes in the resources.
-  //
-  // NOTE: We use total cluster resources, and not just those based on the
-  // agents participating in the current allocation (i.e. provided as an
-  // argument to the `allocate()` call) so that frameworks in roles without
-  // quota are not unnecessarily deprived of resources.
-  Resources remainingClusterResources = roleSorter->totalScalarQuantities();
-  foreachkey (const string& role, roles) {
-    remainingClusterResources -= roleSorter->allocationScalarQuantities(role);
-  }
-
   // Frameworks in a quota'ed role may temporarily reject resources by
-  // filtering or suppressing offers. Hence quotas may not be fully allocated.
-  Resources unallocatedQuotaResources;
-  foreachpair (const string& name, const Quota& quota, quotas) {
+  // filtering or suppressing offers. Hence, quotas may not be fully
+  // allocated and if so we need to hold back some headroom to ensure
+  // the quota can later be satisfied when needed.
+  Resources requiredHeadroom;
+  foreachpair (const string& role, const Quota& quota, quotas) {
     // Compute the amount of quota that the role does not have allocated.
     //
     // NOTE: Revocable resources are excluded in `quotaRoleSorter`.
     // NOTE: Only scalars are considered for quota.
-    Resources allocated = getQuotaRoleAllocatedResources(name);
+    Resources allocated = getQuotaRoleAllocatedResources(role);
     const Resources required = quota.info.guarantee();
-    unallocatedQuotaResources += (required - allocated);
+    requiredHeadroom += (required - allocated);
   }
 
-  // Determine how many resources we may allocate during the next stage.
+  // The amount of headroom currently available consists of
+  // unallocated resources that can be allocated to satisfy quota.
+  // What that means is that the resources need to be:
   //
-  // NOTE: Resources for quota allocations are already accounted in
-  // `remainingClusterResources`.
-  remainingClusterResources -= unallocatedQuotaResources;
-
-  // Shared resources are excluded in determination of over-allocation of
-  // available resources since shared resources are always allocatable.
-  remainingClusterResources = remainingClusterResources.nonShared();
-
-  // To ensure we do not over-allocate resources during the second stage
-  // with all frameworks, we use 2 stopping criteria:
-  //   * No available resources for the second stage left, i.e.
-  //     `remainingClusterResources` - `allocatedStage2` is empty.
-  //   * A potential offer will force the second stage to use more resources
-  //     than available, i.e. `remainingClusterResources` does not contain
-  //     (`allocatedStage2` + potential offer). In this case we skip this
-  //     agent and continue to the next one.
+  //   (1) Non-revocable
+  //   (2) Unreserved, or reserved to a quota role.
   //
-  // NOTE: Like `remainingClusterResources`, `allocatedStage2` omits
-  // information about dynamic reservations and persistent volumes for
-  // performance reasons. This invariant is preserved because we only add
-  // resources to it that have also had this metadata stripped from them
-  // (typically by using `Resources::createStrippedScalarQuantity`).
-  Resources allocatedStage2;
-
-  // At this point resources for quotas are allocated or accounted for.
-  // Proceed with allocating the remaining free pool.
-  foreach (const SlaveID& slaveId, slaveIds) {
-    // If there are no resources available for the second stage, stop.
-    //
-    // TODO(mzhu): Breaking early here means that we may leave reservations
-    // unallocated. See MESOS-8293.
-    if (!allocatable(remainingClusterResources - allocatedStage2)) {
-      break;
+  // TODO(bmahler): Note that we currently do not consider quota
+  // headroom on an per-role basis, which means we may not hold
+  // back sufficient headroom for a particular quota role if some
+  // other quota roles have more reservations than quota.
+  // See MESOS-8339.
+  //
+  // We will allocate resources while ensuring that the required
+  // headroom is still available. Otherwise we will not be able
+  // to satisfy quota later. Reservations to non-quota roles and
+  // revocable resources will always be included in the offers
+  // since these are not part of the available headroom for quota.
+  //
+  //  available headroom = unallocated resources -
+  //                       unallocated non-quota role reservations -
+  //                       unallocated revocable resources
+
+  // NOTE: `totalScalarQuantities` omits dynamic reservation,
+  // persistent volume info, and allocation info. We additionally
+  // remove the static reservations here via `toUnreserved()`.
+  Resources availableHeadroom =
+    roleSorter->totalScalarQuantities().toUnreserved();
+
+  // Subtract allocated resources from the total.
+  foreachkey (const string& role, roles) {
+    // NOTE: `totalScalarQuantities` omits dynamic reservation,
+    // persistent volume info, and allocation info. We additionally
+    // remove the static reservations here via `toUnreserved()`.
+    availableHeadroom -=
+      roleSorter->allocationScalarQuantities(role).toUnreserved();
+  }
+
+  // Subtract non quota role reservations.
+  foreachkey (const string& role, reservationScalarQuantities) {
+    if (quotas.contains(role)) {
+      continue; // Skip quota roles.
+    }
+
+    hashmap<SlaveID, Resources> allocations;
+    if (roleSorter->contains(role)) {
+      allocations = roleSorter->allocation(role);
+    }
+
+    Resources unallocatedNonQuotaRoleReservations =
+      reservationScalarQuantities.get(role).getOrElse(Resources());
+
+    foreachvalue (const Resources& resources, allocations) {
+      // NOTE: `totalScalarQuantities` omits dynamic reservation,
+      // persistent volume info, and allocation info. We additionally
+      // remove the static reservations here via `toUnreserved()`.
+      unallocatedNonQuotaRoleReservations -=
+        resources.reserved().createStrippedScalarQuantity().toUnreserved();
     }
 
+    // Subtract the unallocated reservations for this non quota
+    // role from the headroom.
+    availableHeadroom -= unallocatedNonQuotaRoleReservations;
+  }
+
+  // Subtract revocable resources.
+  foreachvalue (const Slave& slave, slaves) {
+    // NOTE: `totalScalarQuantities` omits dynamic reservation,
+    // persistent volume info, and allocation info. We additionally
+    // remove the static reservations here via `toUnreserved()`.
+    availableHeadroom -= slave.available().revocable()
+      .createStrippedScalarQuantity().toUnreserved();
+  }
+
+  foreach (const SlaveID& slaveId, slaveIds) {
     foreach (const string& role, roleSorter->sort()) {
       // In the second allocation stage, we only allocate
       // for non-quota roles.
@@ -1903,6 +1926,20 @@ void HierarchicalAllocatorProcess::__allocate()
           });
         }
 
+        // If allocating these resources would reduce the headroom
+        // below what is required, we will hold them back.
+        const Resources headroomToAllocate = resources
+          .scalars().unreserved().nonRevocable();
+
+        bool sufficientHeadroom =
+          (availableHeadroom -
+            headroomToAllocate.createStrippedScalarQuantity())
+            .contains(requiredHeadroom);
+
+        if (!sufficientHeadroom) {
+          resources -= headroomToAllocate;
+        }
+
         // If the resources are not allocatable, ignore. We cannot break
         // here, because another framework under the same role could accept
         // revocable resources and breaking would skip all other frameworks.
@@ -1915,21 +1952,6 @@ void HierarchicalAllocatorProcess::__allocate()
           continue;
         }
 
-        // If the offer generated by `resources` would force the second
-        // stage to use more than `remainingClusterResources`, move along.
-        // We do not terminate early, as offers generated further in the
-        // loop may be small enough to fit within `remainingClusterResources`.
-        //
-        // We exclude shared resources from over-allocation check because
-        // shared resources are always allocatable.
-        const Resources scalarQuantity =
-          resources.nonShared().createStrippedScalarQuantity();
-
-        if (!remainingClusterResources.contains(
-                allocatedStage2 + scalarQuantity)) {
-          continue;
-        }
-
         VLOG(2) << "Allocating " << resources << " on agent " << slaveId
                 << " to role " << role << " of framework " << frameworkId;
 
@@ -1942,7 +1964,11 @@ void HierarchicalAllocatorProcess::__allocate()
         // agent as part of quota.
         offerable[frameworkId][role][slaveId] += resources;
         offeredSharedResources[slaveId] += resources.shared();
-        allocatedStage2 += scalarQuantity;
+
+        if (sufficientHeadroom) {
+          availableHeadroom -=
+            headroomToAllocate.createStrippedScalarQuantity();
+        }
 
         slave.allocated += resources;
 


[20/20] mesos git commit: Added MESOS-8604 to the 1.4.2 CHANGELOG.

Posted by bm...@apache.org.
Added MESOS-8604 to the 1.4.2 CHANGELOG.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/35fcdd77
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/35fcdd77
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/35fcdd77

Branch: refs/heads/1.4.x
Commit: 35fcdd778e8d9eebfc3058cc64ef7e15e837e7fa
Parents: 0fedea1
Author: Benjamin Mahler <bm...@apache.org>
Authored: Wed May 2 16:53:37 2018 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:54:25 2018 -0700

----------------------------------------------------------------------
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/35fcdd77/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index d037f70..1807b29 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -26,6 +26,7 @@ Release Notes - Mesos - Version 1.4.2 (WIP)
   * [MESOS-8574] - Docker executor makes no progress when 'docker inspect' hangs.
   * [MESOS-8575] - Improve discard handling for 'Docker::stop' and 'Docker::pull'.
   * [MESOS-8576] - Improve discard handling of 'Docker::inspect()'.
+  * [MESOS-8604] - Quota headroom tracking may be incorrect in the presence of hierarchical reservation.
   * [MESOS-8605] - Terminal task status update will not send if 'docker inspect' is hung.
   * [MESOS-8651] - Potential memory leaks in the `volume/sandbox_path` isolator.
 


[02/20] mesos git commit: Introduced an allocator helper function to track used resources.

Posted by bm...@apache.org.
Introduced an allocator helper function to track used resources.

This patch introduces a helper to track allocated resources. It
encapsulates all needed updates to the various sorters for
reusability.
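
Usage sketch (as in the diff below): addFramework() and addSlave()
replace their open-coded per-sorter updates with a single call.

  trackAllocatedResources(slaveId, {{frameworkId, resources}});  // addFramework()
  trackAllocatedResources(slaveId, used);                        // addSlave()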

Review: https://reviews.apache.org/r/64136/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/e2d52d36
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/e2d52d36
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/e2d52d36

Branch: refs/heads/1.4.x
Commit: e2d52d362bde8ebd1a7f8bed79196842afbf2a38
Parents: 5adc12e
Author: Benjamin Bannier <be...@mesosphere.io>
Authored: Thu Nov 30 17:03:45 2017 +0100
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:43:19 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 93 +++++++++++-------------
 src/master/allocator/mesos/hierarchical.hpp |  5 ++
 2 files changed, 49 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/e2d52d36/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index f021c34..dc6f27f 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -284,20 +284,7 @@ void HierarchicalAllocatorProcess::addFramework(
       continue;
     }
 
-    hashmap<string, Resources> allocations = resources.allocations();
-
-    foreachpair (const string& role, const Resources& allocation, allocations) {
-      roleSorter->allocated(role, slaveId, allocation);
-      frameworkSorters.at(role)->add(slaveId, allocation);
-      frameworkSorters.at(role)->allocated(
-          frameworkId.value(), slaveId, allocation);
-
-      if (quotas.contains(role)) {
-        // See comment at `quotaRoleSorter` declaration
-        // regarding non-revocable.
-        quotaRoleSorter->allocated(role, slaveId, allocation.nonRevocable());
-      }
-    }
+    trackAllocatedResources(slaveId, {{frameworkId, resources}});
   }
 
   LOG(INFO) << "Added framework " << frameworkId;
@@ -516,41 +503,7 @@ void HierarchicalAllocatorProcess::addSlave(
   // See comment at `quotaRoleSorter` declaration regarding non-revocable.
   quotaRoleSorter->add(slaveId, total.nonRevocable());
 
-  // Update the allocation for each framework.
-  foreachpair (const FrameworkID& frameworkId,
-               const Resources& used_,
-               used) {
-    if (!frameworks.contains(frameworkId)) {
-      continue;
-    }
-
-    foreachpair (const string& role,
-                 const Resources& allocated,
-                 used_.allocations()) {
-      // The framework has resources allocated to this role but it may
-      // or may not be subscribed to the role. Either way, we need to
-      // track the framework under the role.
-      if (!isFrameworkTrackedUnderRole(frameworkId, role)) {
-        trackFrameworkUnderRole(frameworkId, role);
-      }
-
-      // TODO(bmahler): Validate that the reserved resources have the
-      // framework's role.
-      CHECK(roleSorter->contains(role));
-      CHECK(frameworkSorters.contains(role));
-      CHECK(frameworkSorters.at(role)->contains(frameworkId.value()));
-
-      roleSorter->allocated(role, slaveId, allocated);
-      frameworkSorters.at(role)->add(slaveId, allocated);
-      frameworkSorters.at(role)->allocated(
-          frameworkId.value(), slaveId, allocated);
-
-      if (quotas.contains(role)) {
-        // See comment at `quotaRoleSorter` declaration regarding non-revocable.
-        quotaRoleSorter->allocated(role, slaveId, allocated.nonRevocable());
-      }
-    }
-  }
+  trackAllocatedResources(slaveId, used);
 
   slaves[slaveId] = Slave();
 
@@ -2417,6 +2370,48 @@ bool HierarchicalAllocatorProcess::isRemoteSlave(const Slave& slave) const
   return masterRegion != slaveRegion;
 }
 
+
+void HierarchicalAllocatorProcess::trackAllocatedResources(
+    const SlaveID& slaveId,
+    const hashmap<FrameworkID, Resources>& used)
+{
+  // Update the allocation for each framework.
+  foreachpair (const FrameworkID& frameworkId,
+               const Resources& used_,
+               used) {
+    if (!frameworks.contains(frameworkId)) {
+      continue;
+    }
+
+    foreachpair (const string& role,
+                 const Resources& allocated,
+                 used_.allocations()) {
+      // The framework has resources allocated to this role but it may
+      // or may not be subscribed to the role. Either way, we need to
+      // track the framework under the role.
+      if (!isFrameworkTrackedUnderRole(frameworkId, role)) {
+        trackFrameworkUnderRole(frameworkId, role);
+      }
+
+      // TODO(bmahler): Validate that the reserved resources have the
+      // framework's role.
+      CHECK(roleSorter->contains(role));
+      CHECK(frameworkSorters.contains(role));
+      CHECK(frameworkSorters.at(role)->contains(frameworkId.value()));
+
+      roleSorter->allocated(role, slaveId, allocated);
+      frameworkSorters.at(role)->add(slaveId, allocated);
+      frameworkSorters.at(role)->allocated(
+          frameworkId.value(), slaveId, allocated);
+
+      if (quotas.contains(role)) {
+        // See comment at `quotaRoleSorter` declaration regarding non-revocable.
+        quotaRoleSorter->allocated(role, slaveId, allocated.nonRevocable());
+      }
+    }
+  }
+}
+
 } // namespace internal {
 } // namespace allocator {
 } // namespace master {

http://git-wip-us.apache.org/repos/asf/mesos/blob/e2d52d36/src/master/allocator/mesos/hierarchical.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index c234605..ececf31 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -538,6 +538,11 @@ private:
   // different region than the master. This can only be the case if
   // the agent and the master are both configured with a fault domain.
   bool isRemoteSlave(const Slave& slave) const;
+
+  // Helper to track used resources on an agent.
+  void trackAllocatedResources(
+      const SlaveID& slaveId,
+      const hashmap<FrameworkID, Resources>& used);
 };
 
 


[19/20] mesos git commit: Added MESOS-8352 to the 1.4.2 CHANGELOG.

Posted by bm...@apache.org.
Added MESOS-8352 to the 1.4.2 CHANGELOG.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0fedea17
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0fedea17
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0fedea17

Branch: refs/heads/1.4.x
Commit: 0fedea172a988d436aac0253e7633bd5ea35c7ec
Parents: a99403a
Author: Benjamin Mahler <bm...@apache.org>
Authored: Wed May 2 16:52:47 2018 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:54:25 2018 -0700

----------------------------------------------------------------------
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/0fedea17/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index a18c542..d037f70 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -15,6 +15,7 @@ Release Notes - Mesos - Version 1.4.2 (WIP)
   * [MESOS-8293] - Reservation may not be allocated when the role has no quota.
   * [MESOS-8297] - Built-in driver-based executors ignore kill task if the task has not been launched.
   * [MESOS-8339] - Quota headroom may be insufficiently held when role has more reservation than quota.
+  * [MESOS-8352] - Resources may get over allocated to some roles while fail to meet the quota of other roles.
   * [MESOS-8356] - Persistent volume ownership is set to root despite of sandbox owner (frameworkInfo.user) when docker executor is used.
   * [MESOS-8411] - Killing a queued task can lead to the command executor never terminating.
   * [MESOS-8480] - Mesos returns high resource usage when killing a Docker task.


[06/20] mesos git commit: Enforced quota limit in the presence of unallocated reservations.

Posted by bm...@apache.org.
Enforced quota limit in the presence of unallocated reservations.

See MESOS-4527: currently a role can exceed its limit when it
leaves reservations unallocated (i.e., by declining them).

Also modified the allocation process such that the first
allocation stage is solely for quota roles while the second
stage is solely for non-quota roles.
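
A structural sketch of the two-stage split (simplified; loop
bodies elided as comments):

  // Stage 1: allocate only to roles with quota.
  foreach (const SlaveID& slaveId, slaveIds) {
    foreach (const string& role, quotaRoleSorter->sort()) {
      // ... allocate towards the role's quota guarantee ...
    }
  }

  // Stage 2: allocate only to roles without quota.
  foreach (const SlaveID& slaveId, slaveIds) {
    foreach (const string& role, roleSorter->sort()) {
      if (quotas.contains(role)) {
        continue; // Quota roles were handled in the first stage.
      }
      // ... allocate the remaining resources fairly ...
    }
  }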

Review: https://reviews.apache.org/r/64304/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d2816460
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d2816460
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d2816460

Branch: refs/heads/1.4.x
Commit: d2816460749ed787d13edaf5fba12ace30d1a088
Parents: 35b0e1c
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Mon Dec 11 18:09:35 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:43:53 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 135 ++++++++++++++++++-----
 1 file changed, 105 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/d2816460/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index b70868d..71a146e 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1494,6 +1494,48 @@ void HierarchicalAllocatorProcess::__allocate()
     return quotaRoleSorter->allocationScalarQuantities(role).toUnreserved();
   };
 
+  // We need to keep track of allocated reserved resources for roles
+  // with quota in order to enforce their quota limit. Note these are
+  // __quantities__ with no meta-data.
+  hashmap<string, Resources> allocatedReservationScalarQuantities;
+
+  // We build the map here to avoid repetitive aggregation
+  // in the allocation loop. Note, this map will still need to be
+  // updated during the allocation loop when new allocations
+  // are made.
+  //
+  // TODO(mzhu): Ideally, we want to build up and persist this information
+  // across allocation cycles in track/untrackAllocatedResources().
+  // But due to the presence of shared resources, we need to keep track of
+  // the allocated resources (and not just scalar quantities) on a slave
+  // to account for multiple copies of the same shared resources.
+  // While the `allocated` info in the `struct slave` gives us just that,
+  // we can not simply use that in track/untrackAllocatedResources() since
+  // `allocated` is currently updated outside the scope of
+  // track/untrackAllocatedResources(), meaning that it may get updated
+  // either before or after the tracking calls.
+  //
+  // TODO(mzhu): Ideally, we want these helpers to instead track the
+  // reservations as *allocated* in the sorters even when the
+  // reservations have not been allocated yet. This will help to:
+  //
+  //   (1) Solve the fairness issue when roles with unallocated
+  //       reservations may game the allocator (See MESOS-8299).
+  //
+  //   (2) Simplify the quota enforcement logic -- the allocator
+  //       would no longer need to track reservations separately.
+  foreachkey (const string& role, quotas) {
+    const hashmap<SlaveID, Resources> allocations =
+      quotaRoleSorter->allocation(role);
+
+    foreachvalue (const Resources& resources, allocations) {
+      // We need to remove the static reservation metadata here via
+      // `toUnreserved()`.
+      allocatedReservationScalarQuantities[role] +=
+        resources.reserved().createStrippedScalarQuantity().toUnreserved();
+    }
+  }
+
   // Due to the two stages in the allocation algorithm and the nature of
   // shared resources being re-offerable even if already allocated, the
   // same shared resources can appear in two (and not more due to the
@@ -1520,15 +1562,37 @@ void HierarchicalAllocatorProcess::__allocate()
         continue;
       }
 
-      // Get the total quantity of resources allocated to a quota role. The
-      // value omits role, reservation, and persistence info.
-      Resources roleConsumedResources = getQuotaRoleAllocatedResources(role);
+      // This is a __quantity__ with no meta-data.
+      Resources roleReservationScalarQuantities =
+        reservationScalarQuantities.get(role).getOrElse(Resources());
 
-      // If quota for the role is satisfied, we do not need to do
-      // any further allocations for this role, at least at this
-      // stage. More precisely, we stop allocating if at least
-      // one of the resource guarantees is met. This technique
-      // prevents gaming of the quota allocation, see MESOS-6432.
+      // This is a __quantity__ with no meta-data.
+      Resources roleAllocatedReservationScalarQuantities =
+        allocatedReservationScalarQuantities.get(role).getOrElse(Resources());
+
+      // We charge a role against its quota by considering its
+      // allocation as well as any unallocated reservations
+      // since reservations are bound to the role. In other
+      // words, we always consider reservations as consuming
+      // quota, regardless of whether they are allocated.
+      // The equation used here is:
+      //
+      //   Consumed Quota = reservations + unreserved allocation
+      //                  = reservations + (allocation - allocated reservations)
+      //
+      // This is a __quantity__ with no meta-data.
+      Resources resourcesChargedAgainstQuota =
+        roleReservationScalarQuantities +
+          (getQuotaRoleAllocatedResources(role) -
+               roleAllocatedReservationScalarQuantities);
+
+      // If quota for the role is considered satisfied, then we only
+      // further allocate reservations for the role.
+      //
+      // More precisely, we stop allocating unreserved resources if at
+      // least one of the resource guarantees is considered consumed.
+      // This technique prevents gaming of the quota allocation,
+      // see MESOS-6432.
       //
       // Longer term, we could ideally allocate what remains
       // unsatisfied to allow an existing container to scale
@@ -1541,16 +1605,12 @@ void HierarchicalAllocatorProcess::__allocate()
       //   * Removing satisfied roles from the sorter.
       bool someGuaranteesReached = false;
       foreach (const Resource& guarantee, quota.info.guarantee()) {
-        if (roleConsumedResources.contains(guarantee)) {
+        if (resourcesChargedAgainstQuota.contains(guarantee)) {
           someGuaranteesReached = true;
           break;
         }
       }
 
-      if (someGuaranteesReached) {
-        continue;
-      }
-
       // Fetch frameworks according to their fair share.
       // NOTE: Suppressed frameworks are not included in the sort.
       CHECK(frameworkSorters.contains(role));
@@ -1598,19 +1658,18 @@ void HierarchicalAllocatorProcess::__allocate()
           }
         }
 
-        // The resources we offer are the unreserved resources as well as the
-        // reserved resources for this particular role and all its ancestors
-        // in the role hierarchy.
+        // If a role's quota limit has been reached, we only allocate
+        // its reservations. Otherwise, we allocate its reservations
+        // plus unreserved resources.
         //
         // NOTE: Currently, frameworks are allowed to have '*' role.
         // Calling reserved('*') returns an empty Resources object.
         //
-        // Quota is satisfied from the available non-revocable resources on the
-        // agent. It's important that we include reserved resources here since
-        // reserved resources are accounted towards the quota guarantee. If we
-        // were to rely on stage 2 to offer them out, they would not be checked
-        // against the quota guarantee.
-        Resources resources = available.allocatableTo(role).nonRevocable();
+        // TODO(mzhu): Do fine-grained allocations up to the limit.
+        // See MESOS-8293.
+        Resources resources = someGuaranteesReached ?
+          available.reserved(role).nonRevocable() :
+          available.allocatableTo(role).nonRevocable();
 
         // It is safe to break here, because all frameworks under a role would
         // consider the same resources, so in case we don't have allocatable
@@ -1658,6 +1717,21 @@ void HierarchicalAllocatorProcess::__allocate()
         offerable[frameworkId][role][slaveId] += resources;
         offeredSharedResources[slaveId] += resources.shared();
 
+        // Update the tracking of allocated reservations.
+        //
+        // Note it is important to do this before updating `slave.allocated`
+        // because we rely on `slave.allocated` to avoid accounting for
+        // multiple copies of the same shared resources.
+        const Resources newShared = resources.shared()
+          .filter([this, &slaveId](const Resources& resource) {
+            return !slaves.at(slaveId).allocated.contains(resource);
+          });
+
+        // We remove the static reservation metadata here via `toUnreserved()`.
+        allocatedReservationScalarQuantities[role] +=
+          (resources.reserved(role).nonShared() + newShared)
+            .createStrippedScalarQuantity().toUnreserved();
+
         slave.allocated += resources;
 
         trackAllocatedResources(slaveId, frameworkId, resources);
@@ -1725,11 +1799,20 @@ void HierarchicalAllocatorProcess::__allocate()
   // Proceed with allocating the remaining free pool.
   foreach (const SlaveID& slaveId, slaveIds) {
     // If there are no resources available for the second stage, stop.
+    //
+    // TODO(mzhu): Breaking early here means that we may leave reservations
+    // unallocated. See MESOS-8293.
     if (!allocatable(remainingClusterResources - allocatedStage2)) {
       break;
     }
 
     foreach (const string& role, roleSorter->sort()) {
+      // In the second allocation stage, we only allocate
+      // for non-quota roles.
+      if (quotas.contains(role)) {
+        continue;
+      }
+
       // NOTE: Suppressed frameworks are not included in the sort.
       CHECK(frameworkSorters.contains(role));
       const Owned<Sorter>& frameworkSorter = frameworkSorters.at(role);
@@ -1783,16 +1866,8 @@ void HierarchicalAllocatorProcess::__allocate()
         // NOTE: Currently, frameworks are allowed to have '*' role.
         // Calling reserved('*') returns an empty Resources object.
         //
-        // NOTE: We do not offer roles with quota any more non-revocable
-        // resources once their quota is satisfied. However, note that this is
-        // not strictly true due to the coarse-grained nature (per agent) of the
-        // allocation algorithm in stage 1.
-        //
         // TODO(mpark): Offer unreserved resources as revocable beyond quota.
         Resources resources = available.allocatableTo(role);
-        if (quotas.contains(role)) {
-          resources -= available.unreserved();
-        }
 
         // It is safe to break here, because all frameworks under a role would
         // consider the same resources, so in case we don't have allocatable
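
To make the quota charging rule above concrete, here is a minimal
standalone sketch (not Mesos code; all names are illustrative, and it
uses plain doubles for a single scalar resource such as cpus):

  #include <iostream>

  // Consumed quota = reservations + unreserved allocation
  //                = reservations + (allocation - allocated reservations).
  double chargedAgainstQuota(
      double reservations,          // Reserved to the role, allocated or not.
      double allocation,            // Everything allocated to the role.
      double allocatedReservations) // The reserved part of the allocation.
  {
    return reservations + (allocation - allocatedReservations);
  }

  int main()
  {
    // A role reserved 4 cpus, of which 1 is allocated, and it also has
    // 2 unreserved cpus allocated: it is charged 4 + (3 - 1) = 6 cpus.
    std::cout << chargedAgainstQuota(4.0, 3.0, 1.0) << std::endl; // 6
    return 0;
  }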


[15/20] mesos git commit: Added MESOS-4527 to the 1.4.2 CHANGELOG.

Posted by bm...@apache.org.
Added MESOS-4527 to the 1.4.2 CHANGELOG.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7a1d3924
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7a1d3924
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7a1d3924

Branch: refs/heads/1.4.x
Commit: 7a1d39244bba02ba94e90b94e73378bbfa3be09b
Parents: 740be44
Author: Benjamin Mahler <bm...@apache.org>
Authored: Wed May 2 16:47:28 2018 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:54:24 2018 -0700

----------------------------------------------------------------------
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/7a1d3924/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index cf8dbac..4df94fc 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -3,6 +3,7 @@ Release Notes - Mesos - Version 1.4.2 (WIP)
 * This is a bug fix release.
 
 ** Bug
+  * [MESOS-4527] - Roles can exceed limit allocation via reservations.
   * [MESOS-6616] - Error: dereferencing type-punned pointer will break strict-aliasing rules.
   * [MESOS-7504] - Parent's mount namespace cannot be determined when launching a nested container.
   * [MESOS-7975] - The command/default/docker executor can incorrectly send a TASK_FINISHED update even when the task is killed.


[18/20] mesos git commit: Added MESOS-8293 to the 1.4.2 CHANGELOG.

Posted by bm...@apache.org.
Added MESOS-8293 to the 1.4.2 CHANGELOG.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0244bbc0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0244bbc0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0244bbc0

Branch: refs/heads/1.4.x
Commit: 0244bbc0268d5eac4251f15648de7f33443401a7
Parents: 8e8f151
Author: Benjamin Mahler <bm...@apache.org>
Authored: Wed May 2 16:51:17 2018 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:54:24 2018 -0700

----------------------------------------------------------------------
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/0244bbc0/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 63d14bc..9d956f7 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -12,6 +12,7 @@ Release Notes - Mesos - Version 1.4.2 (WIP)
   * [MESOS-8159] - ns::clone uses an async signal unsafe stack.
   * [MESOS-8171] - Using a failoverTimeout of 0 with Mesos native scheduler client can result in infinite subscribe loop.
   * [MESOS-8237] - Strip (Offer|Resource).allocation_info for non-MULTI_ROLE schedulers.
+  * [MESOS-8293] - Reservation may not be allocated when the role has no quota.
   * [MESOS-8297] - Built-in driver-based executors ignore kill task if the task has not been launched.
   * [MESOS-8356] - Persistent volume ownership is set to root despite of sandbox owner (frameworkInfo.user) when docker executor is used.
   * [MESOS-8411] - Killing a queued task can lead to the command executor never terminating.


[09/20] mesos git commit: Made quota resource allocation fine-grained.

Posted by bm...@apache.org.
Made quota resource allocation fine-grained.

The allocator now does fine-grained resource allocation up
to the role's quota limit, and a role's quota is only
considered satisfied once all of its quota resources are
satisfied. Previously, a role's quota was considered met
as soon as any one of its quota resources reached the limit.

Also fixed a few affected allocator tests.

Review: https://reviews.apache.org/r/64003/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/6fdd5c16
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/6fdd5c16
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/6fdd5c16

Branch: refs/heads/1.4.x
Commit: 6fdd5c165f7a43b7e07956f063e13b3f1cfe7bf9
Parents: dd1df2d
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Wed Dec 20 18:32:02 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:44:06 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 140 +++++++++++++++++++----
 src/tests/hierarchical_allocator_tests.cpp  | 114 +++++++++---------
 2 files changed, 169 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/6fdd5c16/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 2424484..565f109 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1589,27 +1589,14 @@ void HierarchicalAllocatorProcess::__allocate()
       // If quota for the role is considered satisfied, then we only
       // further allocate reservations for the role.
       //
-      // More precisely, we stop allocating unreserved resources if at
-      // least one of the resource guarantees is considered consumed.
-      // This technique prevents gaming of the quota allocation,
-      // see MESOS-6432.
-      //
-      // Longer term, we could ideally allocate what remains
-      // unsatisfied to allow an existing container to scale
-      // vertically, or to allow the launching of a container
-      // with best-effort cpus/mem/disk/etc.
-      //
       // TODO(alexr): Skipping satisfied roles is pessimistic. Better
       // alternatives are:
       //   * A custom sorter that is aware of quotas and sorts accordingly.
       //   * Removing satisfied roles from the sorter.
-      bool someGuaranteesReached = false;
-      foreach (const Resource& guarantee, quota.info.guarantee()) {
-        if (resourcesChargedAgainstQuota.contains(guarantee)) {
-          someGuaranteesReached = true;
-          break;
-        }
-      }
+      //
+      // This is a scalar quantity with no meta-data.
+      Resources unsatisfiedQuota = Resources(quota.info.guarantee()) -
+        resourcesChargedAgainstQuota;
 
       // Fetch frameworks according to their fair share.
       // NOTE: Suppressed frameworks are not included in the sort.
@@ -1658,18 +1645,119 @@ void HierarchicalAllocatorProcess::__allocate()
           }
         }
 
-        // If a role's quota limit has been reached, we only allocate
-        // its reservations. Otherwise, we allocate its reservations
-        // plus unreserved resources.
+        // We allocate the role's reservations as well as any unreserved
+        // resources while ensuring the role stays within its quota limits.
+        // This means that we'll "chop" the unreserved resources up to
+        // the quota limit if necessary.
+        //
+        // E.g. A role has no allocations or reservations yet and a 10 cpu
+        //      quota limit. We'll chop a 15 cpu agent down to only
+        //      allocate 10 cpus to the role to keep it within its limit.
         //
+        // In the case that the role needs some of the resources on this
+        // agent to make progress towards its quota, we'll *also* allocate
+        // all of the resources for which it does not have quota.
+        //
+        // E.g. The agent has 1 cpu, 1024 mem, 1024 disk, 1 gpu, 5 ports
+        //      and the role has quota for 1 cpu, 1024 mem. We'll include
+        //      the disk, gpu, and ports in the allocation, despite the
+        //      role not having any quota guarantee for them.
+        //
+        // We have to do this for now because it's not possible to set
+        // quota on non-scalar resources, like ports. However, for scalar
+        // resources that can have quota set, we should ideally make
+        // sure that when we include them, we're not violating some
+        // other role's guarantee!
+        //
+        // TODO(mzhu): Check the headroom when we're including scalar
+        // resources that this role does not have quota for.
+        //
+        // TODO(mzhu): Since we're treating the resources with unset
+        // quota as having no guarantee and no limit, these should
+        // also be allocated further in the second allocation "phase"
+        // below (above guarantee up to limit).
+
         // NOTE: Currently, frameworks are allowed to have '*' role.
         // Calling reserved('*') returns an empty Resources object.
         //
-        // TODO(mzhu): Do fine-grained allocations up to the limit.
-        // See MESOS-8293.
-        Resources resources = someGuaranteesReached ?
-          available.reserved(role).nonRevocable() :
-          available.allocatableTo(role).nonRevocable();
+        // NOTE: Since we currently only support top-level roles to
+        // have quota, there are no ancestor reservations involved here.
+        Resources resources = available.reserved(role).nonRevocable();
+
+        // Unreserved resources that are tentatively going to be
+        // allocated towards this role's quota. These resources may
+        // not get allocated due to framework filters.
+        Resources newQuotaAllocation;
+
+        if (!unsatisfiedQuota.empty()) {
+          Resources unreserved = available.nonRevocable().unreserved();
+
+          set<string> quotaResourceNames =
+            Resources(quota.info.guarantee()).names();
+
+          // When "chopping" resources, there is more than 1 "chop" that
+          // can be done to satisfy the limits. Consider the case with
+          // two disks of 1GB, one is PATH and another is MOUNT. And a role
+          // has a "disk" quota of 1GB. We could pick either of the disks here,
+          // but not both.
+          //
+          // In order to avoid repeatedly choosing the same "chop" of
+          // the resources each time we allocate, we introduce some
+          // randomness by shuffling the resources.
+          google::protobuf::RepeatedPtrField<Resource>
+            resourceVector = unreserved;
+          random_shuffle(resourceVector.begin(), resourceVector.end());
+
+          foreach (const Resource& resource, resourceVector) {
+            if (quotaResourceNames.count(resource.name()) == 0) {
+              // This resource has no quota set. Allocate it.
+              resources += resource;
+              continue;
+            }
+
+            // Currently, we guarantee that quota resources are scalars.
+            CHECK_EQ(resource.type(), Value::SCALAR);
+
+            Option<Value::Scalar> newUnsatisfiedQuotaScalar =
+              (unsatisfiedQuota -
+                newQuotaAllocation.createStrippedScalarQuantity())
+                  .get<Value::Scalar>(resource.name());
+
+            if (newUnsatisfiedQuotaScalar.isNone()) {
+              // Quota for this resource has been satisfied.
+              continue;
+            }
+
+            const Value::Scalar& resourceScalar = resource.scalar();
+
+            if (resourceScalar <= newUnsatisfiedQuotaScalar.get()) {
+              // We can safely allocate `resource` entirely here without
+              // breaking the quota limit.
+              resources += resource;
+              newQuotaAllocation += resource;
+
+              continue;
+            }
+
+            // At this point, we need to chop `resource` to satisfy
+            // the quota. We chop `resource` by making a copy and
+            // shrinking its scalar value.
+            Resource choppedResourceToAllocate = resource;
+            choppedResourceToAllocate.mutable_scalar()
+              ->CopyFrom(newUnsatisfiedQuotaScalar.get());
+
+            // Not all resources are choppable. Some resources, such as MOUNT
+            // disk, have to be offered entirely. We use the `contains()`
+            // utility to verify this: if a resource `contains()` a smaller
+            // version of itself, then it can be safely chopped.
+            if (!Resources(resource).contains(choppedResourceToAllocate)) {
+              continue;
+            }
+
+            resources += choppedResourceToAllocate;
+            newQuotaAllocation += choppedResourceToAllocate;
+          }
+        }
 
         // It is safe to break here, because all frameworks under a role would
         // consider the same resources, so in case we don't have allocatable
@@ -1717,6 +1805,8 @@ void HierarchicalAllocatorProcess::__allocate()
         offerable[frameworkId][role][slaveId] += resources;
         offeredSharedResources[slaveId] += resources.shared();
 
+        unsatisfiedQuota -= newQuotaAllocation.createStrippedScalarQuantity();
+
         // Update the tracking of allocated reservations.
         //
         // Note it is important to do this before updating `slave.allocated`

http://git-wip-us.apache.org/repos/asf/mesos/blob/6fdd5c16/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index 1f27af0..6ea7d50 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -2659,10 +2659,8 @@ TEST_F(HierarchicalAllocatorTest, MultipleFrameworksInRoleWithQuota)
 }
 
 
-// The allocator performs coarse-grained allocations, and allocations
-// to satisfy quota are no exception. A role may get more resources as
-// part of its quota if the agent remaining resources are greater than
-// the unsatisfied part of the role's quota.
+// Quota allocations should be fine-grained. A role should get no more
+// resources than its quota even if the agent has more resources to offer.
 TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
 {
   // Pausing the clock is not necessary, but ensures that the test
@@ -2683,15 +2681,6 @@ TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
   const Quota quota = createQuota(QUOTA_ROLE, "cpus:0.5;mem:200");
   allocator->setQuota(QUOTA_ROLE, quota);
 
-  // Create `framework2` in a non-quota'ed role.
-  FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
-  allocator->addFramework(framework2.id(), framework2, {}, true, {});
-
-  // Process all triggered allocation events.
-  //
-  // NOTE: No allocations happen because there are no resources to allocate.
-  Clock::settle();
-
   SlaveInfo agent = createSlaveInfo("cpus:1;mem:512;disk:0");
   allocator->addSlave(
       agent.id(),
@@ -2701,20 +2690,34 @@ TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
       agent.resources(),
       {});
 
-  // `framework1` will be offered all of `agent`'s resources because
-  // it is the only framework in the only role with unsatisfied quota
-  // and the allocator performs coarse-grained allocation.
+  // Due to fine-grained allocation, `framework1` will be offered the
+  // exact amount of quota resources on `agent` even though the agent
+  // has more resources to offer.
   Allocation expected = Allocation(
       framework1.id(),
-      {{QUOTA_ROLE, {{agent.id(), agent.resources()}}}});
+      {{QUOTA_ROLE, {{agent.id(), quota.info.guarantee()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
   // Total cluster resources: cpus=1, mem=512.
-  // QUOTA_ROLE share = 1 (cpus=1, mem=512) [quota: cpus=0.5, mem=200]
+  // QUOTA_ROLE share = 1 (cpus=0.5, mem=200) [quota: cpus=0.5, mem=200]
   //   framework1 share = 1
-  // NO_QUOTA_ROLE share = 0
-  //   framework2 share = 0
+
+  // Create `framework2` in a non-quota'ed role.
+  FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
+  allocator->addFramework(framework2.id(), framework2, {}, true, {});
+
+  // `framework2` will get the remaining resources of `agent`.
+  expected = Allocation(
+      framework2.id(),
+      {{NO_QUOTA_ROLE, {{agent.id(), agent.resources() -
+          Resources(quota.info.guarantee())}}}});
+
+  AWAIT_EXPECT_EQ(expected, allocations.get());
+
+  // Total cluster resources: cpus=1, mem=512.
+  // QUOTA_ROLE  (cpus=0.5, mem=200) [quota: cpus=0.5, mem=200]
+  // NO_QUOTA_ROLE  (cpus=0.5, mem=312)
 }
 
 
@@ -3161,7 +3164,7 @@ TEST_F(HierarchicalAllocatorTest, MultiQuotaWithFrameworks)
   // TODO(bmahler): Add assertions to test this is accurate!
   //
   // Total cluster resources (2 identical agents): cpus=2, mem=2048.
-  // QUOTA_ROLE1 share = 0.5 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
+  // QUOTA_ROLE1 share = 0.5 (cpus=1, mem=200) [quota: cpus=1, mem=200]
   //   framework1 share = 1
   // QUOTA_ROLE2 share = 0.5 (cpus=1, mem=1024) [quota: cpus=2, mem=2000]
   //   framework2 share = 1
@@ -3179,18 +3182,18 @@ TEST_F(HierarchicalAllocatorTest, MultiQuotaWithFrameworks)
       agent3.resources(),
       {});
 
-  // `framework2` will get all agent3's resources because its role is under
-  // quota, while other roles' quotas are satisfied.
+  // `framework2` will get some of agent3's resources to meet its quota.
   Allocation expected = Allocation(
       framework2.id(),
-      {{QUOTA_ROLE2, {{agent3.id(), agent3.resources()}}}});
+      {{QUOTA_ROLE2, {{agent3.id(),
+          Resources(quota2.info.guarantee()) - agent2.resources()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
   // Total cluster resources (3 agents): cpus=4, mem=4096.
-  // QUOTA_ROLE1 share = 0.25 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
+  // QUOTA_ROLE1 share = 0.33 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
   //   framework1 share = 1
-  // QUOTA_ROLE2 share = 0.75 (cpus=3, mem=3072) [quota: cpus=2, mem=2000]
+  // QUOTA_ROLE2 share = 0.66 (cpus=2, mem=2000) [quota: cpus=2, mem=2000]
   //   framework2 share = 1
 }
 
@@ -3318,14 +3321,14 @@ TEST_F(HierarchicalAllocatorTest, QuotaSetAsideReservedResources)
   FrameworkInfo framework1 = createFrameworkInfo({QUOTA_ROLE});
   allocator->addFramework(framework1.id(), framework1, {}, true, {});
 
-  const Quota quota = createQuota(QUOTA_ROLE, "cpus:4");
+  const Quota quota = createQuota(QUOTA_ROLE, "cpus:4;mem:512");
   allocator->setQuota(QUOTA_ROLE, quota);
 
-  // `framework1` will be offered resources at `agent1` because the
-  // resources at `agent2` are reserved for a different role.
+  // `framework1` will be offered resources at `agent1` up to its quota limit
+  // because the resources at `agent2` are reserved for a different role.
   Allocation expected = Allocation(
       framework1.id(),
-      {{QUOTA_ROLE, {{agent1.id(), agent1.resources()}}}});
+      {{QUOTA_ROLE, {{agent1.id(), quota.info.guarantee()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
@@ -3337,7 +3340,7 @@ TEST_F(HierarchicalAllocatorTest, QuotaSetAsideReservedResources)
   allocator->recoverResources(
       framework1.id(),
       agent1.id(),
-      allocatedResources(agent1.resources(), QUOTA_ROLE),
+      allocatedResources(quota.info.guarantee(), QUOTA_ROLE),
       longFilter);
 
   // Trigger a batch allocation for good measure, but don't expect any
@@ -4504,18 +4507,14 @@ TEST_F(HierarchicalAllocatorTest, DontOfferOldAgentToMultiRoleFramework)
 
 
 // This tests the behavior of quota when the allocation and
-// quota are disproportionate. The current approach (see MESOS-6432)
-// is to stop allocation quota once one of the resource
-// guarantees is reached. We test the following example:
+// quota are disproportionate. We always try to allocate
+// up to the limit for all resources.
+// We test the following example:
 //
 //        Quota: cpus:4;mem:1024
 //   Allocation: cpus:2;mem:1024
 //
-// Here no more allocation occurs to the role since the memory
-// guarantee is reached. Longer term, we could offer the
-// unsatisfied cpus to allow an existing container to scale
-// vertically, or to allow the launching of a container with
-// best-effort memory/disk, etc.
+// The role will only get cpus in this case to meet its remaining quota.
 TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
 {
   // Pausing the clock is not necessary, but ensures that the test
@@ -4528,27 +4527,23 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
   const string QUOTA_ROLE{"quota-role"};
   const string NO_QUOTA_ROLE{"no-quota-role"};
 
-  // Since resource allocation is coarse-grained, we use a quota such
-  // that mem can be satisfied from a single agent, but cpus requires
-  // multiple agents.
+  // We use a quota such that mem can be satisfied from a single agent,
+  // but cpus requires multiple agents.
   const string agentResources = "cpus:2;mem:1024";
   const string quotaResources = "cpus:4;mem:1024";
 
   Quota quota = createQuota(QUOTA_ROLE, quotaResources);
   allocator->setQuota(QUOTA_ROLE, quota);
 
-  // Register two frameworks where one is using a role with quota.
-  FrameworkInfo framework1 = createFrameworkInfo({QUOTA_ROLE});
-  FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
-  allocator->addFramework(framework1.id(), framework1, {}, true, {});
-  allocator->addFramework(framework2.id(), framework2, {}, true, {});
+  // Register `framework` under `QUOTA_ROLE`.
+  FrameworkInfo framework = createFrameworkInfo({QUOTA_ROLE});
+  allocator->addFramework(framework.id(), framework, {}, true, {});
 
   // Register an agent. This triggers an allocation of all of the
   // agent's resources to partially satisfy QUOTA_ROLE's quota. After
   // the allocation QUOTA_ROLE's quota for mem will be satisfied while
-  // still being below the set quota for cpus. With that QUOTA_ROLE
-  // will not receive more resources since we currently do not
-  // have an ability to offer out only the cpus.
+  // still being below the set quota for cpus. With that, the framework
+  // under QUOTA_ROLE will only receive cpus resources.
   SlaveInfo agent1 = createSlaveInfo(agentResources);
   allocator->addSlave(
       agent1.id(),
@@ -4559,18 +4554,12 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
       {});
 
   Allocation expected = Allocation(
-      framework1.id(),
+      framework.id(),
       {{QUOTA_ROLE, {{agent1.id(), agent1.resources()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
-  // Register a second agent. Since QUOTA_ROLE has reached one
-  // of its quota guarantees and quota currently acts as an upper
-  // limit, no further resources are allocated for quota.
-  // In addition, we "hold back" the resources of the second
-  // agent in order to ensure that the quota could be satisfied
-  // should the first framework decide to consume proportionally
-  // to its quota (e.g. cpus:2;mem:512 on each agent).
+  // Register a second agent.
   SlaveInfo agent2 = createSlaveInfo(agentResources);
   allocator->addSlave(
       agent2.id(),
@@ -4582,8 +4571,13 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
 
   Clock::settle();
 
-  Future<Allocation> allocation = allocations.get();
-  EXPECT_TRUE(allocation.isPending());
+  // `framework` will get its unsatisfied quota resources (2 cpus).
+  expected = Allocation(
+    framework.id(),
+    {{QUOTA_ROLE, {{agent2.id(),
+      Resources::parse(quotaResources).get() - agent1.resources()}}}});
+
+  AWAIT_EXPECT_EQ(expected, allocations.get());
 }
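
As a rough illustration of the "chopping" loop above, here is a
standalone sketch using named scalar quantities in place of Mesos
`Resources` (illustrative only; the shuffle mirrors the
`random_shuffle` call in the patch):

  #include <algorithm>
  #include <iostream>
  #include <map>
  #include <random>
  #include <string>
  #include <vector>

  struct ScalarResource { std::string name; double amount; };

  int main()
  {
    std::vector<ScalarResource> unreserved =
      {{"cpus", 1.0}, {"mem", 1024.0}, {"disk", 1024.0}, {"gpus", 1.0}};

    // The role still needs 0.5 cpus and 200 mem to meet its quota.
    std::map<std::string, double> unsatisfiedQuota =
      {{"cpus", 0.5}, {"mem", 200.0}};

    // Shuffle so we don't repeatedly pick the same "chop".
    std::mt19937 gen((std::random_device())());
    std::shuffle(unreserved.begin(), unreserved.end(), gen);

    std::vector<ScalarResource> toAllocate;
    for (const ScalarResource& r : unreserved) {
      auto it = unsatisfiedQuota.find(r.name);
      if (it == unsatisfiedQuota.end()) {
        toAllocate.push_back(r); // No quota set: allocate it entirely.
      } else if (it->second > 0.0) {
        // Chop down to the remaining quota if the resource is larger.
        double amount = std::min(r.amount, it->second);
        toAllocate.push_back({r.name, amount});
        it->second -= amount;
      } // Otherwise quota for this resource is met: skip it here.
    }

    for (const ScalarResource& r : toAllocate) {
      std::cout << r.name << ":" << r.amount << std::endl;
    }
    return 0;
  }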
 
 


[12/20] mesos git commit: Added a utility to shrink scalar resource while keeping its meta-data.

Posted by bm...@apache.org.
Added a utility to shrink scalar resource while keeping its meta-data.

This is particularly useful when performing fine-grained allocation
of resources by "chopping off" some of the resources when possible.
Note that some resources, e.g. MOUNT volumes, cannot be shrunk.

Review: https://reviews.apache.org/r/64798/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3f7b7f95
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3f7b7f95
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3f7b7f95

Branch: refs/heads/1.4.x
Commit: 3f7b7f95de18c01996a507fd8b20bdacd6ca9787
Parents: 2324ec8
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Fri Dec 22 13:49:27 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:44:18 2018 -0700

----------------------------------------------------------------------
 include/mesos/resources.hpp    |  7 +++++++
 include/mesos/v1/resources.hpp |  7 +++++++
 src/common/resources.cpp       | 23 +++++++++++++++++++++++
 src/v1/resources.cpp           | 23 +++++++++++++++++++++++
 4 files changed, 60 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/3f7b7f95/include/mesos/resources.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/resources.hpp b/include/mesos/resources.hpp
index 9f95949..688093f 100644
--- a/include/mesos/resources.hpp
+++ b/include/mesos/resources.hpp
@@ -310,6 +310,13 @@ public:
   // This must be called only when the resource is reserved!
   static const std::string& reservationRole(const Resource& resource);
 
+  // Shrinks a scalar type `resource` to the target size.
+  // Returns true if the resource was shrunk to the target size,
+  // or if the resource is already within the target size.
+  // Returns false otherwise (i.e. the resource is indivisible,
+  // e.g. a MOUNT volume).
+  static bool shrink(Resource* resource, const Value::Scalar& target);
+
   // Returns the summed up Resources given a hashmap<Key, Resources>.
   //
   // NOTE: While scalar resources such as "cpus" sum correctly,

http://git-wip-us.apache.org/repos/asf/mesos/blob/3f7b7f95/include/mesos/v1/resources.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/v1/resources.hpp b/include/mesos/v1/resources.hpp
index a621685..94125f8 100644
--- a/include/mesos/v1/resources.hpp
+++ b/include/mesos/v1/resources.hpp
@@ -310,6 +310,13 @@ public:
   // This must be called only when the resource is reserved!
   static const std::string& reservationRole(const Resource& resource);
 
+  // Shrinks a scalar type `resource` to the target size.
+  // Returns true if the resource was shrunk to the target size,
+  // or if the resource is already within the target size.
+  // Returns false otherwise (i.e. the resource is indivisible,
+  // e.g. a MOUNT volume).
+  static bool shrink(Resource* resource, const Value::Scalar& target);
+
   // Returns the summed up Resources given a hashmap<Key, Resources>.
   //
   // NOTE: While scalar resources such as "cpus" sum correctly,

http://git-wip-us.apache.org/repos/asf/mesos/blob/3f7b7f95/src/common/resources.cpp
----------------------------------------------------------------------
diff --git a/src/common/resources.cpp b/src/common/resources.cpp
index 8d43889..b31e8a7 100644
--- a/src/common/resources.cpp
+++ b/src/common/resources.cpp
@@ -1150,6 +1150,29 @@ const string& Resources::reservationRole(const Resource& resource)
   return resource.reservations().rbegin()->role();
 }
 
+
+bool Resources::shrink(Resource* resource, const Value::Scalar& target)
+{
+  if (resource->scalar() <= target) {
+    return true; // Already within target.
+  }
+
+  Resource copy = *resource;
+  copy.mutable_scalar()->CopyFrom(target);
+
+  // Some resources (e.g. MOUNT disk) are indivisible. We use
+  // a containment check to verify this. Specifically, if a
+  // resource contains a smaller version of itself, then it can
+  // safely be chopped into a smaller amount.
+  if (Resources(*resource).contains(copy)) {
+    resource->CopyFrom(copy);
+    return true;
+  }
+
+  return false;
+}
+
+
 /////////////////////////////////////////////////
 // Public member functions.
 /////////////////////////////////////////////////

http://git-wip-us.apache.org/repos/asf/mesos/blob/3f7b7f95/src/v1/resources.cpp
----------------------------------------------------------------------
diff --git a/src/v1/resources.cpp b/src/v1/resources.cpp
index 508f3f8..781fef6 100644
--- a/src/v1/resources.cpp
+++ b/src/v1/resources.cpp
@@ -1181,6 +1181,29 @@ const string& Resources::reservationRole(const Resource& resource)
   return resource.reservations().rbegin()->role();
 }
 
+
+bool Resources::shrink(Resource* resource, const Value::Scalar& target)
+{
+  if (resource->scalar() <= target) {
+    return true; // Already within target.
+  }
+
+  Resource copy = *resource;
+  copy.mutable_scalar()->CopyFrom(target);
+
+  // Some resources (e.g. MOUNT disk) are indivisible. We use
+  // a containment check to verify this. Specifically, if a
+  // resource contains a smaller version of itself, then it can
+  // safely be chopped into a smaller amount.
+  if (Resources(*resource).contains(copy)) {
+    resource->CopyFrom(copy);
+    return true;
+  }
+
+  return false;
+}
+
+
 /////////////////////////////////////////////////
 // Public member functions.
 /////////////////////////////////////////////////
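
For readers wanting to try the new helper, a hedged usage sketch
(this assumes a Mesos build environment providing
<mesos/resources.hpp>; the values are made up):

  #include <iostream>
  #include <mesos/resources.hpp>

  using mesos::Resource;
  using mesos::Resources;
  using mesos::Value;

  int main()
  {
    // A 10-cpu resource that we want to shrink down to 4 cpus.
    Resource cpus = Resources::parse("cpus", "10", "*").get();

    Value::Scalar target;
    target.set_value(4);

    if (Resources::shrink(&cpus, target)) {
      std::cout << "shrunk to: " << cpus << std::endl;
    } else {
      // Indivisible resources (e.g. MOUNT disk) are left intact.
      std::cout << "could not shrink" << std::endl;
    }
    return 0;
  }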


[10/20] mesos git commit: Moved the quota headroom tracking before quota allocation.

Posted by bm...@apache.org.
Moved the quota headroom tracking before quota allocation.

This paves the way for improved quota headroom enforcement
for the quota role allocation stage.

Review: https://reviews.apache.org/r/64761/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3d044094
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3d044094
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3d044094

Branch: refs/heads/1.4.x
Commit: 3d04409466602f3aa6f3bbf71debabeb1b319250
Parents: 6fdd5c1
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Wed Dec 20 18:52:21 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:44:11 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 192 ++++++++++++-----------
 1 file changed, 100 insertions(+), 92 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/3d044094/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 565f109..de6a7e2 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1536,6 +1536,97 @@ void HierarchicalAllocatorProcess::__allocate()
     }
   }
 
+  // We need to constantly make sure that we are holding back enough unreserved
+  // resources so that the remaining quota can later be satisfied when needed:
+  //
+  //   Required unreserved headroom =
+  //     sum (unsatisfied quota(r) - unallocated reservations(r))
+  //       for each quota role r
+  //
+  // Given the above, if a role has more reservations than quota,
+  // we don't need to hold back any unreserved headroom for it.
+  Resources requiredHeadroom;
+  foreachpair (const string& role, const Quota& quota, quotas) {
+    // NOTE: Revocable resources are excluded in `quotaRoleSorter`.
+    // NOTE: Only scalars are considered for quota.
+    // NOTE: The following should all be quantities with no meta-data!
+    Resources allocated = getQuotaRoleAllocatedResources(role);
+    const Resources guarantee = quota.info.guarantee();
+
+    if (allocated.contains(guarantee)) {
+      continue; // Quota already satisfied.
+    }
+
+    Resources unallocated = guarantee - allocated;
+
+    Resources unallocatedReservations =
+      reservationScalarQuantities.get(role).getOrElse(Resources()) -
+      allocatedReservationScalarQuantities.get(role).getOrElse(Resources());
+
+    requiredHeadroom += unallocated - unallocatedReservations;
+  }
+
+  // We will allocate resources while ensuring that the required
+  // unreserved non-revocable headroom is still available. Otherwise,
+  // we will not be able to satisfy quota later.
+  //
+  //   available headroom = unallocated unreserved non-revocable resources
+  //
+  // We compute this as:
+  //
+  //   available headroom = total resources -
+  //                        allocated resources -
+  //                        unallocated reservations -
+  //                        unallocated revocable resources
+
+  // NOTE: `totalScalarQuantities` omits dynamic reservation,
+  // persistent volume info, and allocation info. We additionally
+  // remove the static reservations here via `toUnreserved()`.
+  Resources availableHeadroom =
+    roleSorter->totalScalarQuantities().toUnreserved();
+
+  // Subtract allocated resources from the total.
+  foreachkey (const string& role, roles) {
+    // NOTE: `totalScalarQuantities` omits dynamic reservation,
+    // persistent volume info, and allocation info. We additionally
+    // remove the static reservations here via `toUnreserved()`.
+    availableHeadroom -=
+      roleSorter->allocationScalarQuantities(role).toUnreserved();
+  }
+
+  // Subtract all unallocated reservations.
+  foreachkey (const string& role, reservationScalarQuantities) {
+    hashmap<SlaveID, Resources> allocations;
+    if (quotaRoleSorter->contains(role)) {
+      allocations = quotaRoleSorter->allocation(role);
+    } else if (roleSorter->contains(role)) {
+      allocations = roleSorter->allocation(role);
+    }
+
+    Resources unallocatedReservations =
+      reservationScalarQuantities.get(role).getOrElse(Resources());
+
+    foreachvalue (const Resources& resources, allocations) {
+      // NOTE: `totalScalarQuantities` omits dynamic reservation,
+      // persistent volume info, and allocation info. We additionally
+      // remove the static reservations here via `toUnreserved()`.
+      unallocatedReservations -=
+        resources.reserved().createStrippedScalarQuantity().toUnreserved();
+    }
+
+    // Subtract the unallocated reservations for this role from the headroom.
+    availableHeadroom -= unallocatedReservations;
+  }
+
+  // Subtract revocable resources.
+  foreachvalue (const Slave& slave, slaves) {
+    // NOTE: `totalScalarQuantities` omits dynamic reservation,
+    // persistent volume info, and allocation info. We additionally
+    // remove the static reservations here via `toUnreserved()`.
+    availableHeadroom -= slave.available().revocable()
+      .createStrippedScalarQuantity().toUnreserved();
+  }
+
   // Due to the two stages in the allocation algorithm and the nature of
   // shared resources being re-offerable even if already allocated, the
   // same shared resources can appear in two (and not more due to the
@@ -1807,6 +1898,12 @@ void HierarchicalAllocatorProcess::__allocate()
 
         unsatisfiedQuota -= newQuotaAllocation.createStrippedScalarQuantity();
 
+        // Track quota headroom change.
+        Resources headroomDelta =
+          resources.unreserved().createStrippedScalarQuantity();
+        requiredHeadroom -= headroomDelta;
+        availableHeadroom -= headroomDelta;
+
         // Update the tracking of allocated reservations.
         //
         // Note it is important to do this before updating `slave.allocated`
@@ -1829,101 +1926,12 @@ void HierarchicalAllocatorProcess::__allocate()
     }
   }
 
-  // Frameworks in a quota'ed role may temporarily reject resources by
-  // filtering or suppressing offers. Hence, quotas may not be fully
-  // allocated and if so we need to hold back enough unreserved
-  // resources to ensure the quota can later be satisfied when needed:
-  //
-  //   Required unreserved headroom =
-  //     sum (unsatisfied quota(r) - unallocated reservations(r))
-  //       for each quota role r
-  //
-  // Given the above, if a role has more reservations than quota,
-  // we don't need to hold back any unreserved headroom for it.
-  Resources requiredHeadroom;
-  foreachpair (const string& role, const Quota& quota, quotas) {
-    // NOTE: Revocable resources are excluded in `quotaRoleSorter`.
-    // NOTE: Only scalars are considered for quota.
-    // NOTE: The following should all be quantities with no meta-data!
-    Resources allocated = getQuotaRoleAllocatedResources(role);
-    const Resources guarantee = quota.info.guarantee();
-
-    if (allocated.contains(guarantee)) {
-      continue; // Quota already satisfied.
-    }
-
-    Resources unallocated = guarantee - allocated;
-
-    Resources unallocatedReservations =
-      reservationScalarQuantities.get(role).getOrElse(Resources()) -
-      allocatedReservationScalarQuantities.get(role).getOrElse(Resources());
-
-    requiredHeadroom += unallocated - unallocatedReservations;
-  }
-
-  // We will allocate resources while ensuring that the required
-  // unreserved non-revocable headroom is still available. Otherwise,
-  // we will not be able to satisfy quota later. Reservations to
+  // Similar to the first stage, we will allocate resources while ensuring
+  // that the required unreserved non-revocable headroom is still available.
+  // Otherwise, we will not be able to satisfy quota later. Reservations to
   // non-quota roles and revocable resources will always be included
   // in the offers since these are not part of the headroom (and
   // therefore can't be used to satisfy quota).
-  //
-  //   available headroom = unallocated unreserved non-revocable resources
-  //
-  // We compute this as:
-  //
-  //   available headroom = total resources -
-  //                        allocated resources -
-  //                        unallocated reservations -
-  //                        unallocated revocable resources
-
-  // NOTE: `totalScalarQuantities` omits dynamic reservation,
-  // persistent volume info, and allocation info. We additionally
-  // remove the static reservations here via `toUnreserved()`.
-  Resources availableHeadroom =
-    roleSorter->totalScalarQuantities().toUnreserved();
-
-  // Subtract allocated resources from the total.
-  foreachkey (const string& role, roles) {
-    // NOTE: `totalScalarQuantities` omits dynamic reservation,
-    // persistent volume info, and allocation info. We additionally
-    // remove the static reservations here via `toUnreserved()`.
-    availableHeadroom -=
-      roleSorter->allocationScalarQuantities(role).toUnreserved();
-  }
-
-  // Subtract all unallocated reservations.
-  foreachkey (const string& role, reservationScalarQuantities) {
-    hashmap<SlaveID, Resources> allocations;
-    if (quotaRoleSorter->contains(role)) {
-      allocations = quotaRoleSorter->allocation(role);
-    } else if (roleSorter->contains(role)) {
-      allocations = roleSorter->allocation(role);
-    }
-
-    Resources unallocatedReservations =
-      reservationScalarQuantities.get(role).getOrElse(Resources());
-
-    foreachvalue (const Resources& resources, allocations) {
-      // NOTE: `totalScalarQuantities` omits dynamic reservation,
-      // persistent volume info, and allocation info. We additionally
-      // remove the static reservations here via `toUnreserved()`.
-      unallocatedReservations -=
-        resources.reserved().createStrippedScalarQuantity().toUnreserved();
-    }
-
-    // Subtract the unallocated reservations for this role from the headroom.
-    availableHeadroom -= unallocatedReservations;
-  }
-
-  // Subtract revocable resources.
-  foreachvalue (const Slave& slave, slaves) {
-    // NOTE: `totalScalarQuantities` omits dynamic reservation,
-    // persistent volume info, and allocation info. We additionally
-    // remove the static reservations here via `toUnreserved()`.
-    availableHeadroom -= slave.available().revocable()
-      .createStrippedScalarQuantity().toUnreserved();
-  }
 
   foreach (const SlaveID& slaveId, slaveIds) {
     foreach (const string& role, roleSorter->sort()) {
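
A standalone numeric sketch of the two headroom quantities above
(illustrative values only, over a single "cpus" dimension):

  #include <iostream>

  int main()
  {
    // Cluster-wide totals.
    double total = 100, allocated = 40;
    double unallocatedReservations = 10, unallocatedRevocable = 5;

    // available headroom = total - allocated
    //                      - unallocated reservations
    //                      - unallocated revocable resources.
    double availableHeadroom =
      total - allocated - unallocatedReservations - unallocatedRevocable;

    // One quota role with a guarantee of 30, an allocation of 12, and
    // 4 cpus of unallocated reservations:
    //   required headroom = unsatisfied quota - unallocated reservations.
    double requiredHeadroom = (30 - 12) - 4;

    // Prints "45 >= 14": enough headroom is being held back.
    std::cout << availableHeadroom << " >= " << requiredHeadroom << std::endl;
    return 0;
  }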


[05/20] mesos git commit: Tracked resource reservations in the allocator.

Posted by bm...@apache.org.
Tracked resource reservations in the allocator.

Resource reservations need to be tracked to make
sure the quota limit is not exceeded in the presence
of resource reservations.

Review: https://reviews.apache.org/r/64303/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/35b0e1cf
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/35b0e1cf
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/35b0e1cf

Branch: refs/heads/1.4.x
Commit: 35b0e1cf3da823ca5154989a283e06a37ba5e462
Parents: 69b3aa0
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Mon Dec 11 16:45:49 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:43:35 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 48 ++++++++++++++++++++++++
 src/master/allocator/mesos/hierarchical.hpp | 28 ++++++++++++++
 2 files changed, 76 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/35b0e1cf/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index b6db12e..b70868d 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -518,6 +518,8 @@ void HierarchicalAllocatorProcess::addSlave(
     slave.maintenance = Slave::Maintenance(unavailability.get());
   }
 
+  trackReservations(total.reservations());
+
   roleSorter->add(slaveId, total);
 
   // See comment at `quotaRoleSorter` declaration regarding non-revocable.
@@ -590,6 +592,8 @@ void HierarchicalAllocatorProcess::removeSlave(
   // See comment at `quotaRoleSorter` declaration regarding non-revocable.
   quotaRoleSorter->remove(slaveId, slaves.at(slaveId).total.nonRevocable());
 
+  untrackReservations(slaves.at(slaveId).total.reservations());
+
   slaves.erase(slaveId);
   allocationCandidates.erase(slaveId);
 
@@ -2303,6 +2307,42 @@ void HierarchicalAllocatorProcess::untrackFrameworkUnderRole(
 }
 
 
+void HierarchicalAllocatorProcess::trackReservations(
+    const hashmap<std::string, Resources>& reservations)
+{
+  foreachpair (const string& role,
+               const Resources& resources, reservations) {
+    // We remove the static reservation metadata here via `toUnreserved()`.
+    const Resources scalarQuantitiesToTrack =
+        resources.createStrippedScalarQuantity().toUnreserved();
+
+    reservationScalarQuantities[role] += scalarQuantitiesToTrack;
+  }
+}
+
+
+void HierarchicalAllocatorProcess::untrackReservations(
+    const hashmap<std::string, Resources>& reservations)
+{
+  foreachpair (const string& role,
+               const Resources& resources, reservations) {
+    CHECK(reservationScalarQuantities.contains(role));
+    Resources& currentReservationQuantity =
+        reservationScalarQuantities.at(role);
+
+    // We remove the static reservation metadata here via `toUnreserved()`.
+    const Resources scalarQuantitiesToUntrack =
+        resources.createStrippedScalarQuantity().toUnreserved();
+    CHECK(currentReservationQuantity.contains(scalarQuantitiesToUntrack));
+    currentReservationQuantity -= scalarQuantitiesToUntrack;
+
+    if (currentReservationQuantity.empty()) {
+      reservationScalarQuantities.erase(role);
+    }
+  }
+}
+
+
 bool HierarchicalAllocatorProcess::updateSlaveTotal(
     const SlaveID& slaveId,
     const Resources& total)
@@ -2319,6 +2359,14 @@ bool HierarchicalAllocatorProcess::updateSlaveTotal(
 
   slave.total = total;
 
+  hashmap<std::string, Resources> oldReservations = oldTotal.reservations();
+  hashmap<std::string, Resources> newReservations = total.reservations();
+
+  if (oldReservations != newReservations) {
+    untrackReservations(oldReservations);
+    trackReservations(newReservations);
+  }
+
   // Currently `roleSorter` and `quotaRoleSorter`, being the root-level
   // sorters, maintain all of `slaves[slaveId].total` (or the `nonRevocable()`
   // portion in the case of `quotaRoleSorter`) in their own totals (which

http://git-wip-us.apache.org/repos/asf/mesos/blob/35b0e1cf/src/master/allocator/mesos/hierarchical.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index 0d30179..3f15b2c 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -442,6 +442,14 @@ protected:
   // change in the future.
   hashmap<std::string, Quota> quotas;
 
+  // Aggregated resource reservations on all agents tied to a
+  // particular role, if any. These are stripped scalar quantities
+  // that contain no meta-data. Used to account for resource
+  // reservations when enforcing the quota limit.
+  //
+  // Only roles with non-empty reservations will be stored in the map.
+  hashmap<std::string, Resources> reservationScalarQuantities;
+
   // Slaves to send offers for.
   Option<hashset<std::string>> whitelist;
 
@@ -529,6 +537,26 @@ private:
       const FrameworkID& frameworkId,
       const std::string& role);
 
+  // `trackReservations` and `untrackReservations` are helpers
+  // to track role resource reservations. We need to keep
+  // track of reservations to enforce a role's quota limit
+  // in the presence of unallocated reservations. See MESOS-4527.
+  //
+  // TODO(mzhu): Ideally, we want these helpers to instead track the
+  // reservations as *allocated* in the sorters even when the
+  // reservations have not been allocated yet. This will help to:
+  //
+  //   (1) Solve the fairness issue when roles with unallocated
+  //       reservations may game the allocator (See MESOS-8299).
+  //
+  //   (2) Simplify the quota enforcement logic -- the allocator
+  //       would no longer need to track reservations separately.
+  void trackReservations(
+      const hashmap<std::string, Resources>& reservations);
+
+  void untrackReservations(
+      const hashmap<std::string, Resources>& reservations);
+
   // Helper to update the agent's total resources maintained in the allocator
   // and the role and quota sorters (whose total resources match the agent's
   // total resources). Returns true iff the stored agent total was changed.
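
The bookkeeping pattern behind these helpers can be sketched with a
plain map of role to reserved cpus (a simplification of the
`Resources`-based code above; names are illustrative):

  #include <cassert>
  #include <iostream>
  #include <map>
  #include <string>

  // role -> reserved scalar quantity (only non-empty entries are kept).
  std::map<std::string, double> reservationQuantities;

  void trackReservations(const std::map<std::string, double>& reservations)
  {
    for (const auto& entry : reservations) {
      reservationQuantities[entry.first] += entry.second;
    }
  }

  void untrackReservations(const std::map<std::string, double>& reservations)
  {
    for (const auto& entry : reservations) {
      assert(reservationQuantities.count(entry.first) > 0);
      reservationQuantities[entry.first] -= entry.second;
      if (reservationQuantities[entry.first] == 0.0) {
        reservationQuantities.erase(entry.first);
      }
    }
  }

  int main()
  {
    trackReservations({{"roleA", 2.0}});   // Agent added: 2 reserved cpus.
    trackReservations({{"roleA", 1.0}});   // Second agent: 1 reserved cpu.
    untrackReservations({{"roleA", 3.0}}); // Both agents removed.
    std::cout << reservationQuantities.size() << std::endl; // Prints "0".
    return 0;
  }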


[17/20] mesos git commit: Added MESOS-7099 to the 1.4.2 CHANGELOG.

Posted by bm...@apache.org.
Added MESOS-7099 to the 1.4.2 CHANGELOG.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/8e8f1510
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/8e8f1510
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/8e8f1510

Branch: refs/heads/1.4.x
Commit: 8e8f1510c056257dd8b99328f5b04e71790e0d8f
Parents: 7a1d392
Author: Benjamin Mahler <bm...@apache.org>
Authored: Wed May 2 16:50:10 2018 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:54:24 2018 -0700

----------------------------------------------------------------------
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/8e8f1510/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 4df94fc..63d14bc 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,7 @@ Release Notes - Mesos - Version 1.4.2 (WIP)
 ** Bug
   * [MESOS-4527] - Roles can exceed limit allocation via reservations.
   * [MESOS-6616] - Error: dereferencing type-punned pointer will break strict-aliasing rules.
+  * [MESOS-7099] - Quota can be exceeded due to coarse-grained offer technique.
   * [MESOS-7504] - Parent's mount namespace cannot be determined when launching a nested container.
   * [MESOS-7975] - The command/default/docker executor can incorrectly send a TASK_FINISHED update even when the task is killed.
   * [MESOS-8125] - Agent should properly handle recovering an executor when its pid is reused.


[03/20] mesos git commit: Updated the allocator to track allocations via a single code path.

Posted by bm...@apache.org.
Updated the allocator to track allocations via a single code path.

A helper was introduced for tracking allocated resources in the
sorters, and the two other copies of this logic were updated to
use it.

This also documents some of the addFramework/addSlave cases.

Review: https://reviews.apache.org/r/64238


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/cd450bc4
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/cd450bc4
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/cd450bc4

Branch: refs/heads/1.4.x
Commit: cd450bc41384532811a3a5213cf90d07214d7998
Parents: e2d52d3
Author: Benjamin Mahler <bm...@apache.org>
Authored: Thu Nov 30 16:36:42 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:43:25 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 117 ++++++++++++-----------
 src/master/allocator/mesos/hierarchical.hpp |   3 +-
 2 files changed, 64 insertions(+), 56 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/cd450bc4/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index dc6f27f..640f9ce 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -280,11 +280,16 @@ void HierarchicalAllocatorProcess::addFramework(
 
   // Update the allocation for this framework.
   foreachpair (const SlaveID& slaveId, const Resources& resources, used) {
+    // TODO(bmahler): The master won't tell us about resources
+    // allocated to agents that have not yet been added; consider
+    // CHECKing this case.
     if (!slaves.contains(slaveId)) {
       continue;
     }
 
-    trackAllocatedResources(slaveId, {{frameworkId, resources}});
+    // The slave struct will already be aware of the allocated
+    // resources, so we only need to track them in the sorters.
+    trackAllocatedResources(slaveId, frameworkId, resources);
   }
 
   LOG(INFO) << "Added framework " << frameworkId;
@@ -498,13 +503,6 @@ void HierarchicalAllocatorProcess::addSlave(
   CHECK(!slaves.contains(slaveId));
   CHECK(!paused || expectedAgentCount.isSome());
 
-  roleSorter->add(slaveId, total);
-
-  // See comment at `quotaRoleSorter` declaration regarding non-revocable.
-  quotaRoleSorter->add(slaveId, total.nonRevocable());
-
-  trackAllocatedResources(slaveId, used);
-
   slaves[slaveId] = Slave();
 
   Slave& slave = slaves.at(slaveId);
@@ -525,6 +523,35 @@ void HierarchicalAllocatorProcess::addSlave(
     slave.maintenance = Slave::Maintenance(unavailability.get());
   }
 
+  roleSorter->add(slaveId, total);
+
+  // See comment at `quotaRoleSorter` declaration regarding non-revocable.
+  quotaRoleSorter->add(slaveId, total.nonRevocable());
+
+  foreachpair (const FrameworkID& frameworkId,
+               const Resources& allocation,
+               used) {
+    // There are two cases here:
+    //
+    //   (1) The framework has already been added to the allocator.
+    //       In this case, we track the allocation in the sorters.
+    //
+    //   (2) The framework has not yet been added to the allocator.
+    //       The master will imminently add the framework using
+    //       the `FrameworkInfo` recovered from the agent, and in
+    //       the interim we do not track the resources allocated to
+    //       this framework. This leaves a small window where the
+    //       role sorting will under-account for the roles belonging
+    //       to this framework.
+    //
+    // TODO(bmahler): Fix the issue outlined in (2).
+    if (!frameworks.contains(frameworkId)) {
+      continue;
+    }
+
+    trackAllocatedResources(slaveId, frameworkId, allocation);
+  }
+
   // If we have just a number of recovered agents, we cannot distinguish
   // between "old" agents from the registry and "new" ones joined after
   // recovery has started. Because we do not persist enough information
@@ -1643,14 +1670,7 @@ void HierarchicalAllocatorProcess::__allocate()
 
         slave.allocated += resources;
 
-        // Resources allocated as part of the quota count towards the
-        // role's and the framework's fair share.
-        //
-        // NOTE: Revocable resources have already been excluded.
-        frameworkSorter->add(slaveId, resources);
-        frameworkSorter->allocated(frameworkId_, slaveId, resources);
-        roleSorter->allocated(role, slaveId, resources);
-        quotaRoleSorter->allocated(role, slaveId, resources);
+        trackAllocatedResources(slaveId, frameworkId, resources);
       }
     }
   }
@@ -1861,15 +1881,7 @@ void HierarchicalAllocatorProcess::__allocate()
 
         slave.allocated += resources;
 
-        frameworkSorter->add(slaveId, resources);
-        frameworkSorter->allocated(frameworkId_, slaveId, resources);
-        roleSorter->allocated(role, slaveId, resources);
-
-        if (quotas.contains(role)) {
-          // See comment at `quotaRoleSorter` declaration regarding
-          // non-revocable.
-          quotaRoleSorter->allocated(role, slaveId, resources.nonRevocable());
-        }
+        trackAllocatedResources(slaveId, frameworkId, resources);
       }
     }
   }
@@ -2373,41 +2385,36 @@ bool HierarchicalAllocatorProcess::isRemoteSlave(const Slave& slave) const
 
 void HierarchicalAllocatorProcess::trackAllocatedResources(
     const SlaveID& slaveId,
-    const hashmap<FrameworkID, Resources>& used)
+    const FrameworkID& frameworkId,
+    const Resources& allocated)
 {
-  // Update the allocation for each framework.
-  foreachpair (const FrameworkID& frameworkId,
-               const Resources& used_,
-               used) {
-    if (!frameworks.contains(frameworkId)) {
-      continue;
-    }
+  CHECK(slaves.contains(slaveId));
+  CHECK(frameworks.contains(frameworkId));
 
-    foreachpair (const string& role,
-                 const Resources& allocated,
-                 used_.allocations()) {
-      // The framework has resources allocated to this role but it may
-      // or may not be subscribed to the role. Either way, we need to
-      // track the framework under the role.
-      if (!isFrameworkTrackedUnderRole(frameworkId, role)) {
-        trackFrameworkUnderRole(frameworkId, role);
-      }
+  // TODO(bmahler): Calling allocations() is expensive since it has
+  // to construct a map. Avoid this.
+  foreachpair (const string& role,
+               const Resources& allocation,
+               allocated.allocations()) {
+    // The framework has resources allocated to this role but it may
+    // or may not be subscribed to the role. Either way, we need to
+    // track the framework under the role.
+    if (!isFrameworkTrackedUnderRole(frameworkId, role)) {
+      trackFrameworkUnderRole(frameworkId, role);
+    }
 
-      // TODO(bmahler): Validate that the reserved resources have the
-      // framework's role.
-      CHECK(roleSorter->contains(role));
-      CHECK(frameworkSorters.contains(role));
-      CHECK(frameworkSorters.at(role)->contains(frameworkId.value()));
+    CHECK(roleSorter->contains(role));
+    CHECK(frameworkSorters.contains(role));
+    CHECK(frameworkSorters.at(role)->contains(frameworkId.value()));
 
-      roleSorter->allocated(role, slaveId, allocated);
-      frameworkSorters.at(role)->add(slaveId, allocated);
-      frameworkSorters.at(role)->allocated(
-          frameworkId.value(), slaveId, allocated);
+    roleSorter->allocated(role, slaveId, allocation);
+    frameworkSorters.at(role)->add(slaveId, allocation);
+    frameworkSorters.at(role)->allocated(
+        frameworkId.value(), slaveId, allocation);
 
-      if (quotas.contains(role)) {
-        // See comment at `quotaRoleSorter` declaration regarding non-revocable.
-        quotaRoleSorter->allocated(role, slaveId, allocated.nonRevocable());
-      }
+    if (quotas.contains(role)) {
+      // See comment at `quotaRoleSorter` declaration regarding non-revocable.
+      quotaRoleSorter->allocated(role, slaveId, allocation.nonRevocable());
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/cd450bc4/src/master/allocator/mesos/hierarchical.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index ececf31..a53a952 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -542,7 +542,8 @@ private:
   // Helper to track used resources on an agent.
   void trackAllocatedResources(
       const SlaveID& slaveId,
-      const hashmap<FrameworkID, Resources>& used);
+      const FrameworkID& frameworkId,
+      const Resources& allocated);
 };
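
To make the consolidation concrete, here is a hypothetical, simplified
sketch of the single tracking path; the `Sorter` struct, the scalar
`cpus` argument, and the role-keyed maps below are stand-ins for the
Mesos sorter classes and for `Resources::allocations()`, so only the
control flow mirrors the diff:

#include <iostream>
#include <map>
#include <set>
#include <string>

// Hypothetical stand-in for the sorter interface; the real Mesos
// sorters track full Resources objects, not a single scalar.
struct Sorter
{
  void allocated(
      const std::string& client,
      const std::string& slaveId,
      double cpus)
  {
    std::cout << "allocated " << cpus << " cpus on " << slaveId
              << " to " << client << std::endl;
  }
};

Sorter roleSorter;
Sorter quotaRoleSorter;
std::map<std::string, Sorter> frameworkSorters;  // Keyed by role.
std::set<std::string> quotas;                    // Roles with quota set.

// Single code path: addFramework(), addSlave(), and both allocation
// stages in __allocate() all funnel through here, so the role sorter,
// the per-role framework sorters, and the quota role sorter can never
// diverge in how an allocation is accounted.
void trackAllocatedResources(
    const std::string& slaveId,
    const std::string& frameworkId,
    const std::map<std::string, double>& allocationsByRole)
{
  for (const auto& [role, cpus] : allocationsByRole) {
    roleSorter.allocated(role, slaveId, cpus);
    frameworkSorters[role].allocated(frameworkId, slaveId, cpus);

    // In the real code only the non-revocable portion is tracked here;
    // see the comment at the `quotaRoleSorter` declaration.
    if (quotas.count(role) > 0) {
      quotaRoleSorter.allocated(role, slaveId, cpus);
    }
  }
}

Funneling every caller through one function keeps the framework
tracking and the quota-role filtering consistent across the call
sites the diff consolidates.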
 
 


[11/20] mesos git commit: Fixed a bug where quota headroom is tracked incorrectly.

Posted by bm...@apache.org.
Fixed a bug where quota headroom is tracked incorrectly.

Only resources allocated towards a role's quota should be
subtracted from `requiredHeadroom`. In addition, the unreserved
resources offered in the quota allocation stage may contain
resources that the role has no quota for; these should not be
subtracted from `requiredHeadroom` either.

Review: https://reviews.apache.org/r/64792/
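
To make the accounting concrete with hypothetical numbers (not taken
from the commit): suppose a role is offered cpus:2, all unreserved, of
which only cpus:1 counts as new quota allocation for that role. Before
this fix, both `requiredHeadroom` and `availableHeadroom` were reduced
by the full unreserved cpus:2. With the fix, `requiredHeadroom` is
reduced only by the cpus:1 of new quota allocation, while
`availableHeadroom` is still reduced by the full unreserved cpus:2,
since those resources are no longer available to satisfy other roles'
quota.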


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/2324ec88
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/2324ec88
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/2324ec88

Branch: refs/heads/1.4.x
Commit: 2324ec88686badfa854b3e3f233fa0dbbdfb0c1f
Parents: 3d04409
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Thu Dec 21 17:27:30 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:44:15 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/2324ec88/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index de6a7e2..dade4ae 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1896,13 +1896,15 @@ void HierarchicalAllocatorProcess::__allocate()
         offerable[frameworkId][role][slaveId] += resources;
         offeredSharedResources[slaveId] += resources.shared();
 
-        unsatisfiedQuota -= newQuotaAllocation.createStrippedScalarQuantity();
+        Resources newQuotaAllocationScalarQuantities =
+          newQuotaAllocation.createStrippedScalarQuantity();
+
+        unsatisfiedQuota -= newQuotaAllocationScalarQuantities;
 
         // Track quota headroom change.
-        Resources headroomDelta =
+        requiredHeadroom -= newQuotaAllocationScalarQuantities;
+        availableHeadroom -=
           resources.unreserved().createStrippedScalarQuantity();
-        requiredHeadroom -= headroomDelta;
-        availableHeadroom -= headroomDelta;
 
         // Update the tracking of allocated reservations.
         //