Posted to commits@mesos.apache.org by bm...@apache.org on 2017/12/21 02:54:05 UTC

[1/3] mesos git commit: Made quota resource allocation fine-grained.

Repository: mesos
Updated Branches:
  refs/heads/master 3035c8828 -> b386fe110


Made quota resource allocation fine-grained.

The allocator now does fine-grained resource allocation up
to the role's quota limit, and a role's quota is only
considered satisfied once all of its quota resources are
satisfied. Previously, a role's quota was considered met
as soon as any one of its quota resources reached its limit.

Also fixed a few affected allocator tests.

Review: https://reviews.apache.org/r/64003/
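
To make the semantic change concrete, here is a minimal sketch in
plain C++ using toy scalar-quantity maps (invented names, not the
Mesos allocator API):

    #include <map>
    #include <string>

    // e.g. {"cpus": 2, "mem": 1024}
    using ScalarQuantities = std::map<std::string, double>;

    // Old behavior: quota was treated as met once ANY single
    // guarantee was reached.
    bool someGuaranteesReached(const ScalarQuantities& guarantee,
                               const ScalarQuantities& charged) {
      for (const auto& [name, limit] : guarantee) {
        auto it = charged.find(name);
        if (it != charged.end() && it->second >= limit) {
          return true;
        }
      }
      return false;
    }

    // New behavior: quota is satisfied only when EVERY guarantee
    // is reached; until then the allocator keeps allocating the
    // per-resource remainder (the "unsatisfied" quota).
    bool allGuaranteesReached(const ScalarQuantities& guarantee,
                              const ScalarQuantities& charged) {
      for (const auto& [name, limit] : guarantee) {
        auto it = charged.find(name);
        if (it == charged.end() || it->second < limit) {
          return false;
        }
      }
      return true;
    }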


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9aeef9ce
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9aeef9ce
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9aeef9ce

Branch: refs/heads/master
Commit: 9aeef9ce3a90829b2166dac931e64d5bbf18e230
Parents: 3035c88
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Wed Dec 20 18:32:02 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed Dec 20 18:47:33 2017 -0800

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 140 +++++++++++++++++++----
 src/tests/hierarchical_allocator_tests.cpp  | 120 +++++++++----------
 2 files changed, 172 insertions(+), 88 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/9aeef9ce/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 1c767f7..63a7327 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1668,27 +1668,14 @@ void HierarchicalAllocatorProcess::__allocate()
       // If quota for the role is considered satisfied, then we only
       // further allocate reservations for the role.
       //
-      // More precisely, we stop allocating unreserved resources if at
-      // least one of the resource guarantees is considered consumed.
-      // This technique prevents gaming of the quota allocation,
-      // see MESOS-6432.
-      //
-      // Longer term, we could ideally allocate what remains
-      // unsatisfied to allow an existing container to scale
-      // vertically, or to allow the launching of a container
-      // with best-effort cpus/mem/disk/etc.
-      //
       // TODO(alexr): Skipping satisfied roles is pessimistic. Better
       // alternatives are:
       //   * A custom sorter that is aware of quotas and sorts accordingly.
       //   * Removing satisfied roles from the sorter.
-      bool someGuaranteesReached = false;
-      foreach (const Resource& guarantee, quota.info.guarantee()) {
-        if (resourcesChargedAgainstQuota.contains(guarantee)) {
-          someGuaranteesReached = true;
-          break;
-        }
-      }
+      //
+      // NOTE: `unsatisfiedQuota` is a scalar quantity with no meta-data.
+      Resources unsatisfiedQuota = Resources(quota.info.guarantee()) -
+        resourcesChargedAgainstQuota;
 
       // Fetch frameworks according to their fair share.
       // NOTE: Suppressed frameworks are not included in the sort.
@@ -1738,18 +1725,119 @@ void HierarchicalAllocatorProcess::__allocate()
           }
         }
 
-        // If a role's quota limit has been reached, we only allocate
-        // its reservations. Otherwise, we allocate its reservations
-        // plus unreserved resources.
+        // We allocate the role's reservations as well as any unreserved
+        // resources while ensuring the role stays within its quota limits.
+        // This means that we'll "chop" the unreserved resources up to
+        // the quota limit if necessary.
+        //
+        // E.g. A role has no allocations or reservations yet and a 10 cpu
+        //      quota limit. We'll chop a 15 cpu agent down to only
+        //      allocate 10 cpus to the role to keep it within its limit.
         //
+        // In the case that the role needs some of the resources on this
+        // agent to make progress towards its quota, we'll *also* allocate
+        // all of the resources for which it does not have quota.
+        //
+        // E.g. The agent has 1 cpu, 1024 mem, 1024 disk, 1 gpu, 5 ports
+        //      and the role has quota for 1 cpu, 1024 mem. We'll include
+        //      the disk, gpu, and ports in the allocation, despite the
+        //      role not having any quota guarantee for them.
+        //
+        // We have to do this for now because it's not possible to set
+        // quota on non-scalar resources, like ports. However, for scalar
+        // resources that can have quota set, we should ideally make
+        // sure that when we include them, we're not violating some
+        // other role's guarantee!
+        //
+        // TODO(mzhu): Check the headroom when we're including scalar
+        // resources that this role does not have quota for.
+        //
+        // TODO(mzhu): Since we're treating the resources with unset
+        // quota as having no guarantee and no limit, these should
+        // also be allocated further in the second allocation "phase"
+        // below (above guarantee up to limit).
+
         // NOTE: Currently, frameworks are allowed to have '*' role.
         // Calling reserved('*') returns an empty Resources object.
         //
-        // TODO(mzhu): Do fine-grained allocations up to the limit.
-        // See MESOS-8293.
-        Resources resources = someGuaranteesReached ?
-          available.reserved(role).nonRevocable() :
-          available.allocatableTo(role).nonRevocable();
+        // NOTE: Since only top-level roles can currently have quota,
+        // there are no ancestor reservations involved here.
+        Resources resources = available.reserved(role).nonRevocable();
+
+        // Unreserved resources that are tentatively going to be
+        // allocated towards this role's quota. These resources may
+        // not get allocated due to framework filters.
+        Resources newQuotaAllocation;
+
+        if (!unsatisfiedQuota.empty()) {
+          Resources unreserved = available.nonRevocable().unreserved();
+
+          set<string> quotaResourceNames =
+            Resources(quota.info.guarantee()).names();
+
+          // When "chopping" resources, there is more than 1 "chop" that
+          // can be done to satisfy the limits. Consider the case of
+          // two 1GB disks, one of type PATH and one of type MOUNT,
+          // and a role with a "disk" quota of 1GB. We could pick
+          // either of the disks here, but not both.
+          //
+          // In order to avoid repeatedly choosing the same "chop" of
+          // the resources each time we allocate, we introduce some
+          // randomness by shuffling the resources.
+          google::protobuf::RepeatedPtrField<Resource>
+            resourceVector = unreserved;
+          random_shuffle(resourceVector.begin(), resourceVector.end());
+
+          foreach (const Resource& resource, resourceVector) {
+            if (quotaResourceNames.count(resource.name()) == 0) {
+              // This resource has no quota set. Allocate it.
+              resources += resource;
+              continue;
+            }
+
+            // Currently, we guarantee that quota resources are scalars.
+            CHECK_EQ(resource.type(), Value::SCALAR);
+
+            Option<Value::Scalar> newUnsatisfiedQuotaScalar =
+              (unsatisfiedQuota -
+                newQuotaAllocation.createStrippedScalarQuantity())
+                  .get<Value::Scalar>(resource.name());
+
+            if (newUnsatisfiedQuotaScalar.isNone()) {
+              // Quota for this resource has been satisfied.
+              continue;
+            }
+
+            const Value::Scalar& resourceScalar = resource.scalar();
+
+            if (resourceScalar <= newUnsatisfiedQuotaScalar.get()) {
+              // We can safely allocate `resource` entirely here without
+              // breaking the quota limit.
+              resources += resource;
+              newQuotaAllocation += resource;
+
+              continue;
+            }
+
+            // At this point, we need to chop `resource` to satisfy
+            // the quota. We chop `resource` by making a copy and
+            // shrinking its scalar value.
+            Resource choppedResourceToAllocate = resource;
+            choppedResourceToAllocate.mutable_scalar()
+              ->CopyFrom(newUnsatisfiedQuotaScalar.get());
+
+            // Not all resources are choppable. Some resources, such as
+            // MOUNT disks, have to be offered entirely. We use the
+            // `Resources::contains()` utility to verify this: if a resource
+            // contains a smaller version of itself, it can be safely chopped.
+            if (!Resources(resource).contains(choppedResourceToAllocate)) {
+              continue;
+            }
+
+            resources += choppedResourceToAllocate;
+            newQuotaAllocation += choppedResourceToAllocate;
+          }
+        }
 
         // It is safe to break here, because all frameworks under a role would
         // consider the same resources, so in case we don't have allocatable
@@ -1797,6 +1885,8 @@ void HierarchicalAllocatorProcess::__allocate()
         offerable[frameworkId][role][slaveId] += resources;
         offeredSharedResources[slaveId] += resources.shared();
 
+        unsatisfiedQuota -= newQuotaAllocation.createStrippedScalarQuantity();
+
         // Update the tracking of allocated reservations.
         //
         // Note it is important to do this before updating `slave.allocated`

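The chopping loop above can be summarized by the following sketch
(toy types and invented names, not the Mesos API; it omits the
`contains()` unchoppability check and framework filters):

    #include <algorithm>
    #include <map>
    #include <random>
    #include <string>
    #include <vector>

    struct Resource {
      std::string name;
      double scalar;
    };

    // Allocate each unreserved resource toward the role, shrinking
    // ("chopping") it so the role never exceeds its remaining quota.
    std::vector<Resource> chopToQuota(
        std::vector<Resource> unreserved,
        std::map<std::string, double>& unsatisfiedQuota) {
      // Shuffle so repeated allocations don't always pick the same
      // "chop" (e.g. always the same one of two 1GB disks).
      std::shuffle(unreserved.begin(), unreserved.end(),
                   std::mt19937{std::random_device{}()});

      std::vector<Resource> allocated;
      for (const Resource& r : unreserved) {
        auto it = unsatisfiedQuota.find(r.name);
        if (it == unsatisfiedQuota.end()) {
          // No quota set for this resource kind: allocate it all.
          allocated.push_back(r);
          continue;
        }
        if (it->second <= 0) {
          continue; // Quota for this resource is already satisfied.
        }
        // Allocate at most the remaining quota, chopping if needed.
        const double amount = std::min(r.scalar, it->second);
        allocated.push_back({r.name, amount});
        it->second -= amount;
      }
      return allocated;
    }
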
http://git-wip-us.apache.org/repos/asf/mesos/blob/9aeef9ce/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index 173e4fb..194730d 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -1651,7 +1651,7 @@ TEST_P(HierarchicalAllocatorTestWithReservations,
   // Note: 50 shared disk is allocated twice but should only count once.
 
   // Add agent2. This will trigger a batch allocation.
-  SlaveInfo agent2 = createSlaveInfo("cpus:0.5;mem:512;disk:50");
+  SlaveInfo agent2 = createSlaveInfo("cpus:1;mem:1024;disk:50");
   allocator->addSlave(
       agent2.id(),
       agent2,
@@ -1668,8 +1668,8 @@ TEST_P(HierarchicalAllocatorTestWithReservations,
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
   // Quota: "cpus:3;mem:2048;disk:100"
-  // Allocated quota: "cpus:2.5;mem:1536;disk:100"
-  // The quota is met (disk limit has been reached).
+  // Allocated quota: "cpus:3;mem:2048;disk:100"
+  // The quota is met.
 
   // Add agent3.
   // This will trigger a batch allocation.
@@ -3326,10 +3326,8 @@ TEST_F(HierarchicalAllocatorTest, MultipleFrameworksInRoleWithQuota)
 }
 
 
-// The allocator performs coarse-grained allocations, and allocations
-// to satisfy quota are no exception. A role may get more resources as
-// part of its quota if the agent remaining resources are greater than
-// the unsatisfied part of the role's quota.
+// Quota allocations should be fine-grained. A role should get no more
+// resources than its quota even if the agent has more resources to offer.
 TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
 {
   // Pausing the clock is not necessary, but ensures that the test
@@ -3350,15 +3348,6 @@ TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
   const Quota quota = createQuota(QUOTA_ROLE, "cpus:0.5;mem:200");
   allocator->setQuota(QUOTA_ROLE, quota);
 
-  // Create `framework2` in a non-quota'ed role.
-  FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
-  allocator->addFramework(framework2.id(), framework2, {}, true, {});
-
-  // Process all triggered allocation events.
-  //
-  // NOTE: No allocations happen because there are no resources to allocate.
-  Clock::settle();
-
   SlaveInfo agent = createSlaveInfo("cpus:1;mem:512;disk:0");
   allocator->addSlave(
       agent.id(),
@@ -3368,20 +3357,34 @@ TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
       agent.resources(),
       {});
 
-  // `framework1` will be offered all of `agent`'s resources because
-  // it is the only framework in the only role with unsatisfied quota
-  // and the allocator performs coarse-grained allocation.
+  // Due to fine-grained allocation, `framework1` will be offered the
+  // exact amount of quota resources on `agent` even though the agent
+  // has more resources to offer.
   Allocation expected = Allocation(
       framework1.id(),
-      {{QUOTA_ROLE, {{agent.id(), agent.resources()}}}});
+      {{QUOTA_ROLE, {{agent.id(), quota.info.guarantee()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
   // Total cluster resources: cpus=1, mem=512.
-  // QUOTA_ROLE share = 1 (cpus=1, mem=512) [quota: cpus=0.5, mem=200]
+  // QUOTA_ROLE share = 1 (cpus=0.5, mem=200) [quota: cpus=0.5, mem=200]
   //   framework1 share = 1
-  // NO_QUOTA_ROLE share = 0
-  //   framework2 share = 0
+
+  // Create `framework2` in a non-quota'ed role.
+  FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
+  allocator->addFramework(framework2.id(), framework2, {}, true, {});
+
+  // `framework2` will get the remaining resources of `agent`.
+  expected = Allocation(
+      framework2.id(),
+      {{NO_QUOTA_ROLE, {{agent.id(), agent.resources() -
+          Resources(quota.info.guarantee())}}}});
+
+  AWAIT_EXPECT_EQ(expected, allocations.get());
+
+  // Total cluster resources: cpus=1, mem=512.
+  // QUOTA_ROLE  (cpus=0.5, mem=200) [quota: cpus=0.5, mem=200]
+  // NO_QUOTA_ROLE  (cpus=0.5, mem=312)
 }
 
 
@@ -3828,7 +3831,7 @@ TEST_F(HierarchicalAllocatorTest, MultiQuotaWithFrameworks)
   // TODO(bmahler): Add assertions to test this is accurate!
   //
   // Total cluster resources (2 identical agents): cpus=2, mem=2048.
-  // QUOTA_ROLE1 share = 0.5 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
+  // QUOTA_ROLE1 share = 0.5 (cpus=1, mem=200) [quota: cpus=1, mem=200]
   //   framework1 share = 1
   // QUOTA_ROLE2 share = 0.5 (cpus=1, mem=1024) [quota: cpus=2, mem=2000]
   //   framework2 share = 1
@@ -3846,18 +3849,18 @@ TEST_F(HierarchicalAllocatorTest, MultiQuotaWithFrameworks)
       agent3.resources(),
       {});
 
-  // `framework2` will get all agent3's resources because its role is under
-  // quota, while other roles' quotas are satisfied.
+  // `framework2` will get some of agent3's resources to meet its quota.
   Allocation expected = Allocation(
       framework2.id(),
-      {{QUOTA_ROLE2, {{agent3.id(), agent3.resources()}}}});
+      {{QUOTA_ROLE2, {{agent3.id(),
+          Resources(quota2.info.guarantee()) - agent2.resources()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
   // Total cluster resources (3 agents): cpus=4, mem=4096.
-  // QUOTA_ROLE1 share = 0.25 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
+  // QUOTA_ROLE1 share = 0.33 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
   //   framework1 share = 1
-  // QUOTA_ROLE2 share = 0.75 (cpus=3, mem=3072) [quota: cpus=2, mem=2000]
+  // QUOTA_ROLE2 share = 0.66 (cpus=2, mem=2000) [quota: cpus=2, mem=2000]
   //   framework2 share = 1
 }
 
@@ -3985,14 +3988,14 @@ TEST_F(HierarchicalAllocatorTest, QuotaSetAsideReservedResources)
   FrameworkInfo framework1 = createFrameworkInfo({QUOTA_ROLE});
   allocator->addFramework(framework1.id(), framework1, {}, true, {});
 
-  const Quota quota = createQuota(QUOTA_ROLE, "cpus:4");
+  const Quota quota = createQuota(QUOTA_ROLE, "cpus:4;mem:512");
   allocator->setQuota(QUOTA_ROLE, quota);
 
-  // `framework1` will be offered resources at `agent1` because the
-  // resources at `agent2` are reserved for a different role.
+  // `framework1` will be offered resources at `agent1` up to its quota limit
+  // because the resources at `agent2` are reserved for a different role.
   Allocation expected = Allocation(
       framework1.id(),
-      {{QUOTA_ROLE, {{agent1.id(), agent1.resources()}}}});
+      {{QUOTA_ROLE, {{agent1.id(), quota.info.guarantee()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
@@ -4004,7 +4007,7 @@ TEST_F(HierarchicalAllocatorTest, QuotaSetAsideReservedResources)
   allocator->recoverResources(
       framework1.id(),
       agent1.id(),
-      allocatedResources(agent1.resources(), QUOTA_ROLE),
+      allocatedResources(quota.info.guarantee(), QUOTA_ROLE),
       longFilter);
 
   // Trigger a batch allocation for good measure, but don't expect any
@@ -5171,18 +5174,14 @@ TEST_F(HierarchicalAllocatorTest, DontOfferOldAgentToMultiRoleFramework)
 
 
 // This tests the behavior of quota when the allocation and
-// quota are disproportionate. The current approach (see MESOS-6432)
-// is to stop allocation quota once one of the resource
-// guarantees is reached. We test the following example:
+// quota are disproportionate. We always try to allocate up to
+// the limit for all resources. We test the following example:
 //
 //        Quota: cpus:4;mem:1024
 //   Allocation: cpus:2;mem:1024
 //
-// Here no more allocation occurs to the role since the memory
-// guarantee is reached. Longer term, we could offer the
-// unsatisfied cpus to allow an existing container to scale
-// vertically, or to allow the launching of a container with
-// best-effort memory/disk, etc.
+// The role will only get cpus in this case, to meet its remaining quota.
 TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
 {
   // Pausing the clock is not necessary, but ensures that the test
@@ -5195,27 +5194,23 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
   const string QUOTA_ROLE{"quota-role"};
   const string NO_QUOTA_ROLE{"no-quota-role"};
 
-  // Since resource allocation is coarse-grained, we use a quota such
-  // that mem can be satisfied from a single agent, but cpus requires
-  // multiple agents.
+  // We use a quota such that mem can be satisfied from a single agent,
+  // but cpus requires multiple agents.
   const string agentResources = "cpus:2;mem:1024";
   const string quotaResources = "cpus:4;mem:1024";
 
   Quota quota = createQuota(QUOTA_ROLE, quotaResources);
   allocator->setQuota(QUOTA_ROLE, quota);
 
-  // Register two frameworks where one is using a role with quota.
-  FrameworkInfo framework1 = createFrameworkInfo({QUOTA_ROLE});
-  FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
-  allocator->addFramework(framework1.id(), framework1, {}, true, {});
-  allocator->addFramework(framework2.id(), framework2, {}, true, {});
+  // Register `framework` under `QUOTA_ROLE`.
+  FrameworkInfo framework = createFrameworkInfo({QUOTA_ROLE});
+  allocator->addFramework(framework.id(), framework, {}, true, {});
 
   // Register an agent. This triggers an allocation of all of the
   // agent's resources to partially satisfy QUOTA_ROLE's quota. After
   // the allocation QUOTA_ROLE's quota for mem will be satisfied while
-  // still being below the set quota for cpus. With that QUOTA_ROLE
-  // will not receive more resources since we currently do not
-  // have an ability to offer out only the cpus.
+  // still being below the set quota for cpus. With that, the framework
+  // under QUOTA_ROLE will only receive cpus resources.
   SlaveInfo agent1 = createSlaveInfo(agentResources);
   allocator->addSlave(
       agent1.id(),
@@ -5226,18 +5221,12 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
       {});
 
   Allocation expected = Allocation(
-      framework1.id(),
+      framework.id(),
       {{QUOTA_ROLE, {{agent1.id(), agent1.resources()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
-  // Register a second agent. Since QUOTA_ROLE has reached one
-  // of its quota guarantees and quota currently acts as an upper
-  // limit, no further resources are allocated for quota.
-  // In addition, we "hold back" the resources of the second
-  // agent in order to ensure that the quota could be satisfied
-  // should the first framework decide to consume proportionally
-  // to its quota (e.g. cpus:2;mem:512 on each agent).
+  // Register a second agent.
   SlaveInfo agent2 = createSlaveInfo(agentResources);
   allocator->addSlave(
       agent2.id(),
@@ -5249,8 +5238,13 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
 
   Clock::settle();
 
-  Future<Allocation> allocation = allocations.get();
-  EXPECT_TRUE(allocation.isPending());
+  // `framework` will get its unsatisfied quota resources (2 cpus).
+  expected = Allocation(
+      framework.id(),
+      {{QUOTA_ROLE, {{agent2.id(),
+          Resources::parse(quotaResources).get() - agent1.resources()}}}});
+
+  AWAIT_EXPECT_EQ(expected, allocations.get());
 }
 
 


[2/3] mesos git commit: Added a test to ensure MOUNT disk is not chopped during allocation.

Posted by bm...@apache.org.
Added a test to ensure MOUNT disk is not chopped during allocation.

While quota allocation should be fine-grained, some resources are
unchoppable. They have to be offered entirely. This test verifies
one such case: disk resources of type MOUNT.

Review: https://reviews.apache.org/r/64758/
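
A minimal sketch of the choppability rule the test exercises (toy
types and a hypothetical stand-in, not the Mesos API; the real check
is `Resources(resource).contains(choppedResourceToAllocate)` in the
allocator):

    // A resource is choppable only if it "contains" a shrunken copy
    // of itself. A MOUNT disk does not, so it must be offered whole.
    struct Disk {
      double megabytes;
      bool isMount; // MOUNT disks must be consumed in their entirety.
    };

    // Hypothetical stand-in for Resources::contains().
    bool contains(const Disk& original, const Disk& chopped) {
      if (original.isMount) {
        // Only an equal-sized copy is contained; no chopping allowed.
        return chopped.isMount && chopped.megabytes == original.megabytes;
      }
      return chopped.megabytes <= original.megabytes;
    }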


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/2f6981e0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/2f6981e0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/2f6981e0

Branch: refs/heads/master
Commit: 2f6981e0ea544544c3c3101a80281b5309f1acaa
Parents: 9aeef9c
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Wed Dec 20 18:49:42 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed Dec 20 18:51:41 2017 -0800

----------------------------------------------------------------------
 src/tests/hierarchical_allocator_tests.cpp | 100 ++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/2f6981e0/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index 194730d..c98bebb 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -3387,6 +3387,106 @@ TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
   // NO_QUOTA_ROLE  (cpus=0.5, mem=312)
 }
 
+// While quota allocation should be fine-grained, some resources are
+// unchoppable. They have to be offered entirely. This test verifies
+// one such case: disk resources of type MOUNT.
+TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularityUnchoppableResource)
+{
+  Clock::pause();
+  initialize();
+
+  const string QUOTA_ROLE{"quota-role"};
+
+  const Quota quota = createQuota(QUOTA_ROLE, "cpus:2;mem:2048;disk:150");
+  allocator->setQuota(QUOTA_ROLE, quota);
+
+  // Create a 100 MB disk resource of type MOUNT.
+  Resources mountDiskResource =
+    createDiskResource("100", "*", None(), None(),
+      createDiskSourceMount(), false);
+
+  Resources agentResources =
+    Resources::parse("cpus:0.5;mem:512").get() + mountDiskResource;
+
+  SlaveInfo agent1 = createSlaveInfo(agentResources);
+  allocator->addSlave(
+      agent1.id(),
+      agent1,
+      AGENT_CAPABILITIES(),
+      None(),
+      agent1.resources(),
+      {});
+
+  // Create `framework` under `QUOTA_ROLE`.
+  // This will trigger an event-driven allocation loop.
+  FrameworkInfo framework = createFrameworkInfo({QUOTA_ROLE});
+  allocator->addFramework(framework.id(), framework, {}, true, {});
+
+  Clock::settle();
+
+  // `framework` will get all resources of `agent1` because it is the only
+  // framework.
+  Allocation expected = Allocation(
+      framework.id(),
+      {{QUOTA_ROLE, {{agent1.id(), agent1.resources()}}}});
+
+  AWAIT_EXPECT_EQ(expected, allocations.get());
+
+  // Quota: "cpus:2;mem:2048;disk:150".
+  // Allocated quota: "cpus:0.5;mem:512;disk:100".
+
+  // Add `agent2` with identical resources.
+  // This will trigger an event-driven allocation for `agent2`.
+  SlaveInfo agent2 = createSlaveInfo(agentResources);
+  allocator->addSlave(
+      agent2.id(),
+      agent2,
+      AGENT_CAPABILITIES(),
+      None(),
+      agent2.resources(),
+      {});
+
+  Clock::settle();
+
+  // `framework` will get all resources of `agent2` except the disk,
+  // because the unsatisfied disk quota of `QUOTA_ROLE` is 50 while
+  // `agent2` offers a 100 MB MOUNT disk, which cannot be chopped.
+  expected = Allocation(framework.id(),
+      {{QUOTA_ROLE, {{agent2.id(), agent2.resources() - mountDiskResource}}}});
+
+  AWAIT_EXPECT_EQ(expected, allocations.get());
+
+  // Quota: "cpus:2;mem:2048;disk:150".
+  // Allocated quota: "cpus:1;mem:1024;disk:100".
+
+  Resources allocatedQuota =
+    agent1.resources() + agent2.resources() - mountDiskResource;
+
+  // Create `agent3` with enough resources to meet `QUOTA_ROLE`'s quota.
+  // The disk resource of `agent3` can be chopped to satisfy the quota.
+  SlaveInfo agent3 = createSlaveInfo("cpus:2;mem:2048;disk:150");
+  allocator->addSlave(
+      agent3.id(),
+      agent3,
+      AGENT_CAPABILITIES(),
+      None(),
+      agent3.resources(),
+      {});
+
+  Clock::settle();
+
+  expected = Allocation(framework.id(),
+      {{QUOTA_ROLE, {{agent3.id(),
+        quota.info.guarantee() -
+          allocatedQuota.createStrippedScalarQuantity()}}}});
+
+  AWAIT_EXPECT_EQ(expected, allocations.get());
+
+  // Quota: "cpus:2;mem:2048;disk:150".
+  // Allocated quota: "cpus:2;mem:2048;disk:150",
+  // out of the 150 disk resource, 100 is MOUNT type, 50 is default type.
+}
+
 
 // This test verifies, that the free pool (what is left after all quotas
 // are satisfied) is allocated according to the DRF algorithm across the roles


[3/3] mesos git commit: Moved the quota headroom tracking before quota allocation.

Posted by bm...@apache.org.
Moved the quota headroom tracking before quota allocation.

This paves the way for improved quota headroom enforcement
in the quota role allocation stage.

Review: https://reviews.apache.org/r/64761/
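
The headroom bookkeeping being moved can be illustrated with a small
worked example (toy single-resource arithmetic with invented numbers,
not the Mesos API; the real code uses Resources quantities, whose
subtraction floors at zero):

    #include <iostream>

    int main() {
      // Hypothetical cluster, tracking only cpus.
      double total = 10;                  // total cpus in the cluster
      double allocated = 4;               // cpus already allocated
      double unallocatedReservations = 1; // reserved but not allocated
      double unallocatedRevocable = 2;    // revocable; can't satisfy quota

      // One quota role with a 6-cpu guarantee, 4 cpus charged so far.
      double quotaGuarantee = 6;
      double quotaAllocated = 4;

      // required headroom =
      //   sum over quota roles of
      //     (unsatisfied quota - unallocated reservations)
      double requiredHeadroom =
        (quotaGuarantee - quotaAllocated) - unallocatedReservations; // 1

      // available headroom = total - allocated
      //   - unallocated reservations - unallocated revocable
      double availableHeadroom =
        total - allocated - unallocatedReservations -
        unallocatedRevocable; // 3

      // The allocator may hand out unreserved resources only while
      // availableHeadroom stays at or above requiredHeadroom.
      std::cout << requiredHeadroom << " " << availableHeadroom << "\n";
      return 0;
    }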


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b386fe11
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b386fe11
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b386fe11

Branch: refs/heads/master
Commit: b386fe1108d2d41dedc74a7059d8d70018ac28e7
Parents: 2f6981e
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Wed Dec 20 18:52:21 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed Dec 20 18:52:21 2017 -0800

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 192 ++++++++++++-----------
 1 file changed, 100 insertions(+), 92 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/b386fe11/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 63a7327..247405a 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1615,6 +1615,97 @@ void HierarchicalAllocatorProcess::__allocate()
     }
   }
 
+  // We need to constantly make sure that we are holding back enough
+  // unreserved resources so that the remaining quota can later be
+  // satisfied when needed:
+  //
+  //   Required unreserved headroom =
+  //     sum (unsatisfied quota(r) - unallocated reservations(r))
+  //       for each quota role r
+  //
+  // Given the above, if a role has more reservations than quota,
+  // we don't need to hold back any unreserved headroom for it.
+  Resources requiredHeadroom;
+  foreachpair (const string& role, const Quota& quota, quotas) {
+    // NOTE: Revocable resources are excluded in `quotaRoleSorter`.
+    // NOTE: Only scalars are considered for quota.
+    // NOTE: The following should all be quantities with no meta-data!
+    Resources allocated = getQuotaRoleAllocatedResources(role);
+    const Resources guarantee = quota.info.guarantee();
+
+    if (allocated.contains(guarantee)) {
+      continue; // Quota already satisfied.
+    }
+
+    Resources unallocated = guarantee - allocated;
+
+    Resources unallocatedReservations =
+      reservationScalarQuantities.get(role).getOrElse(Resources()) -
+      allocatedReservationScalarQuantities.get(role).getOrElse(Resources());
+
+    requiredHeadroom += unallocated - unallocatedReservations;
+  }
+
+  // We will allocate resources while ensuring that the required
+  // unreserved non-revocable headroom is still available. Otherwise,
+  // we will not be able to satisfy quota later.
+  //
+  //   available headroom = unallocated unreserved non-revocable resources
+  //
+  // We compute this as:
+  //
+  //   available headroom = total resources -
+  //                        allocated resources -
+  //                        unallocated reservations -
+  //                        unallocated revocable resources
+
+  // NOTE: `totalScalarQuantities` omits dynamic reservation,
+  // persistent volume info, and allocation info. We additionally
+  // remove the static reservations here via `toUnreserved()`.
+  Resources availableHeadroom =
+    roleSorter->totalScalarQuantities().toUnreserved();
+
+  // Subtract allocated resources from the total.
+  foreachkey (const string& role, roles) {
+    // NOTE: `totalScalarQuantities` omits dynamic reservation,
+    // persistent volume info, and allocation info. We additionally
+    // remove the static reservations here via `toUnreserved()`.
+    availableHeadroom -=
+      roleSorter->allocationScalarQuantities(role).toUnreserved();
+  }
+
+  // Subtract all unallocated reservations.
+  foreachkey (const string& role, reservationScalarQuantities) {
+    hashmap<SlaveID, Resources> allocations;
+    if (quotaRoleSorter->contains(role)) {
+      allocations = quotaRoleSorter->allocation(role);
+    } else if (roleSorter->contains(role)) {
+      allocations = roleSorter->allocation(role);
+    }
+
+    Resources unallocatedReservations =
+      reservationScalarQuantities.get(role).getOrElse(Resources());
+
+    foreachvalue (const Resources& resources, allocations) {
+      // NOTE: `totalScalarQuantities` omits dynamic reservation,
+      // persistent volume info, and allocation info. We additionally
+      // remove the static reservations here via `toUnreserved()`.
+      unallocatedReservations -=
+        resources.reserved().createStrippedScalarQuantity().toUnreserved();
+    }
+
+    // Subtract the unallocated reservations for this role from the headroom.
+    availableHeadroom -= unallocatedReservations;
+  }
+
+  // Subtract revocable resources.
+  foreachvalue (const Slave& slave, slaves) {
+    // NOTE: `totalScalarQuantities` omits dynamic reservation,
+    // persistent volume info, and allocation info. We additionally
+    // remove the static reservations here via `toUnreserved()`.
+    availableHeadroom -= slave.available().revocable()
+      .createStrippedScalarQuantity().toUnreserved();
+  }
+
   // Due to the two stages in the allocation algorithm and the nature of
   // shared resources being re-offerable even if already allocated, the
   // same shared resources can appear in two (and not more due to the
@@ -1887,6 +1978,12 @@ void HierarchicalAllocatorProcess::__allocate()
 
         unsatisfiedQuota -= newQuotaAllocation.createStrippedScalarQuantity();
 
+        // Track quota headroom change.
+        Resources headroomDelta =
+          resources.unreserved().createStrippedScalarQuantity();
+        requiredHeadroom -= headroomDelta;
+        availableHeadroom -= headroomDelta;
+
         // Update the tracking of allocated reservations.
         //
         // Note it is important to do this before updating `slave.allocated`
@@ -1909,101 +2006,12 @@ void HierarchicalAllocatorProcess::__allocate()
     }
   }
 
-  // Frameworks in a quota'ed role may temporarily reject resources by
-  // filtering or suppressing offers. Hence, quotas may not be fully
-  // allocated and if so we need to hold back enough unreserved
-  // resources to ensure the quota can later be satisfied when needed:
-  //
-  //   Required unreserved headroom =
-  //     sum (unsatisfied quota(r) - unallocated reservations(r))
-  //       for each quota role r
-  //
-  // Given the above, if a role has more reservations than quota,
-  // we don't need to hold back any unreserved headroom for it.
-  Resources requiredHeadroom;
-  foreachpair (const string& role, const Quota& quota, quotas) {
-    // NOTE: Revocable resources are excluded in `quotaRoleSorter`.
-    // NOTE: Only scalars are considered for quota.
-    // NOTE: The following should all be quantities with no meta-data!
-    Resources allocated = getQuotaRoleAllocatedResources(role);
-    const Resources guarantee = quota.info.guarantee();
-
-    if (allocated.contains(guarantee)) {
-      continue; // Quota already satisifed.
-    }
-
-    Resources unallocated = guarantee - allocated;
-
-    Resources unallocatedReservations =
-      reservationScalarQuantities.get(role).getOrElse(Resources()) -
-      allocatedReservationScalarQuantities.get(role).getOrElse(Resources());
-
-    requiredHeadroom += unallocated - unallocatedReservations;
-  }
-
-  // We will allocate resources while ensuring that the required
-  // unreserved non-revocable headroom is still available. Otherwise,
-  // we will not be able to satisfy quota later. Reservations to
+  // Similar to the first stage, we will allocate resources while ensuring
+  // that the required unreserved non-revocable headroom is still available.
+  // Otherwise, we will not be able to satisfy quota later. Reservations to
   // non-quota roles and revocable resources will always be included
   // in the offers since these are not part of the headroom (and
   // therefore can't be used to satisfy quota).
-  //
-  //   available headroom = unallocated unreserved non-revocable resources
-  //
-  // We compute this as:
-  //
-  //   available headroom = total resources -
-  //                        allocated resources -
-  //                        unallocated reservations -
-  //                        unallocated revocable resources
-
-  // NOTE: `totalScalarQuantities` omits dynamic reservation,
-  // persistent volume info, and allocation info. We additionally
-  // remove the static reservations here via `toUnreserved()`.
-  Resources availableHeadroom =
-    roleSorter->totalScalarQuantities().toUnreserved();
-
-  // Subtract allocated resources from the total.
-  foreachkey (const string& role, roles) {
-    // NOTE: `totalScalarQuantities` omits dynamic reservation,
-    // persistent volume info, and allocation info. We additionally
-    // remove the static reservations here via `toUnreserved()`.
-    availableHeadroom -=
-      roleSorter->allocationScalarQuantities(role).toUnreserved();
-  }
-
-  // Subtract all unallocated reservations.
-  foreachkey (const string& role, reservationScalarQuantities) {
-    hashmap<SlaveID, Resources> allocations;
-    if (quotaRoleSorter->contains(role)) {
-      allocations = quotaRoleSorter->allocation(role);
-    } else if (roleSorter->contains(role)) {
-      allocations = roleSorter->allocation(role);
-    }
-
-    Resources unallocatedReservations =
-      reservationScalarQuantities.get(role).getOrElse(Resources());
-
-    foreachvalue (const Resources& resources, allocations) {
-      // NOTE: `totalScalarQuantities` omits dynamic reservation,
-      // persistent volume info, and allocation info. We additionally
-      // remove the static reservations here via `toUnreserved()`.
-      unallocatedReservations -=
-        resources.reserved().createStrippedScalarQuantity().toUnreserved();
-    }
-
-    // Subtract the unallocated reservations for this role from the headroom.
-    availableHeadroom -= unallocatedReservations;
-  }
-
-  // Subtract revocable resources.
-  foreachvalue (const Slave& slave, slaves) {
-    // NOTE: `totalScalarQuantities` omits dynamic reservation,
-    // persistent volume info, and allocation info. We additionally
-    // remove the static reservations here via `toUnreserved()`.
-    availableHeadroom -= slave.available().revocable()
-      .createStrippedScalarQuantity().toUnreserved();
-  }
 
   foreach (const SlaveID& slaveId, slaveIds) {
     foreach (const string& role, roleSorter->sort()) {