You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2018/05/03 00:01:05 UTC

[09/20] mesos git commit: Made quota resource allocation fine-grained.

Made quota resource allocation fine-grained.

The allocator now does fine-grained resource allocation up
to the role's quota limit. And a role's quota is only
considered to be satisfied if all of its quota resources
are satisfied. Previously, a role's quota was considered
to be met if any one of its quota resources reached the limit.

Also fixed a few affected allocator tests.

Review: https://reviews.apache.org/r/64003/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/6fdd5c16
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/6fdd5c16
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/6fdd5c16

Branch: refs/heads/1.4.x
Commit: 6fdd5c165f7a43b7e07956f063e13b3f1cfe7bf9
Parents: dd1df2d
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Wed Dec 20 18:32:02 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:44:06 2018 -0700

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 140 +++++++++++++++++++----
 src/tests/hierarchical_allocator_tests.cpp  | 114 +++++++++---------
 2 files changed, 169 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/6fdd5c16/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 2424484..565f109 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1589,27 +1589,14 @@ void HierarchicalAllocatorProcess::__allocate()
       // If quota for the role is considered satisfied, then we only
       // further allocate reservations for the role.
       //
-      // More precisely, we stop allocating unreserved resources if at
-      // least one of the resource guarantees is considered consumed.
-      // This technique prevents gaming of the quota allocation,
-      // see MESOS-6432.
-      //
-      // Longer term, we could ideally allocate what remains
-      // unsatisfied to allow an existing container to scale
-      // vertically, or to allow the launching of a container
-      // with best-effort cpus/mem/disk/etc.
-      //
       // TODO(alexr): Skipping satisfied roles is pessimistic. Better
       // alternatives are:
       //   * A custom sorter that is aware of quotas and sorts accordingly.
       //   * Removing satisfied roles from the sorter.
-      bool someGuaranteesReached = false;
-      foreach (const Resource& guarantee, quota.info.guarantee()) {
-        if (resourcesChargedAgainstQuota.contains(guarantee)) {
-          someGuaranteesReached = true;
-          break;
-        }
-      }
+      //
+      // This is a scalar quantity with no meta-data.
+      Resources unsatisfiedQuota = Resources(quota.info.guarantee()) -
+        resourcesChargedAgainstQuota;
 
       // Fetch frameworks according to their fair share.
       // NOTE: Suppressed frameworks are not included in the sort.
@@ -1658,18 +1645,119 @@ void HierarchicalAllocatorProcess::__allocate()
           }
         }
 
-        // If a role's quota limit has been reached, we only allocate
-        // its reservations. Otherwise, we allocate its reservations
-        // plus unreserved resources.
+        // We allocate the role's reservations as well as any unreserved
+        // resources while ensuring the role stays within its quota limits.
+        // This means that we'll "chop" the unreserved resources up to
+        // the quota limit if necessary.
+        //
+        // E.g. A role has no allocations or reservations yet and a 10 cpu
+        //      quota limit. We'll chop a 15 cpu agent down to only
+        //      allocate 10 cpus to the role to keep it within its limit.
         //
+        // In the case that the role needs some of the resources on this
+        // agent to make progress towards its quota, we'll *also* allocate
+        // all of the resources for which it does not have quota.
+        //
+        // E.g. The agent has 1 cpu, 1024 mem, 1024 disk, 1 gpu, 5 ports
+        //      and the role has quota for 1 cpu, 1024 mem. We'll include
+        //      the disk, gpu, and ports in the allocation, despite the
+        //      role not having any quota guarantee for them.
+        //
+        // We have to do this for now because it's not possible to set
+        // quota on non-scalar resources, like ports. However, for scalar
+        // resources that can have quota set, we should ideally make
+        // sure that when we include them, we're not violating some
+        // other role's guarantee!
+        //
+        // TODO(mzhu): Check the headroom when we're including scalar
+        // resources that this role does not have quota for.
+        //
+        // TODO(mzhu): Since we're treating the resources with unset
+        // quota as having no guarantee and no limit, these should
+        // also be allocated further in the second allocation "phase"
+        // below (above guarantee up to limit).
+
         // NOTE: Currently, frameworks are allowed to have '*' role.
         // Calling reserved('*') returns an empty Resources object.
         //
-        // TODO(mzhu): Do fine-grained allocations up to the limit.
-        // See MESOS-8293.
-        Resources resources = someGuaranteesReached ?
-          available.reserved(role).nonRevocable() :
-          available.allocatableTo(role).nonRevocable();
+        // NOTE: Since we currently only support top-level roles to
+        // have quota, there are no ancestor reservations involved here.
+        Resources resources = available.reserved(role).nonRevocable();
+
+        // Unreserved resources that are tentatively going to be
+        // allocated towards this role's quota. These resources may
+        // not get allocated due to framework filters.
+        Resources newQuotaAllocation;
+
+        if (!unsatisfiedQuota.empty()) {
+          Resources unreserved = available.nonRevocable().unreserved();
+
+          set<string> quotaResourceNames =
+            Resources(quota.info.guarantee()).names();
+
+          // When "chopping" resources, there is more than 1 "chop" that
+          // can be done to satisfy the limits. Consider the case with
+          // two disks of 1GB, one is PATH and another is MOUNT. And a role
+          // has a "disk" quota of 1GB. We could pick either of the disks here,
+          // but not both.
+          //
+          // In order to avoid repeatedly choosing the same "chop" of
+          // the resources each time we allocate, we introduce some
+          // randomness by shuffling the resources.
+          google::protobuf::RepeatedPtrField<Resource>
+            resourceVector = unreserved;
+          random_shuffle(resourceVector.begin(), resourceVector.end());
+
+          foreach (const Resource& resource, resourceVector) {
+            if (quotaResourceNames.count(resource.name()) == 0) {
+              // This resource has no quota set. Allocate it.
+              resources += resource;
+              continue;
+            }
+
+            // Currently, we guarantee that quota resources are scalars.
+            CHECK_EQ(resource.type(), Value::SCALAR);
+
+            Option<Value::Scalar> newUnsatisfiedQuotaScalar =
+              (unsatisfiedQuota -
+                newQuotaAllocation.createStrippedScalarQuantity())
+                  .get<Value::Scalar>(resource.name());
+
+            if (newUnsatisfiedQuotaScalar.isNone()) {
+              // Quota for this resource has been satisfied.
+              continue;
+            }
+
+            const Value::Scalar& resourceScalar = resource.scalar();
+
+            if (resourceScalar <= newUnsatisfiedQuotaScalar.get()) {
+              // We can safely allocate `resource` entirely here without
+              // breaking the quota limit.
+              resources += resource;
+              newQuotaAllocation += resource;
+
+              continue;
+            }
+
+            // At this point, we need to chop `resource` to satisfy
+            // the quota. We chop `resource` by making a copy and
+            // shrinking its scalar value.
+            Resource choppedResourceToAllocate = resource;
+            choppedResourceToAllocate.mutable_scalar()
+              ->CopyFrom(newUnsatisfiedQuotaScalar.get());
+
+            // Not all resources are choppable. Some resources, such as MOUNT
+            // disks, have to be offered entirely. We use resources `contains()`
+            // utility to verify this. Specifically, if a resource `contains()`
+            // a smaller version of itself, then it can be safely chopped.
+            if (!Resources(resource).contains(choppedResourceToAllocate)) {
+              continue;
+            }
+
+            resources += choppedResourceToAllocate;
+            newQuotaAllocation += choppedResourceToAllocate;
+          }
+        }
 
         // It is safe to break here, because all frameworks under a role would
         // consider the same resources, so in case we don't have allocatable
@@ -1717,6 +1805,8 @@ void HierarchicalAllocatorProcess::__allocate()
         offerable[frameworkId][role][slaveId] += resources;
         offeredSharedResources[slaveId] += resources.shared();
 
+        unsatisfiedQuota -= newQuotaAllocation.createStrippedScalarQuantity();
+
         // Update the tracking of allocated reservations.
         //
         // Note it is important to do this before updating `slave.allocated`

http://git-wip-us.apache.org/repos/asf/mesos/blob/6fdd5c16/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index 1f27af0..6ea7d50 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -2659,10 +2659,8 @@ TEST_F(HierarchicalAllocatorTest, MultipleFrameworksInRoleWithQuota)
 }
 
 
-// The allocator performs coarse-grained allocations, and allocations
-// to satisfy quota are no exception. A role may get more resources as
-// part of its quota if the agent remaining resources are greater than
-// the unsatisfied part of the role's quota.
+// Quota allocations should be fine-grained. A role should get no more
+// resources than its quota even if the agent has more resources to offer.
 TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
 {
   // Pausing the clock is not necessary, but ensures that the test
@@ -2683,15 +2681,6 @@ TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
   const Quota quota = createQuota(QUOTA_ROLE, "cpus:0.5;mem:200");
   allocator->setQuota(QUOTA_ROLE, quota);
 
-  // Create `framework2` in a non-quota'ed role.
-  FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
-  allocator->addFramework(framework2.id(), framework2, {}, true, {});
-
-  // Process all triggered allocation events.
-  //
-  // NOTE: No allocations happen because there are no resources to allocate.
-  Clock::settle();
-
   SlaveInfo agent = createSlaveInfo("cpus:1;mem:512;disk:0");
   allocator->addSlave(
       agent.id(),
@@ -2701,20 +2690,34 @@ TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
       agent.resources(),
       {});
 
-  // `framework1` will be offered all of `agent`'s resources because
-  // it is the only framework in the only role with unsatisfied quota
-  // and the allocator performs coarse-grained allocation.
+  // Due to fine-grained allocation, `framework1` will be offered the
+  // exact amount of quota resources on `agent` even though the agent
+  // has more resources to offer.
   Allocation expected = Allocation(
       framework1.id(),
-      {{QUOTA_ROLE, {{agent.id(), agent.resources()}}}});
+      {{QUOTA_ROLE, {{agent.id(), quota.info.guarantee()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
   // Total cluster resources: cpus=1, mem=512.
-  // QUOTA_ROLE share = 1 (cpus=1, mem=512) [quota: cpus=0.5, mem=200]
+  // QUOTA_ROLE share = 1 (cpus=0.5, mem=200) [quota: cpus=0.5, mem=200]
   //   framework1 share = 1
-  // NO_QUOTA_ROLE share = 0
-  //   framework2 share = 0
+
+  // Create `framework2` in a non-quota'ed role.
+  FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
+  allocator->addFramework(framework2.id(), framework2, {}, true, {});
+
+  // `framework2` will get the remaining resources of `agent`.
+  expected = Allocation(
+      framework2.id(),
+      {{NO_QUOTA_ROLE, {{agent.id(), agent.resources() -
+          Resources(quota.info.guarantee())}}}});
+
+  AWAIT_EXPECT_EQ(expected, allocations.get());
+
+  // Total cluster resources: cpus=1, mem=512.
+  // QUOTA_ROLE  (cpus=0.5, mem=200) [quota: cpus=0.5, mem=200]
+  // NO_QUOTA_ROLE  (cpus=0.5, mem=312)
 }
 
 
@@ -3161,7 +3164,7 @@ TEST_F(HierarchicalAllocatorTest, MultiQuotaWithFrameworks)
   // TODO(bmahler): Add assertions to test this is accurate!
   //
   // Total cluster resources (2 identical agents): cpus=2, mem=2048.
-  // QUOTA_ROLE1 share = 0.5 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
+  // QUOTA_ROLE1 share = 0.5 (cpus=1, mem=200) [quota: cpus=1, mem=200]
   //   framework1 share = 1
   // QUOTA_ROLE2 share = 0.5 (cpus=1, mem=1024) [quota: cpus=2, mem=2000]
   //   framework2 share = 1
@@ -3179,18 +3182,18 @@ TEST_F(HierarchicalAllocatorTest, MultiQuotaWithFrameworks)
       agent3.resources(),
       {});
 
-  // `framework2` will get all agent3's resources because its role is under
-  // quota, while other roles' quotas are satisfied.
+  // `framework2` will get some of agent3's resources to meet its quota.
   Allocation expected = Allocation(
       framework2.id(),
-      {{QUOTA_ROLE2, {{agent3.id(), agent3.resources()}}}});
+      {{QUOTA_ROLE2, {{agent3.id(),
+          Resources(quota2.info.guarantee()) - agent2.resources()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
   // Total cluster resources (3 agents): cpus=4, mem=4096.
-  // QUOTA_ROLE1 share = 0.25 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
+  // QUOTA_ROLE1 share = 0.33 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
   //   framework1 share = 1
-  // QUOTA_ROLE2 share = 0.75 (cpus=3, mem=3072) [quota: cpus=2, mem=2000]
+  // QUOTA_ROLE2 share = 0.66 (cpus=2, mem=2000) [quota: cpus=2, mem=2000]
   //   framework2 share = 1
 }
 
@@ -3318,14 +3321,14 @@ TEST_F(HierarchicalAllocatorTest, QuotaSetAsideReservedResources)
   FrameworkInfo framework1 = createFrameworkInfo({QUOTA_ROLE});
   allocator->addFramework(framework1.id(), framework1, {}, true, {});
 
-  const Quota quota = createQuota(QUOTA_ROLE, "cpus:4");
+  const Quota quota = createQuota(QUOTA_ROLE, "cpus:4;mem:512");
   allocator->setQuota(QUOTA_ROLE, quota);
 
-  // `framework1` will be offered resources at `agent1` because the
-  // resources at `agent2` are reserved for a different role.
+  // `framework1` will be offered resources at `agent1` up to its quota limit
+  // because the resources at `agent2` are reserved for a different role.
   Allocation expected = Allocation(
       framework1.id(),
-      {{QUOTA_ROLE, {{agent1.id(), agent1.resources()}}}});
+      {{QUOTA_ROLE, {{agent1.id(), quota.info.guarantee()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
@@ -3337,7 +3340,7 @@ TEST_F(HierarchicalAllocatorTest, QuotaSetAsideReservedResources)
   allocator->recoverResources(
       framework1.id(),
       agent1.id(),
-      allocatedResources(agent1.resources(), QUOTA_ROLE),
+      allocatedResources(quota.info.guarantee(), QUOTA_ROLE),
       longFilter);
 
   // Trigger a batch allocation for good measure, but don't expect any
@@ -4504,18 +4507,14 @@ TEST_F(HierarchicalAllocatorTest, DontOfferOldAgentToMultiRoleFramework)
 
 
 // This tests the behavior of quota when the allocation and
-// quota are disproportionate. The current approach (see MESOS-6432)
-// is to stop allocation quota once one of the resource
-// guarantees is reached. We test the following example:
+// quota are disproportionate. We always try to allocate
+// up to the limit for all resources.
+// We test the following example:
 //
 //        Quota: cpus:4;mem:1024
 //   Allocation: cpus:2;mem:1024
 //
-// Here no more allocation occurs to the role since the memory
-// guarantee is reached. Longer term, we could offer the
-// unsatisfied cpus to allow an existing container to scale
-// vertically, or to allow the launching of a container with
-// best-effort memory/disk, etc.
+// Role will only get cpus resources in this case to meet its remaining quota.
 TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
 {
   // Pausing the clock is not necessary, but ensures that the test
@@ -4528,27 +4527,23 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
   const string QUOTA_ROLE{"quota-role"};
   const string NO_QUOTA_ROLE{"no-quota-role"};
 
-  // Since resource allocation is coarse-grained, we use a quota such
-  // that mem can be satisfied from a single agent, but cpus requires
-  // multiple agents.
+  // We use a quota such that mem can be satisfied from a single agent,
+  // but cpus requires multiple agents.
   const string agentResources = "cpus:2;mem:1024";
   const string quotaResources = "cpus:4;mem:1024";
 
   Quota quota = createQuota(QUOTA_ROLE, quotaResources);
   allocator->setQuota(QUOTA_ROLE, quota);
 
-  // Register two frameworks where one is using a role with quota.
-  FrameworkInfo framework1 = createFrameworkInfo({QUOTA_ROLE});
-  FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
-  allocator->addFramework(framework1.id(), framework1, {}, true, {});
-  allocator->addFramework(framework2.id(), framework2, {}, true, {});
+  // Register `framework` under `QUOTA_ROLE`.
+  FrameworkInfo framework = createFrameworkInfo({QUOTA_ROLE});
+  allocator->addFramework(framework.id(), framework, {}, true, {});
 
   // Register an agent. This triggers an allocation of all of the
   // agent's resources to partially satisfy QUOTA_ROLE's quota. After
   // the allocation QUOTA_ROLE's quota for mem will be satisfied while
-  // still being below the set quota for cpus. With that QUOTA_ROLE
-  // will not receive more resources since we currently do not
-  // have an ability to offer out only the cpus.
+  // still being below the set quota for cpus. With that framework under
+  // QUOTA_ROLE will only receive cpus resources.
   SlaveInfo agent1 = createSlaveInfo(agentResources);
   allocator->addSlave(
       agent1.id(),
@@ -4559,18 +4554,12 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
       {});
 
   Allocation expected = Allocation(
-      framework1.id(),
+      framework.id(),
       {{QUOTA_ROLE, {{agent1.id(), agent1.resources()}}}});
 
   AWAIT_EXPECT_EQ(expected, allocations.get());
 
-  // Register a second agent. Since QUOTA_ROLE has reached one
-  // of its quota guarantees and quota currently acts as an upper
-  // limit, no further resources are allocated for quota.
-  // In addition, we "hold back" the resources of the second
-  // agent in order to ensure that the quota could be satisfied
-  // should the first framework decide to consume proportionally
-  // to its quota (e.g. cpus:2;mem:512 on each agent).
+  // Register a second agent.
   SlaveInfo agent2 = createSlaveInfo(agentResources);
   allocator->addSlave(
       agent2.id(),
@@ -4582,8 +4571,13 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
 
   Clock::settle();
 
-  Future<Allocation> allocation = allocations.get();
-  EXPECT_TRUE(allocation.isPending());
+  // `framework` will get its unsatisfied quota resources (2cpus).
+  expected = Allocation(
+    framework.id(),
+    {{QUOTA_ROLE, {{agent2.id(),
+      Resources::parse(quotaResources).get() - agent1.resources()}}}});
+
+  AWAIT_EXPECT_EQ(expected, allocations.get());
 }