You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2018/05/03 00:01:05 UTC
[09/20] mesos git commit: Made quota resource allocation fine-grained.
Made quota resource allocation fine-grained.
The allocator now does fine-grained resource allocation up
to the role's quota limit. And a role's quota is only
considered to be satisfied if all of its quota resources
are satisfied. Previously, a role's quota was considered
to be met if any one of its quota resources reached the limit.
Also fixed a few affected allocator tests.
Review: https://reviews.apache.org/r/64003/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/6fdd5c16
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/6fdd5c16
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/6fdd5c16
Branch: refs/heads/1.4.x
Commit: 6fdd5c165f7a43b7e07956f063e13b3f1cfe7bf9
Parents: dd1df2d
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Wed Dec 20 18:32:02 2017 -0800
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Wed May 2 16:44:06 2018 -0700
----------------------------------------------------------------------
src/master/allocator/mesos/hierarchical.cpp | 140 +++++++++++++++++++----
src/tests/hierarchical_allocator_tests.cpp | 114 +++++++++---------
2 files changed, 169 insertions(+), 85 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/6fdd5c16/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 2424484..565f109 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1589,27 +1589,14 @@ void HierarchicalAllocatorProcess::__allocate()
// If quota for the role is considered satisfied, then we only
// further allocate reservations for the role.
//
- // More precisely, we stop allocating unreserved resources if at
- // least one of the resource guarantees is considered consumed.
- // This technique prevents gaming of the quota allocation,
- // see MESOS-6432.
- //
- // Longer term, we could ideally allocate what remains
- // unsatisfied to allow an existing container to scale
- // vertically, or to allow the launching of a container
- // with best-effort cpus/mem/disk/etc.
- //
// TODO(alexr): Skipping satisfied roles is pessimistic. Better
// alternatives are:
// * A custom sorter that is aware of quotas and sorts accordingly.
// * Removing satisfied roles from the sorter.
- bool someGuaranteesReached = false;
- foreach (const Resource& guarantee, quota.info.guarantee()) {
- if (resourcesChargedAgainstQuota.contains(guarantee)) {
- someGuaranteesReached = true;
- break;
- }
- }
+ //
+ // This is a scalar quantity with no meta-data.
+ Resources unsatisfiedQuota = Resources(quota.info.guarantee()) -
+ resourcesChargedAgainstQuota;
// Fetch frameworks according to their fair share.
// NOTE: Suppressed frameworks are not included in the sort.
@@ -1658,18 +1645,119 @@ void HierarchicalAllocatorProcess::__allocate()
}
}
- // If a role's quota limit has been reached, we only allocate
- // its reservations. Otherwise, we allocate its reservations
- // plus unreserved resources.
+ // We allocate the role's reservations as well as any unreserved
+ // resources while ensuring the role stays within its quota limits.
+ // This means that we'll "chop" the unreserved resources up to
+ // the quota limit if necessary.
+ //
+ // E.g. A role has no allocations or reservations yet and a 10 cpu
+ // quota limit. We'll chop a 15 cpu agent down to only
+ // allocate 10 cpus to the role to keep it within its limit.
//
+ // In the case that the role needs some of the resources on this
+ // agent to make progress towards its quota, we'll *also* allocate
+ // all of the resources for which it does not have quota.
+ //
+ // E.g. The agent has 1 cpu, 1024 mem, 1024 disk, 1 gpu, 5 ports
+ // and the role has quota for 1 cpu, 1024 mem. We'll include
+ // the disk, gpu, and ports in the allocation, despite the
+ // role not having any quota guarantee for them.
+ //
+ // We have to do this for now because it's not possible to set
+ // quota on non-scalar resources, like ports. However, for scalar
+ // resources that can have quota set, we should ideally make
+ // sure that when we include them, we're not violating some
+ // other role's guarantee!
+ //
+ // TODO(mzhu): Check the headroom when we're including scalar
+ // resources that this role does not have quota for.
+ //
+ // TODO(mzhu): Since we're treating the resources with unset
+ // quota as having no guarantee and no limit, these should
+ // also be allocated further in the second allocation "phase"
+ // below (above guarantee up to limit).
+
// NOTE: Currently, frameworks are allowed to have '*' role.
// Calling reserved('*') returns an empty Resources object.
//
- // TODO(mzhu): Do fine-grained allocations up to the limit.
- // See MESOS-8293.
- Resources resources = someGuaranteesReached ?
- available.reserved(role).nonRevocable() :
- available.allocatableTo(role).nonRevocable();
+ // NOTE: Since we currently only support top-level roles to
+ // have quota, there are no ancestor reservations involved here.
+ Resources resources = available.reserved(role).nonRevocable();
+
+ // Unreserved resources that are tentatively going to be
+ // allocated towards this role's quota. These resources may
+ // not get allocated due to framework filters.
+ Resources newQuotaAllocation;
+
+ if (!unsatisfiedQuota.empty()) {
+ Resources unreserved = available.nonRevocable().unreserved();
+
+ set<string> quotaResourceNames =
+ Resources(quota.info.guarantee()).names();
+
+ // When "chopping" resources, there is more than 1 "chop" that
+ // can be done to satisfy the limits. Consider the case with
+ // two disks of 1GB, one is PATH and another is MOUNT. And a role
+ // has a "disk" quota of 1GB. We could pick either of the disks here,
+ // but not both.
+ //
+ // In order to avoid repeatedly choosing the same "chop" of
+ // the resources each time we allocate, we introduce some
+ // randomness by shuffling the resources.
+ google::protobuf::RepeatedPtrField<Resource>
+ resourceVector = unreserved;
+ random_shuffle(resourceVector.begin(), resourceVector.end());
+
+ foreach (const Resource& resource, resourceVector) {
+ if (quotaResourceNames.count(resource.name()) == 0) {
+ // This resource has no quota set. Allocate it.
+ resources += resource;
+ continue;
+ }
+
+ // Currently, we guarantee that quota resources are scalars.
+ CHECK_EQ(resource.type(), Value::SCALAR);
+
+ Option<Value::Scalar> newUnsatisfiedQuotaScalar =
+ (unsatisfiedQuota -
+ newQuotaAllocation.createStrippedScalarQuantity())
+ .get<Value::Scalar>(resource.name());
+
+ if (newUnsatisfiedQuotaScalar.isNone()) {
+ // Quota for this resource has been satisfied.
+ continue;
+ }
+
+ const Value::Scalar& resourceScalar = resource.scalar();
+
+ if (resourceScalar <= newUnsatisfiedQuotaScalar.get()) {
+ // We can safely allocate `resource` entirely here without
+ // breaking the quota limit.
+ resources += resource;
+ newQuotaAllocation += resource;
+
+ continue;
+ }
+
+ // At this point, we need to chop `resource` to satisfy
+ // the quota. We chop `resource` by making a copy and
+ // shrinking its scalar value.
+ Resource choppedResourceToAllocate = resource;
+ choppedResourceToAllocate.mutable_scalar()
+ ->CopyFrom(newUnsatisfiedQuotaScalar.get());
+
+ // Not all resources are choppable. Some resources such as MOUNT
+ // disk have to be offered entirely. We use resources `contains()`
+ // utility to verify this. Specifically, if a resource `contains()`
+ // a smaller version of itself, then it can be safely chopped.
+ if (!Resources(resource).contains(choppedResourceToAllocate)) {
+ continue;
+ }
+
+ resources += choppedResourceToAllocate;
+ newQuotaAllocation += choppedResourceToAllocate;
+ }
+ }
// It is safe to break here, because all frameworks under a role would
// consider the same resources, so in case we don't have allocatable
@@ -1717,6 +1805,8 @@ void HierarchicalAllocatorProcess::__allocate()
offerable[frameworkId][role][slaveId] += resources;
offeredSharedResources[slaveId] += resources.shared();
+ unsatisfiedQuota -= newQuotaAllocation.createStrippedScalarQuantity();
+
// Update the tracking of allocated reservations.
//
// Note it is important to do this before updating `slave.allocated`
http://git-wip-us.apache.org/repos/asf/mesos/blob/6fdd5c16/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index 1f27af0..6ea7d50 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -2659,10 +2659,8 @@ TEST_F(HierarchicalAllocatorTest, MultipleFrameworksInRoleWithQuota)
}
-// The allocator performs coarse-grained allocations, and allocations
-// to satisfy quota are no exception. A role may get more resources as
-// part of its quota if the agent remaining resources are greater than
-// the unsatisfied part of the role's quota.
+// Quota allocations should be fine-grained. A role should get no more
+// resources than its quota even if the agent has more resources to offer.
TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
{
// Pausing the clock is not necessary, but ensures that the test
@@ -2683,15 +2681,6 @@ TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
const Quota quota = createQuota(QUOTA_ROLE, "cpus:0.5;mem:200");
allocator->setQuota(QUOTA_ROLE, quota);
- // Create `framework2` in a non-quota'ed role.
- FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
- allocator->addFramework(framework2.id(), framework2, {}, true, {});
-
- // Process all triggered allocation events.
- //
- // NOTE: No allocations happen because there are no resources to allocate.
- Clock::settle();
-
SlaveInfo agent = createSlaveInfo("cpus:1;mem:512;disk:0");
allocator->addSlave(
agent.id(),
@@ -2701,20 +2690,34 @@ TEST_F(HierarchicalAllocatorTest, QuotaAllocationGranularity)
agent.resources(),
{});
- // `framework1` will be offered all of `agent`'s resources because
- // it is the only framework in the only role with unsatisfied quota
- // and the allocator performs coarse-grained allocation.
+ // Due to fine-grained allocation, `framework1` will be offered the
+ // exact amount of quota resources on `agent` even though the agent
+ // has more resources to offer.
Allocation expected = Allocation(
framework1.id(),
- {{QUOTA_ROLE, {{agent.id(), agent.resources()}}}});
+ {{QUOTA_ROLE, {{agent.id(), quota.info.guarantee()}}}});
AWAIT_EXPECT_EQ(expected, allocations.get());
// Total cluster resources: cpus=1, mem=512.
- // QUOTA_ROLE share = 1 (cpus=1, mem=512) [quota: cpus=0.5, mem=200]
+ // QUOTA_ROLE share = 1 (cpus=0.5, mem=200) [quota: cpus=0.5, mem=200]
// framework1 share = 1
- // NO_QUOTA_ROLE share = 0
- // framework2 share = 0
+
+ // Create `framework2` in a non-quota'ed role.
+ FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
+ allocator->addFramework(framework2.id(), framework2, {}, true, {});
+
+ // `framework2` will get the remaining resources of `agent`.
+ expected = Allocation(
+ framework2.id(),
+ {{NO_QUOTA_ROLE, {{agent.id(), agent.resources() -
+ Resources(quota.info.guarantee())}}}});
+
+ AWAIT_EXPECT_EQ(expected, allocations.get());
+
+ // Total cluster resources: cpus=1, mem=512.
+ // QUOTA_ROLE (cpus=0.5, mem=200) [quota: cpus=0.5, mem=200]
+ // NO_QUOTA_ROLE (cpus=0.5, mem=312)
}
@@ -3161,7 +3164,7 @@ TEST_F(HierarchicalAllocatorTest, MultiQuotaWithFrameworks)
// TODO(bmahler): Add assertions to test this is accurate!
//
// Total cluster resources (2 identical agents): cpus=2, mem=2048.
- // QUOTA_ROLE1 share = 0.5 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
+ // QUOTA_ROLE1 share = 0.5 (cpus=1, mem=200) [quota: cpus=1, mem=200]
// framework1 share = 1
// QUOTA_ROLE2 share = 0.5 (cpus=1, mem=1024) [quota: cpus=2, mem=2000]
// framework2 share = 1
@@ -3179,18 +3182,18 @@ TEST_F(HierarchicalAllocatorTest, MultiQuotaWithFrameworks)
agent3.resources(),
{});
- // `framework2` will get all agent3's resources because its role is under
- // quota, while other roles' quotas are satisfied.
+ // `framework2` will get some of agent3's resources to meet its quota.
Allocation expected = Allocation(
framework2.id(),
- {{QUOTA_ROLE2, {{agent3.id(), agent3.resources()}}}});
+ {{QUOTA_ROLE2, {{agent3.id(),
+ Resources(quota2.info.guarantee()) - agent2.resources()}}}});
AWAIT_EXPECT_EQ(expected, allocations.get());
// Total cluster resources (3 agents): cpus=4, mem=4096.
- // QUOTA_ROLE1 share = 0.25 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
+ // QUOTA_ROLE1 share = 0.33 (cpus=1, mem=1024) [quota: cpus=1, mem=200]
// framework1 share = 1
- // QUOTA_ROLE2 share = 0.75 (cpus=3, mem=3072) [quota: cpus=2, mem=2000]
+ // QUOTA_ROLE2 share = 0.66 (cpus=2, mem=2000) [quota: cpus=2, mem=2000]
// framework2 share = 1
}
@@ -3318,14 +3321,14 @@ TEST_F(HierarchicalAllocatorTest, QuotaSetAsideReservedResources)
FrameworkInfo framework1 = createFrameworkInfo({QUOTA_ROLE});
allocator->addFramework(framework1.id(), framework1, {}, true, {});
- const Quota quota = createQuota(QUOTA_ROLE, "cpus:4");
+ const Quota quota = createQuota(QUOTA_ROLE, "cpus:4;mem:512");
allocator->setQuota(QUOTA_ROLE, quota);
- // `framework1` will be offered resources at `agent1` because the
- // resources at `agent2` are reserved for a different role.
+ // `framework1` will be offered resources at `agent1` up to its quota limit
+ // because the resources at `agent2` are reserved for a different role.
Allocation expected = Allocation(
framework1.id(),
- {{QUOTA_ROLE, {{agent1.id(), agent1.resources()}}}});
+ {{QUOTA_ROLE, {{agent1.id(), quota.info.guarantee()}}}});
AWAIT_EXPECT_EQ(expected, allocations.get());
@@ -3337,7 +3340,7 @@ TEST_F(HierarchicalAllocatorTest, QuotaSetAsideReservedResources)
allocator->recoverResources(
framework1.id(),
agent1.id(),
- allocatedResources(agent1.resources(), QUOTA_ROLE),
+ allocatedResources(quota.info.guarantee(), QUOTA_ROLE),
longFilter);
// Trigger a batch allocation for good measure, but don't expect any
@@ -4504,18 +4507,14 @@ TEST_F(HierarchicalAllocatorTest, DontOfferOldAgentToMultiRoleFramework)
// This tests the behavior of quota when the allocation and
-// quota are disproportionate. The current approach (see MESOS-6432)
-// is to stop allocation quota once one of the resource
-// guarantees is reached. We test the following example:
+// quota are disproportionate. We always try to allocate
+// up to the limit for all resources.
+// We test the following example:
//
// Quota: cpus:4;mem:1024
// Allocation: cpus:2;mem:1024
//
-// Here no more allocation occurs to the role since the memory
-// guarantee is reached. Longer term, we could offer the
-// unsatisfied cpus to allow an existing container to scale
-// vertically, or to allow the launching of a container with
-// best-effort memory/disk, etc.
+// Role will only get cpus resources in this case to meet its remaining quota.
TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
{
// Pausing the clock is not necessary, but ensures that the test
@@ -4528,27 +4527,23 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
const string QUOTA_ROLE{"quota-role"};
const string NO_QUOTA_ROLE{"no-quota-role"};
- // Since resource allocation is coarse-grained, we use a quota such
- // that mem can be satisfied from a single agent, but cpus requires
- // multiple agents.
+ // We use a quota such that mem can be satisfied from a single agent,
+ // but cpus requires multiple agents.
const string agentResources = "cpus:2;mem:1024";
const string quotaResources = "cpus:4;mem:1024";
Quota quota = createQuota(QUOTA_ROLE, quotaResources);
allocator->setQuota(QUOTA_ROLE, quota);
- // Register two frameworks where one is using a role with quota.
- FrameworkInfo framework1 = createFrameworkInfo({QUOTA_ROLE});
- FrameworkInfo framework2 = createFrameworkInfo({NO_QUOTA_ROLE});
- allocator->addFramework(framework1.id(), framework1, {}, true, {});
- allocator->addFramework(framework2.id(), framework2, {}, true, {});
+ // Register `framework` under `QUOTA_ROLE`.
+ FrameworkInfo framework = createFrameworkInfo({QUOTA_ROLE});
+ allocator->addFramework(framework.id(), framework, {}, true, {});
// Register an agent. This triggers an allocation of all of the
// agent's resources to partially satisfy QUOTA_ROLE's quota. After
// the allocation QUOTA_ROLE's quota for mem will be satisfied while
- // still being below the set quota for cpus. With that QUOTA_ROLE
- // will not receive more resources since we currently do not
- // have an ability to offer out only the cpus.
+ // still being below the set quota for cpus. With that framework under
+ // QUOTA_ROLE will only receive cpus resources.
SlaveInfo agent1 = createSlaveInfo(agentResources);
allocator->addSlave(
agent1.id(),
@@ -4559,18 +4554,12 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
{});
Allocation expected = Allocation(
- framework1.id(),
+ framework.id(),
{{QUOTA_ROLE, {{agent1.id(), agent1.resources()}}}});
AWAIT_EXPECT_EQ(expected, allocations.get());
- // Register a second agent. Since QUOTA_ROLE has reached one
- // of its quota guarantees and quota currently acts as an upper
- // limit, no further resources are allocated for quota.
- // In addition, we "hold back" the resources of the second
- // agent in order to ensure that the quota could be satisfied
- // should the first framework decide to consume proportionally
- // to its quota (e.g. cpus:2;mem:512 on each agent).
+ // Register a second agent.
SlaveInfo agent2 = createSlaveInfo(agentResources);
allocator->addSlave(
agent2.id(),
@@ -4582,8 +4571,13 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
Clock::settle();
- Future<Allocation> allocation = allocations.get();
- EXPECT_TRUE(allocation.isPending());
+ // `framework` will get its unsatisfied quota resources (2cpus).
+ expected = Allocation(
+ framework.id(),
+ {{QUOTA_ROLE, {{agent2.id(),
+ Resources::parse(quotaResources).get() - agent1.resources()}}}});
+
+ AWAIT_EXPECT_EQ(expected, allocations.get());
}