You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2018/09/27 01:07:55 UTC
[mesos] branch master updated (e93cb84 -> 8665335)
This is an automated email from the ASF dual-hosted git repository.
bmahler pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git.
from e93cb84 Fixed libevent M4 macro to be fully compatible with older autoconf.
new 9934d41 Cached weights in the sorters nodes.
new a362621 Use total cluster resources as framework sorter "total".
new 8665335 Avoided dirtying the DRF sorter when an allocation is performed.
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
src/master/allocator/mesos/hierarchical.cpp | 30 +++++++----
src/master/allocator/sorter/drf/sorter.cpp | 75 +++++++++++++++++++++++----
src/master/allocator/sorter/drf/sorter.hpp | 6 +++
src/master/allocator/sorter/random/sorter.cpp | 28 ++++++++--
src/master/allocator/sorter/random/sorter.hpp | 6 +++
5 files changed, 120 insertions(+), 25 deletions(-)
[mesos] 01/03: Cached weights in the sorters nodes.
Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 9934d41bec5a36b81b3fa94136303c31d6e7d76f
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Sun Sep 16 19:18:49 2018 -0700
Cached weights in the sorters nodes.
This avoids making excessive map lookups each time we calculate the
share for a node in the tree. Now, when the weight is needed, the
value is cached. If the weight gets updated, we update the cached
value. This approach proved cleaner than trying to ensure freshly
constructed nodes have the right weight.
Review: https://reviews.apache.org/r/68732
---
src/master/allocator/sorter/drf/sorter.cpp | 28 ++++++++++++++++++++++-----
src/master/allocator/sorter/drf/sorter.hpp | 6 ++++++
src/master/allocator/sorter/random/sorter.cpp | 28 ++++++++++++++++++++++-----
src/master/allocator/sorter/random/sorter.hpp | 6 ++++++
4 files changed, 58 insertions(+), 10 deletions(-)
diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index a45f66f..b04d684 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -312,6 +312,24 @@ void DRFSorter::updateWeight(const string& path, double weight)
// TODO(neilc): Avoid dirtying the tree in some circumstances.
dirty = true;
+
+ // Update the weight of the corresponding internal node,
+ // if it exists (this client may not exist despite there
+ // being a weight).
+ Node* node = find(path);
+
+ if (node == nullptr) {
+ return;
+ }
+
+ // If there is a virtual leaf, we need to move up one level.
+ if (node->name == ".") {
+ node = CHECK_NOTNULL(node->parent);
+ }
+
+ CHECK_EQ(path, node->path);
+
+ node->weight = weight;
}
@@ -625,11 +643,11 @@ double DRFSorter::calculateShare(const Node* node) const
double DRFSorter::getWeight(const Node* node) const
{
- // TODO(bmahler): It's expensive to have to hash the complete
- // role path and re-lookup the weight each time we calculate
- // the share, consider storing the weight directly in the
- // node struct.
- return weights.get(node->path).getOrElse(1.0);
+ if (node->weight.isNone()) {
+ node->weight = weights.get(node->path).getOrElse(1.0);
+ }
+
+ return CHECK_NOTNONE(node->weight);
}
diff --git a/src/master/allocator/sorter/drf/sorter.hpp b/src/master/allocator/sorter/drf/sorter.hpp
index 2957a2e..084df82 100644
--- a/src/master/allocator/sorter/drf/sorter.hpp
+++ b/src/master/allocator/sorter/drf/sorter.hpp
@@ -243,6 +243,12 @@ struct DRFSorter::Node
double share;
+ // Cached weight of the node, access this through `getWeight()`.
+ // The value is cached by `getWeight()` and updated by
+ // `updateWeight()`. Marked mutable since the caching writes
+ // to this are logically const.
+ mutable Option<double> weight;
+
Kind kind;
Node* parent;
diff --git a/src/master/allocator/sorter/random/sorter.cpp b/src/master/allocator/sorter/random/sorter.cpp
index fc47756..f578ef1 100644
--- a/src/master/allocator/sorter/random/sorter.cpp
+++ b/src/master/allocator/sorter/random/sorter.cpp
@@ -285,6 +285,24 @@ void RandomSorter::deactivate(const string& clientPath)
void RandomSorter::updateWeight(const string& path, double weight)
{
weights[path] = weight;
+
+ // Update the weight of the corresponding internal node,
+ // if it exists (this client may not exist despite there
+ // being a weight).
+ Node* node = find(path);
+
+ if (node == nullptr) {
+ return;
+ }
+
+ // If there is a virtual leaf, we need to move up one level.
+ if (node->name == ".") {
+ node = CHECK_NOTNULL(node->parent);
+ }
+
+ CHECK_EQ(path, node->path);
+
+ node->weight = weight;
}
@@ -542,11 +560,11 @@ size_t RandomSorter::count() const
double RandomSorter::getWeight(const Node* node) const
{
- // TODO(bmahler): It's expensive to have to hash the complete
- // role path and re-lookup the weight each time we calculate
- // the share, consider storing the weight directly in the
- // node struct.
- return weights.get(node->path).getOrElse(1.0);
+ if (node->weight.isNone()) {
+ node->weight = weights.get(node->path).getOrElse(1.0);
+ }
+
+ return CHECK_NOTNONE(node->weight);
}
diff --git a/src/master/allocator/sorter/random/sorter.hpp b/src/master/allocator/sorter/random/sorter.hpp
index 825c158..800b22c 100644
--- a/src/master/allocator/sorter/random/sorter.hpp
+++ b/src/master/allocator/sorter/random/sorter.hpp
@@ -235,6 +235,12 @@ struct RandomSorter::Node
// label for virtual leaf nodes.
std::string path;
+ // Cached weight of the node, access this through `getWeight()`.
+ // The value is cached by `getWeight()` and updated by
+ // `updateWeight()`. Marked mutable since the caching writes
+ // to this are logically const.
+ mutable Option<double> weight;
+
Kind kind;
Node* parent;
[mesos] 03/03: Avoided dirtying the DRF sorter when an allocation
is performed.
Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 86653356d763fee79e9467cf7b07bebb449e8aff
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Fri Sep 21 12:26:19 2018 -0700
Avoided dirtying the DRF sorter when an allocation is performed.
This improves performance by ensuring that the DRF sorter can remain
sorted throughout an allocation cycle. Without this change, we spend
the majority of time re-sorting throughout an allocation cycle, when
there are large numbers of clients (roles / frameworks).
Before with --enable-optimize:
*HierarchicalAllocator_BENCHMARK_Test.DeclineOffers/21
Added 1000 frameworks in 28.69ms
Added 10000 agents in 3.96secs
round 0 allocate() took 3.18secs to make 10000 offers ...
round 1 allocate() took 3.40secs to make 10000 offers ...
round 2 allocate() took 3.31secs to make 10000 offers ...
After with --enable-optimize:
*HierarchicalAllocator_BENCHMARK_Test.DeclineOffers/21
Added 1000 frameworks in 40.38ms
Added 10000 agents in 1.61secs
round 0 allocate() took 1.06secs to make 10000 offers ...
round 1 allocate() took 1.06secs to make 10000 offers ...
round 2 allocate() took 1.06secs to make 10000 offers ...
Review: https://reviews.apache.org/r/68808
---
src/master/allocator/sorter/drf/sorter.cpp | 47 +++++++++++++++++++++++++++---
1 file changed, 43 insertions(+), 4 deletions(-)
diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index b04d684..a648fac 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -16,6 +16,7 @@
#include "master/allocator/sorter/drf/sorter.hpp"
+#include <iterator>
#include <set>
#include <string>
#include <vector>
@@ -340,16 +341,51 @@ void DRFSorter::allocated(
{
Node* current = CHECK_NOTNULL(find(clientPath));
+ // Walk up the tree adjusting allocations. If the tree is
+ // sorted, we keep it sorted (see below).
+ //
// NOTE: We don't currently update the `allocation` for the root
// node. This is debatable, but the current implementation doesn't
// require looking at the allocation of the root node.
while (current != root) {
current->allocation.add(slaveId, resources);
+
+ // Note that inactive leaves are not sorted, and are always
+ // stored in `children` after the active leaves and internal
+ // nodes. See the comment on `Node::children`.
+ if (!dirty && current->kind != Node::INACTIVE_LEAF) {
+ current->share = calculateShare(current);
+
+ vector<Node*>& children = current->parent->children;
+
+ // Locate the node position in the parent's children
+ // and shift it into its sorted position.
+ //
+ // TODO(bmahler): Consider storing the node's position
+ // in the parent's children to avoid scanning.
+ auto position = std::find(children.begin(), children.end(), current);
+ CHECK(position != children.end());
+
+ // Shift left until done (if needed).
+ while (position != children.begin() &&
+ DRFSorter::Node::compareDRF(current, *std::prev(position))) {
+ std::swap(*position, *std::prev(position));
+ --position;
+ }
+
+ // Or, shift right until done (if needed). Note that when
+ // shifting right, we need to stop once we reach the
+ // inactive leaves (see `Node::children`).
+ while (std::next(position) != children.end() &&
+ (*std::next(position))->kind != Node::INACTIVE_LEAF &&
+ DRFSorter::Node::compareDRF(*std::next(position), current)) {
+ std::swap(*position, *std::next(position));
+ ++position;
+ }
+ }
+
current = CHECK_NOTNULL(current->parent);
}
-
- // TODO(neilc): Avoid dirtying the tree in some circumstances.
- dirty = true;
}
@@ -393,7 +429,10 @@ void DRFSorter::unallocated(
current = CHECK_NOTNULL(current->parent);
}
- // TODO(neilc): Avoid dirtying the tree in some circumstances.
+ // TODO(bmahler): Similar to `allocated()`, avoid dirtying the
+ // tree here in favor of adjusting the node positions. This
+ // hasn't been critical to do since we don't allocate upon
+ // resource recovery in the allocator.
dirty = true;
}
[mesos] 02/03: Use total cluster resources as framework sorter
"total".
Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit a3626219b402ff837192208b6d35946b3a069ce6
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Fri Sep 21 13:48:46 2018 -0700
Use total cluster resources as framework sorter "total".
The role sorter and framework sorters differed in that the role
sorter used the total cluster resources as the "total" in which
the allocation is divided to compute shares, whereas the framework
sorters used the role's allocation as the "total".
Per a discussion on the mailing list:
https://s.apache.org/differing-DRF
This patch makes the two types of sorters use the total cluster
as the "total". While this provides slightly different behavior,
it's unlikely any users were relying on this.
Review: https://reviews.apache.org/r/68833
---
src/master/allocator/mesos/hierarchical.cpp | 30 ++++++++++++++++++-----------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 906fddc..6f389ed 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -553,6 +553,10 @@ void HierarchicalAllocatorProcess::addSlave(
roleSorter->add(slaveId, total);
+ foreachvalue (const Owned<Sorter>& sorter, frameworkSorters) {
+ sorter->add(slaveId, total);
+ }
+
// See comment at `quotaRoleSorter` declaration regarding non-revocable.
quotaRoleSorter->add(slaveId, total.nonRevocable());
@@ -620,6 +624,10 @@ void HierarchicalAllocatorProcess::removeSlave(
roleSorter->remove(slaveId, slaves.at(slaveId).getTotal());
+ foreachvalue (const Owned<Sorter>& sorter, frameworkSorters) {
+ sorter->remove(slaveId, slaves.at(slaveId).getTotal());
+ }
+
// See comment at `quotaRoleSorter` declaration regarding non-revocable.
quotaRoleSorter->remove(
slaveId, slaves.at(slaveId).getTotal().nonRevocable());
@@ -945,10 +953,6 @@ void HierarchicalAllocatorProcess::updateAllocation(
updateSlaveTotal(slaveId, updatedTotal.get());
- // Update the total resources in the framework sorter.
- frameworkSorter->remove(slaveId, offeredResources);
- frameworkSorter->add(slaveId, updatedOfferedResources);
-
const Resources updatedFrameworkAllocation =
frameworkSorter->allocation(frameworkId.value(), slaveId);
@@ -2532,6 +2536,11 @@ void HierarchicalAllocatorProcess::trackFrameworkUnderRole(
CHECK(!frameworkSorters.contains(role));
frameworkSorters.insert({role, Owned<Sorter>(frameworkSorterFactory())});
frameworkSorters.at(role)->initialize(fairnessExcludeResourceNames);
+
+ foreachvalue (const Slave& slave, slaves) {
+ frameworkSorters.at(role)->add(slave.info.id(), slave.getTotal());
+ }
+
metrics.addRole(role);
}
@@ -2637,14 +2646,15 @@ bool HierarchicalAllocatorProcess::updateSlaveTotal(
trackReservations(newReservations);
}
- // Currently `roleSorter` and `quotaRoleSorter`, being the root-level
- // sorters, maintain all of `slaves[slaveId].total` (or the `nonRevocable()`
- // portion in the case of `quotaRoleSorter`) in their own totals (which
- // don't get updated in the allocation runs or during recovery of allocated
- // resources). So, we update them using the resources in `slave.total`.
+ // Update the totals in the sorters.
roleSorter->remove(slaveId, oldTotal);
roleSorter->add(slaveId, total);
+ foreachvalue (const Owned<Sorter>& sorter, frameworkSorters) {
+ sorter->remove(slaveId, oldTotal);
+ sorter->add(slaveId, total);
+ }
+
// See comment at `quotaRoleSorter` declaration regarding non-revocable.
quotaRoleSorter->remove(slaveId, oldTotal.nonRevocable());
quotaRoleSorter->add(slaveId, total.nonRevocable());
@@ -2768,7 +2778,6 @@ void HierarchicalAllocatorProcess::trackAllocatedResources(
CHECK(frameworkSorters.at(role)->contains(frameworkId.value()));
roleSorter->allocated(role, slaveId, allocation);
- frameworkSorters.at(role)->add(slaveId, allocation);
frameworkSorters.at(role)->allocated(
frameworkId.value(), slaveId, allocation);
@@ -2804,7 +2813,6 @@ void HierarchicalAllocatorProcess::untrackAllocatedResources(
frameworkSorters.at(role)->unallocated(
frameworkId.value(), slaveId, allocation);
- frameworkSorters.at(role)->remove(slaveId, allocation);
roleSorter->unallocated(role, slaveId, allocation);