You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2018/09/27 01:07:55 UTC

[mesos] branch master updated (e93cb84 -> 8665335)

This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from e93cb84  Fixed libevent M4 macro to be fully compatible with older autoconf.
     new 9934d41  Cached weights in the sorters nodes.
     new a362621  Use total cluster resources as framework sorter "total".
     new 8665335  Avoided dirtying the DRF sorter when an allocation is performed.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/master/allocator/mesos/hierarchical.cpp   | 30 +++++++----
 src/master/allocator/sorter/drf/sorter.cpp    | 75 +++++++++++++++++++++++----
 src/master/allocator/sorter/drf/sorter.hpp    |  6 +++
 src/master/allocator/sorter/random/sorter.cpp | 28 ++++++++--
 src/master/allocator/sorter/random/sorter.hpp |  6 +++
 5 files changed, 120 insertions(+), 25 deletions(-)


[mesos] 01/03: Cached weights in the sorters nodes.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 9934d41bec5a36b81b3fa94136303c31d6e7d76f
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Sun Sep 16 19:18:49 2018 -0700

    Cached weights in the sorters nodes.
    
    This avoids making excessive map lookups each time we calculate the
    share for a node in the tree. Now, when the weight is needed, the
    value is cached. If the weight gets updated, we update the cached
    value. This approach proved cleaner than trying to ensure freshly
    constructed nodes have the right weight.
    
    Review: https://reviews.apache.org/r/68732
---
 src/master/allocator/sorter/drf/sorter.cpp    | 28 ++++++++++++++++++++++-----
 src/master/allocator/sorter/drf/sorter.hpp    |  6 ++++++
 src/master/allocator/sorter/random/sorter.cpp | 28 ++++++++++++++++++++++-----
 src/master/allocator/sorter/random/sorter.hpp |  6 ++++++
 4 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index a45f66f..b04d684 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -312,6 +312,24 @@ void DRFSorter::updateWeight(const string& path, double weight)
 
   // TODO(neilc): Avoid dirtying the tree in some circumstances.
   dirty = true;
+
+  // Update the weight of the corresponding internal node,
+  // if it exists (this client may not exist despite there
+  // being a weight).
+  Node* node = find(path);
+
+  if (node == nullptr) {
+    return;
+  }
+
+  // If there is a virtual leaf, we need to move up one level.
+  if (node->name == ".") {
+    node = CHECK_NOTNULL(node->parent);
+  }
+
+  CHECK_EQ(path, node->path);
+
+  node->weight = weight;
 }
 
 
@@ -625,11 +643,11 @@ double DRFSorter::calculateShare(const Node* node) const
 
 double DRFSorter::getWeight(const Node* node) const
 {
-  // TODO(bmahler): It's expensive to have to hash the complete
-  // role path and re-lookup the weight each time we calculate
-  // the share, consider storing the weight directly in the
-  // node struct.
-  return weights.get(node->path).getOrElse(1.0);
+  if (node->weight.isNone()) {
+    node->weight = weights.get(node->path).getOrElse(1.0);
+  }
+
+  return CHECK_NOTNONE(node->weight);
 }
 
 
diff --git a/src/master/allocator/sorter/drf/sorter.hpp b/src/master/allocator/sorter/drf/sorter.hpp
index 2957a2e..084df82 100644
--- a/src/master/allocator/sorter/drf/sorter.hpp
+++ b/src/master/allocator/sorter/drf/sorter.hpp
@@ -243,6 +243,12 @@ struct DRFSorter::Node
 
   double share;
 
+  // Cached weight of the node, access this through `getWeight()`.
+  // The value is cached by `getWeight()` and updated by
+  // `updateWeight()`. Marked mutable since the caching writes
+  // to this are logically const.
+  mutable Option<double> weight;
+
   Kind kind;
 
   Node* parent;
diff --git a/src/master/allocator/sorter/random/sorter.cpp b/src/master/allocator/sorter/random/sorter.cpp
index fc47756..f578ef1 100644
--- a/src/master/allocator/sorter/random/sorter.cpp
+++ b/src/master/allocator/sorter/random/sorter.cpp
@@ -285,6 +285,24 @@ void RandomSorter::deactivate(const string& clientPath)
 void RandomSorter::updateWeight(const string& path, double weight)
 {
   weights[path] = weight;
+
+  // Update the weight of the corresponding internal node,
+  // if it exists (this client may not exist despite there
+  // being a weight).
+  Node* node = find(path);
+
+  if (node == nullptr) {
+    return;
+  }
+
+  // If there is a virtual leaf, we need to move up one level.
+  if (node->name == ".") {
+    node = CHECK_NOTNULL(node->parent);
+  }
+
+  CHECK_EQ(path, node->path);
+
+  node->weight = weight;
 }
 
 
@@ -542,11 +560,11 @@ size_t RandomSorter::count() const
 
 double RandomSorter::getWeight(const Node* node) const
 {
-  // TODO(bmahler): It's expensive to have to hash the complete
-  // role path and re-lookup the weight each time we calculate
-  // the share, consider storing the weight directly in the
-  // node struct.
-  return weights.get(node->path).getOrElse(1.0);
+  if (node->weight.isNone()) {
+    node->weight = weights.get(node->path).getOrElse(1.0);
+  }
+
+  return CHECK_NOTNONE(node->weight);
 }
 
 
diff --git a/src/master/allocator/sorter/random/sorter.hpp b/src/master/allocator/sorter/random/sorter.hpp
index 825c158..800b22c 100644
--- a/src/master/allocator/sorter/random/sorter.hpp
+++ b/src/master/allocator/sorter/random/sorter.hpp
@@ -235,6 +235,12 @@ struct RandomSorter::Node
   // label for virtual leaf nodes.
   std::string path;
 
+  // Cached weight of the node, access this through `getWeight()`.
+  // The value is cached by `getWeight()` and updated by
+  // `updateWeight()`. Marked mutable since the caching writes
+  // to this are logically const.
+  mutable Option<double> weight;
+
   Kind kind;
 
   Node* parent;


[mesos] 03/03: Avoided dirtying the DRF sorter when an allocation is performed.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 86653356d763fee79e9467cf7b07bebb449e8aff
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Fri Sep 21 12:26:19 2018 -0700

    Avoided dirtying the DRF sorter when an allocation is performed.
    
    This improves performance by ensuring that the DRF sorter can remain
    sorted throughout an allocation cycle. Without this change, we spend
    the majority of time re-sorting throughout an allocation cycle, when
    there are large numbers of clients (roles / frameworks).
    
    Before with --enable-optimize:
    
    *HierarchicalAllocator_BENCHMARK_Test.DeclineOffers/21
    Added 1000 frameworks in 28.69ms
    Added 10000 agents in 3.96secs
    round 0 allocate() took 3.18secs to make 10000 offers ...
    round 1 allocate() took 3.40secs to make 10000 offers ...
    round 2 allocate() took 3.31secs to make 10000 offers ...
    
    After with --enable-optimize:
    
    *HierarchicalAllocator_BENCHMARK_Test.DeclineOffers/21
    Added 1000 frameworks in 40.38ms
    Added 10000 agents in 1.61secs
    round 0 allocate() took 1.06secs to make 10000 offers ...
    round 1 allocate() took 1.06secs to make 10000 offers ...
    round 2 allocate() took 1.06secs to make 10000 offers ...
    
    Review: https://reviews.apache.org/r/68808
---
 src/master/allocator/sorter/drf/sorter.cpp | 47 +++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index b04d684..a648fac 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -16,6 +16,7 @@
 
 #include "master/allocator/sorter/drf/sorter.hpp"
 
+#include <iterator>
 #include <set>
 #include <string>
 #include <vector>
@@ -340,16 +341,51 @@ void DRFSorter::allocated(
 {
   Node* current = CHECK_NOTNULL(find(clientPath));
 
+  // Walk up the tree adjusting allocations. If the tree is
+  // sorted, we keep it sorted (see below).
+  //
   // NOTE: We don't currently update the `allocation` for the root
   // node. This is debatable, but the current implementation doesn't
   // require looking at the allocation of the root node.
   while (current != root) {
     current->allocation.add(slaveId, resources);
+
+    // Note that inactive leaves are not sorted, and are always
+    // stored in `children` after the active leaves and internal
+    // nodes. See the comment on `Node::children`.
+    if (!dirty && current->kind != Node::INACTIVE_LEAF) {
+      current->share = calculateShare(current);
+
+      vector<Node*>& children = current->parent->children;
+
+      // Locate the node position in the parent's children
+      // and shift it into its sorted position.
+      //
+      // TODO(bmahler): Consider storing the node's position
+      // in the parent's children to avoid scanning.
+      auto position = std::find(children.begin(), children.end(), current);
+      CHECK(position != children.end());
+
+      // Shift left until done (if needed).
+      while (position != children.begin() &&
+             DRFSorter::Node::compareDRF(current, *std::prev(position))) {
+        std::swap(*position, *std::prev(position));
+        --position;
+      }
+
+      // Or, shift right until done (if needed). Note that when
+      // shifting right, we need to stop once we reach the
+      // inactive leaves (see `Node::children`).
+      while (std::next(position) != children.end() &&
+             (*std::next(position))->kind != Node::INACTIVE_LEAF &&
+             DRFSorter::Node::compareDRF(*std::next(position), current)) {
+        std::swap(*position, *std::next(position));
+        ++position;
+      }
+    }
+
     current = CHECK_NOTNULL(current->parent);
   }
-
-  // TODO(neilc): Avoid dirtying the tree in some circumstances.
-  dirty = true;
 }
 
 
@@ -393,7 +429,10 @@ void DRFSorter::unallocated(
     current = CHECK_NOTNULL(current->parent);
   }
 
-  // TODO(neilc): Avoid dirtying the tree in some circumstances.
+  // TODO(bmahler): Similar to `allocated()`, avoid dirtying the
+  // tree here in favor of adjusting the node positions. This
+  // hasn't been critical to do since we don't allocate upon
+  // resource recovery in the allocator.
   dirty = true;
 }
 


[mesos] 02/03: Use total cluster resources as framework sorter "total".

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit a3626219b402ff837192208b6d35946b3a069ce6
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Fri Sep 21 13:48:46 2018 -0700

    Use total cluster resources as framework sorter "total".
    
    The role sorter and framework sorters differed in that the role
    sorter used the total cluster resources as the "total" in which
    the allocation is divided to compute shares, whereas the framework
    sorters used the role's allocation as the "total".
    
    Per a discussion on the mailing list:
    
    https://s.apache.org/differing-DRF
    
    This patch makes the two types of sorters use the total cluster
    as the "total". While this provides slightly different behavior,
    it's unlikely any users were relying on this.
    
    Review: https://reviews.apache.org/r/68833
---
 src/master/allocator/mesos/hierarchical.cpp | 30 ++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 906fddc..6f389ed 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -553,6 +553,10 @@ void HierarchicalAllocatorProcess::addSlave(
 
   roleSorter->add(slaveId, total);
 
+  foreachvalue (const Owned<Sorter>& sorter, frameworkSorters) {
+    sorter->add(slaveId, total);
+  }
+
   // See comment at `quotaRoleSorter` declaration regarding non-revocable.
   quotaRoleSorter->add(slaveId, total.nonRevocable());
 
@@ -620,6 +624,10 @@ void HierarchicalAllocatorProcess::removeSlave(
 
   roleSorter->remove(slaveId, slaves.at(slaveId).getTotal());
 
+  foreachvalue (const Owned<Sorter>& sorter, frameworkSorters) {
+    sorter->remove(slaveId, slaves.at(slaveId).getTotal());
+  }
+
   // See comment at `quotaRoleSorter` declaration regarding non-revocable.
   quotaRoleSorter->remove(
       slaveId, slaves.at(slaveId).getTotal().nonRevocable());
@@ -945,10 +953,6 @@ void HierarchicalAllocatorProcess::updateAllocation(
 
   updateSlaveTotal(slaveId, updatedTotal.get());
 
-  // Update the total resources in the framework sorter.
-  frameworkSorter->remove(slaveId, offeredResources);
-  frameworkSorter->add(slaveId, updatedOfferedResources);
-
   const Resources updatedFrameworkAllocation =
     frameworkSorter->allocation(frameworkId.value(), slaveId);
 
@@ -2532,6 +2536,11 @@ void HierarchicalAllocatorProcess::trackFrameworkUnderRole(
     CHECK(!frameworkSorters.contains(role));
     frameworkSorters.insert({role, Owned<Sorter>(frameworkSorterFactory())});
     frameworkSorters.at(role)->initialize(fairnessExcludeResourceNames);
+
+    foreachvalue (const Slave& slave, slaves) {
+      frameworkSorters.at(role)->add(slave.info.id(), slave.getTotal());
+    }
+
     metrics.addRole(role);
   }
 
@@ -2637,14 +2646,15 @@ bool HierarchicalAllocatorProcess::updateSlaveTotal(
     trackReservations(newReservations);
   }
 
-  // Currently `roleSorter` and `quotaRoleSorter`, being the root-level
-  // sorters, maintain all of `slaves[slaveId].total` (or the `nonRevocable()`
-  // portion in the case of `quotaRoleSorter`) in their own totals (which
-  // don't get updated in the allocation runs or during recovery of allocated
-  // resources). So, we update them using the resources in `slave.total`.
+  // Update the totals in the sorters.
   roleSorter->remove(slaveId, oldTotal);
   roleSorter->add(slaveId, total);
 
+  foreachvalue (const Owned<Sorter>& sorter, frameworkSorters) {
+    sorter->remove(slaveId, oldTotal);
+    sorter->add(slaveId, total);
+  }
+
   // See comment at `quotaRoleSorter` declaration regarding non-revocable.
   quotaRoleSorter->remove(slaveId, oldTotal.nonRevocable());
   quotaRoleSorter->add(slaveId, total.nonRevocable());
@@ -2768,7 +2778,6 @@ void HierarchicalAllocatorProcess::trackAllocatedResources(
     CHECK(frameworkSorters.at(role)->contains(frameworkId.value()));
 
     roleSorter->allocated(role, slaveId, allocation);
-    frameworkSorters.at(role)->add(slaveId, allocation);
     frameworkSorters.at(role)->allocated(
         frameworkId.value(), slaveId, allocation);
 
@@ -2804,7 +2813,6 @@ void HierarchicalAllocatorProcess::untrackAllocatedResources(
 
     frameworkSorters.at(role)->unallocated(
         frameworkId.value(), slaveId, allocation);
-    frameworkSorters.at(role)->remove(slaveId, allocation);
 
     roleSorter->unallocated(role, slaveId, allocation);