You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2018/09/27 18:59:46 UTC

[mesos] branch 1.7.x updated (01b3afc -> dcbb942)

This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a change to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from 01b3afc  Added MESOS-9228 to the 1.7.1 CHANGELOG.
     new 6b29bd6  Simplified the weight lookup logic in the sorters.
     new 61100f3  Reserve sort output vector in the sorters.
     new c3f32c5  Added a ScalarResourceQuantities type to improve sorters performance.
     new a41731b  Cached weights in the sorters nodes.
     new d9ee5e3  Added MESOS-9239 to the 1.7.1 CHANGELOG.
     new a7e339f  Use total cluster resources as framework sorter "total".
     new cb56b30  Added MESOS-9255 to the 1.7.1 CHANGELOG.
     new 6ef0df9  Avoided dirtying the DRF sorter when an allocation is performed.
     new 4d57303  Added MESOS-9249 to the 1.7.1 CHANGELOG.
     new dcbb942  Added a note about allocaiton cycle performance in the 1.7.1 CHANGELOG.

The 10 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CHANGELOG                                     | 11 +++-
 src/master/allocator/mesos/hierarchical.cpp   | 30 ++++++----
 src/master/allocator/sorter/drf/sorter.cpp    | 81 ++++++++++++++++++++++----
 src/master/allocator/sorter/drf/sorter.hpp    | 34 ++++++-----
 src/master/allocator/sorter/random/sorter.cpp | 33 ++++++++---
 src/master/allocator/sorter/random/sorter.hpp | 34 ++++++-----
 src/master/allocator/sorter/sorter.hpp        | 84 +++++++++++++++++++++++++++
 7 files changed, 247 insertions(+), 60 deletions(-)


[mesos] 08/10: Avoided dirtying the DRF sorter when an allocation is performed.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 6ef0df93fb20a9a46f7885aa89a8062bc00d5f29
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Fri Sep 21 12:26:19 2018 -0700

    Avoided dirtying the DRF sorter when an allocation is performed.
    
    This improves performance by ensuring that the DRF sorter can remain
    sorted throughout an allocation cycle. Without this change, we spend
    the majority of time re-sorting throughout an allocation cycle, when
    there are large numbers of clients (roles / frameworks).
    
    Before with --enable-optimize:
    
    *HierarchicalAllocator_BENCHMARK_Test.DeclineOffers/21
    Added 1000 frameworks in 28.69ms
    Added 10000 agents in 3.96secs
    round 0 allocate() took 3.18secs to make 10000 offers ...
    round 1 allocate() took 3.40secs to make 10000 offers ...
    round 2 allocate() took 3.31secs to make 10000 offers ...
    
    After with --enable-optimize:
    
    *HierarchicalAllocator_BENCHMARK_Test.DeclineOffers/21
    Added 1000 frameworks in 40.38ms
    Added 10000 agents in 1.61secs
    round 0 allocate() took 1.06secs to make 10000 offers ...
    round 1 allocate() took 1.06secs to make 10000 offers ...
    round 2 allocate() took 1.06secs to make 10000 offers ...
    
    Review: https://reviews.apache.org/r/68808
---
 src/master/allocator/sorter/drf/sorter.cpp | 47 +++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index b04d684..a648fac 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -16,6 +16,7 @@
 
 #include "master/allocator/sorter/drf/sorter.hpp"
 
+#include <iterator>
 #include <set>
 #include <string>
 #include <vector>
@@ -340,16 +341,51 @@ void DRFSorter::allocated(
 {
   Node* current = CHECK_NOTNULL(find(clientPath));
 
+  // Walk up the tree adjusting allocations. If the tree is
+  // sorted, we keep it sorted (see below).
+  //
   // NOTE: We don't currently update the `allocation` for the root
   // node. This is debatable, but the current implementation doesn't
   // require looking at the allocation of the root node.
   while (current != root) {
     current->allocation.add(slaveId, resources);
+
+    // Note that inactive leaves are not sorted, and are always
+    // stored in `children` after the active leaves and internal
+    // nodes. See the comment on `Node::children`.
+    if (!dirty && current->kind != Node::INACTIVE_LEAF) {
+      current->share = calculateShare(current);
+
+      vector<Node*>& children = current->parent->children;
+
+      // Locate the node position in the parent's children
+      // and shift it into its sorted position.
+      //
+      // TODO(bmahler): Consider storing the node's position
+      // in the parent's children to avoid scanning.
+      auto position = std::find(children.begin(), children.end(), current);
+      CHECK(position != children.end());
+
+      // Shift left until done (if needed).
+      while (position != children.begin() &&
+             DRFSorter::Node::compareDRF(current, *std::prev(position))) {
+        std::swap(*position, *std::prev(position));
+        --position;
+      }
+
+      // Or, shift right until done (if needed). Note that when
+      // shifting right, we need to stop once we reach the
+      // inactive leaves (see `Node::children`).
+      while (std::next(position) != children.end() &&
+             (*std::next(position))->kind != Node::INACTIVE_LEAF &&
+             DRFSorter::Node::compareDRF(*std::next(position), current)) {
+        std::swap(*position, *std::next(position));
+        ++position;
+      }
+    }
+
     current = CHECK_NOTNULL(current->parent);
   }
-
-  // TODO(neilc): Avoid dirtying the tree in some circumstances.
-  dirty = true;
 }
 
 
@@ -393,7 +429,10 @@ void DRFSorter::unallocated(
     current = CHECK_NOTNULL(current->parent);
   }
 
-  // TODO(neilc): Avoid dirtying the tree in some circumstances.
+  // TODO(bmahler): Similar to `allocated()`, avoid dirtying the
+  // tree here in favor of adjusting the node positions. This
+  // hasn't been critical to do since we don't allocate upon
+  // resource recovery in the allocator.
   dirty = true;
 }
 


[mesos] 07/10: Added MESOS-9255 to the 1.7.1 CHANGELOG.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit cb56b30d7f685af6f7d0341b0a8e0ab81806fcee
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Thu Sep 27 11:57:04 2018 -0700

    Added MESOS-9255 to the 1.7.1 CHANGELOG.
---
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG b/CHANGELOG
index 5100068..7238fad 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -9,6 +9,7 @@ Release Notes - Mesos - Version 1.7.1 (WIP)
 
 ** Improvement:
   * [MESOS-9239] - Improve sorting performance in the DRF sorter.
+  * [MESOS-9255] - Use consistent "totals" across role / framework DRF.
 
 
 Release Notes - Mesos - Version 1.7.0


[mesos] 02/10: Reserve sort output vector in the sorters.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 61100f3f33970f618bea96f4ba3df9ae156b7af4
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Sun Sep 16 16:36:10 2018 -0700

    Reserve sort output vector in the sorters.
    
    This reserves the sort output vector consistently based on
    the number of clients, in both the DRF and random sorters.
    
    Review: https://reviews.apache.org/r/68730
---
 src/master/allocator/sorter/drf/sorter.cpp    | 4 ++++
 src/master/allocator/sorter/random/sorter.cpp | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index aa925c4..a45f66f 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -551,6 +551,10 @@ vector<string> DRFSorter::sort()
   // inactive leaves sorted after active leaves and internal nodes.
   vector<string> result;
 
+  // TODO(bmahler): This over-reserves where there are inactive
+  // clients, only reserve the number of active clients.
+  result.reserve(clients.size());
+
   std::function<void (const Node*)> listClients =
       [&listClients, &result](const Node* node) {
     foreach (const Node* child, node->children) {
diff --git a/src/master/allocator/sorter/random/sorter.cpp b/src/master/allocator/sorter/random/sorter.cpp
index 7b98389..fc47756 100644
--- a/src/master/allocator/sorter/random/sorter.cpp
+++ b/src/master/allocator/sorter/random/sorter.cpp
@@ -497,6 +497,9 @@ vector<string> RandomSorter::sort()
   // The children of each node are already shuffled, with
   // inactive leaves stored after active leaves and internal nodes.
   vector<string> result;
+
+  // TODO(bmahler): This over-reserves where there are inactive
+  // clients, only reserve the number of active clients.
   result.reserve(clients.size());
 
   std::function<void (const Node*)> listClients =


[mesos] 10/10: Added a note about allocaiton cycle performance in the 1.7.1 CHANGELOG.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit dcbb94279d089d977c159ef36f8b38d639d0f569
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Thu Sep 27 11:57:36 2018 -0700

    Added a note about allocaiton cycle performance in the 1.7.1 CHANGELOG.
---
 CHANGELOG | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index a100945..123d90b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,10 @@
 Release Notes - Mesos - Version 1.7.1 (WIP)
 -------------------------------------------
-* This is a bug fix release.
+* This is a bug fix release. Also includes performance improvements:
+
+  * **Allocator**: Improved allocation cycle time substantially
+    (see MESOS-9239 and MESOS-9249). These reduce the allocation
+    cycle time in some benchmarks by 80%.
 
 ** Bug
   * [MESOS-8545] - AgentAPIStreamingTest.AttachInputToNestedContainerSession is flaky.


[mesos] 05/10: Added MESOS-9239 to the 1.7.1 CHANGELOG.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit d9ee5e38783e00d9890a1a4dbd6235f8c7d1ff0c
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Thu Sep 27 11:56:44 2018 -0700

    Added MESOS-9239 to the 1.7.1 CHANGELOG.
---
 CHANGELOG | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index 32856e1..5100068 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,6 +7,9 @@ Release Notes - Mesos - Version 1.7.1 (WIP)
   * [MESOS-9131] - Health checks launching nested containers while a container is being destroyed lead to unkillable tasks.
   * [MESOS-9228] - SLRP does not clean up plugin containers after it is removed.
 
+** Improvement:
+  * [MESOS-9239] - Improve sorting performance in the DRF sorter.
+
 
 Release Notes - Mesos - Version 1.7.0
 -------------------------------------


[mesos] 03/10: Added a ScalarResourceQuantities type to improve sorters performance.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit c3f32c5a45aa6b326328f77a1ae546ee7fd18a93
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Sun Sep 16 16:37:38 2018 -0700

    Added a ScalarResourceQuantities type to improve sorters performance.
    
    This type replaces the use of hashmaps keyed by resource names in
    favor of storing vectors of `pair<string,Value::Scalar>`, in order
    to avoid the performance penalty of using hashmaps.
    
    Running *HierarchicalAllocator_BENCHMARK_Test.DeclineOffers/21 shows
    the following improvement:
    
    Using 10000 agents and 1000 frameworks
    Added 1000 frameworks in 42.49ms -> 42.85ms (no change)
    Added 10000 agents in 7.69secs -> 4.89secs (normalized: 1 -> 0.64)
    round 0 allocate() took 5.42secs -> 3.53secs (nomralized: 1 -> 0.65)
    
    Review: https://reviews.apache.org/r/68731
---
 src/master/allocator/sorter/drf/sorter.hpp    | 26 ++++-----
 src/master/allocator/sorter/random/sorter.hpp | 26 ++++-----
 src/master/allocator/sorter/sorter.hpp        | 84 +++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 28 deletions(-)

diff --git a/src/master/allocator/sorter/drf/sorter.hpp b/src/master/allocator/sorter/drf/sorter.hpp
index 71352c8..2957a2e 100644
--- a/src/master/allocator/sorter/drf/sorter.hpp
+++ b/src/master/allocator/sorter/drf/sorter.hpp
@@ -169,14 +169,13 @@ private:
     // identities of resources and not quantities.
     Resources scalarQuantities;
 
-    // We also store a map version of `scalarQuantities`, mapping
-    // the `Resource::name` to aggregated scalar. This improves the
-    // performance of calculating shares. See MESOS-4694.
+    // To improve the performance of calculating shares, we store
+    // a redundant but more efficient version of `scalarQuantities`.
+    // See MESOS-4694.
     //
-    // TODO(bmahler): Ideally we do not store `scalarQuantities`
-    // redundantly here, investigate performance improvements to
-    // `Resources` to make this unnecessary.
-    hashmap<std::string, Value::Scalar> totals;
+    // TODO(bmahler): Can we remove `scalarQuantities` in favor of
+    // using this type whenever scalar quantities are needed?
+    ScalarResourceQuantities totals;
   } total_;
 
   // Metrics are optionally exposed by the sorter.
@@ -430,14 +429,13 @@ struct DRFSorter::Node
     // the corresponding resource. See notes above.
     Resources scalarQuantities;
 
-    // We also store a map version of `scalarQuantities`, mapping
-    // the `Resource::name` to aggregated scalar. This improves the
-    // performance of calculating shares. See MESOS-4694.
+    // To improve the performance of calculating shares, we store
+    // a redundant but more efficient version of `scalarQuantities`.
+    // See MESOS-4694.
     //
-    // TODO(bmahler): Ideally we do not store `scalarQuantities`
-    // redundantly here, investigate performance improvements to
-    // `Resources` to make this unnecessary.
-    hashmap<std::string, Value::Scalar> totals;
+    // TODO(bmahler): Can we remove `scalarQuantities` in favor of
+    // using this type whenever scalar quantities are needed?
+    ScalarResourceQuantities totals;
   } allocation;
 
   // Compares two nodes according to DRF share.
diff --git a/src/master/allocator/sorter/random/sorter.hpp b/src/master/allocator/sorter/random/sorter.hpp
index 6bfeda0..825c158 100644
--- a/src/master/allocator/sorter/random/sorter.hpp
+++ b/src/master/allocator/sorter/random/sorter.hpp
@@ -167,14 +167,13 @@ private:
     // identities of resources and not quantities.
     Resources scalarQuantities;
 
-    // We also store a map version of `scalarQuantities`, mapping
-    // the `Resource::name` to aggregated scalar. This improves the
-    // performance of calculating shares. See MESOS-4694.
+    // To improve the performance of calculating shares, we store
+    // a redundant but more efficient version of `scalarQuantities`.
+    // See MESOS-4694.
     //
-    // TODO(bmahler): Ideally we do not store `scalarQuantities`
-    // redundantly here, investigate performance improvements to
-    // `Resources` to make this unnecessary.
-    hashmap<std::string, Value::Scalar> totals;
+    // TODO(bmahler): Can we remove `scalarQuantities` in favor of
+    // using this type whenever scalar quantities are needed?
+    ScalarResourceQuantities totals;
   } total_;
 };
 
@@ -406,14 +405,13 @@ struct RandomSorter::Node
     // the corresponding resource. See notes above.
     Resources scalarQuantities;
 
-    // We also store a map version of `scalarQuantities`, mapping
-    // the `Resource::name` to aggregated scalar. This improves the
-    // performance of calculating shares. See MESOS-4694.
+    // To improve the performance of calculating shares, we store
+    // a redundant but more efficient version of `scalarQuantities`.
+    // See MESOS-4694.
     //
-    // TODO(bmahler): Ideally we do not store `scalarQuantities`
-    // redundantly here, investigate performance improvements to
-    // `Resources` to make this unnecessary.
-    hashmap<std::string, Value::Scalar> totals;
+    // TODO(bmahler): Can we remove `scalarQuantities` in favor of
+    // using this type whenever scalar quantities are needed?
+    ScalarResourceQuantities totals;
   } allocation;
 };
 
diff --git a/src/master/allocator/sorter/sorter.hpp b/src/master/allocator/sorter/sorter.hpp
index 432ccfe..68cf698 100644
--- a/src/master/allocator/sorter/sorter.hpp
+++ b/src/master/allocator/sorter/sorter.hpp
@@ -19,6 +19,7 @@
 
 #include <functional>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include <mesos/resources.hpp>
@@ -148,6 +149,89 @@ public:
   virtual size_t count() const = 0;
 };
 
+// Efficient type for scalar resource quantities that avoids
+// the overhead of using `Resources`.
+//
+// TODO(bmahler): This was originally added to replace a
+// `hashmap<string, Scalar>` and hence the interface was
+// tailored to the particular usage of the map. In order
+// to move this up as a replacement of all quantities
+// (e.g. `Resources::createStrippedScalarQuantity()`),
+// this will need more functionality to do so (e.g.
+// arithmetic operators, containment check, etc).
+class ScalarResourceQuantities
+{
+public:
+  ScalarResourceQuantities()
+  {
+    // Pre-reserve space for first-class scalars.
+    quantities.reserve(4u);  // [cpus, disk, gpus, mem]
+  }
+
+  // Returns true if there is a non-zero amount of
+  // the specified resource.
+  bool contains(const std::string& name) const
+  {
+    // Don't bother binary searching since we don't expect
+    // a large number of quantities.
+    foreach (auto& quantity, quantities) {
+      if (quantity.first == name) {
+        return quantity.second.value() > 0.0;
+      }
+    }
+
+    return false;
+  }
+
+  const Value::Scalar& at(const std::string& name) const
+  {
+    // Don't bother binary searching since we don't expect
+    // a large number of quantities.
+    foreach (auto& quantity, quantities) {
+      if (quantity.first == name) {
+        return quantity.second;
+      }
+    }
+
+    // TODO(bmahler): Print out the vector, need to add
+    // a `stringify(const pair<T1, T2>& p)` overload.
+    LOG(FATAL) << "Failed to find '" << name << "'";
+  }
+
+  Value::Scalar& operator[](const std::string& name)
+  {
+    // Find the location to insert while maintaining
+    // alphabetical ordering. Don't bother binary searching
+    // since we don't expect a large number of quantities.
+    auto it = quantities.begin();
+    for (; it != quantities.end(); ++it) {
+      if (it->first == name) {
+        return it->second;
+      }
+
+      if (it->first > name) {
+        break;
+      }
+    }
+
+    it = quantities.insert(it, std::make_pair(name, Value::Scalar()));
+
+    return it->second;
+  }
+
+  typedef std::vector<std::pair<std::string, Value::Scalar>>::const_iterator
+    const_iterator;
+
+  const_iterator begin() const { return quantities.begin(); }
+  const_iterator   end() const { return quantities.end(); }
+
+private:
+  // List of scalar resources sorted by resource name.
+  // Arithmetic operations benefit from this sorting.
+  std::vector<std::pair<std::string, Value::Scalar>> quantities;
+};
+
+
 } // namespace allocator {
 } // namespace master {
 } // namespace internal {


[mesos] 09/10: Added MESOS-9249 to the 1.7.1 CHANGELOG.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 4d57303c4881c0fbc55fd1dc9631b461b43052c3
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Thu Sep 27 11:57:13 2018 -0700

    Added MESOS-9249 to the 1.7.1 CHANGELOG.
---
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG b/CHANGELOG
index 7238fad..a100945 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -9,6 +9,7 @@ Release Notes - Mesos - Version 1.7.1 (WIP)
 
 ** Improvement:
   * [MESOS-9239] - Improve sorting performance in the DRF sorter.
+  * [MESOS-9249] - Avoid dirtying the DRF sorter when allocating resources.
   * [MESOS-9255] - Use consistent "totals" across role / framework DRF.
 
 


[mesos] 01/10: Simplified the weight lookup logic in the sorters.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 6b29bd67c21f2df13d19fd52f52e1a8d5069ab9d
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Sun Sep 16 13:05:43 2018 -0700

    Simplified the weight lookup logic in the sorters.
    
    Review: https://reviews.apache.org/r/68729
---
 src/master/allocator/sorter/drf/sorter.cpp    | 16 +++++++---------
 src/master/allocator/sorter/drf/sorter.hpp    |  2 +-
 src/master/allocator/sorter/random/sorter.cpp | 16 +++++++---------
 src/master/allocator/sorter/random/sorter.hpp |  2 +-
 4 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index 07e5482..aa925c4 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -615,19 +615,17 @@ double DRFSorter::calculateShare(const Node* node) const
     }
   }
 
-  return share / findWeight(node);
+  return share / getWeight(node);
 }
 
 
-double DRFSorter::findWeight(const Node* node) const
+double DRFSorter::getWeight(const Node* node) const
 {
-  Option<double> weight = weights.get(node->path);
-
-  if (weight.isNone()) {
-    return 1.0;
-  }
-
-  return weight.get();
+  // TODO(bmahler): It's expensive to have to hash the complete
+  // role path and re-lookup the weight each time we calculate
+  // the share, consider storing the weight directly in the
+  // node struct.
+  return weights.get(node->path).getOrElse(1.0);
 }
 
 
diff --git a/src/master/allocator/sorter/drf/sorter.hpp b/src/master/allocator/sorter/drf/sorter.hpp
index 5a4fa5e..71352c8 100644
--- a/src/master/allocator/sorter/drf/sorter.hpp
+++ b/src/master/allocator/sorter/drf/sorter.hpp
@@ -116,7 +116,7 @@ private:
   // Returns the weight associated with the node. If no weight has
   // been configured for the node's path, the default weight (1.0) is
   // returned.
-  double findWeight(const Node* node) const;
+  double getWeight(const Node* node) const;
 
   // Returns the client associated with the given path. Returns
   // nullptr if the path is not found or if the path identifies an
diff --git a/src/master/allocator/sorter/random/sorter.cpp b/src/master/allocator/sorter/random/sorter.cpp
index d17f8af..7b98389 100644
--- a/src/master/allocator/sorter/random/sorter.cpp
+++ b/src/master/allocator/sorter/random/sorter.cpp
@@ -477,7 +477,7 @@ vector<string> RandomSorter::sort()
     vector<double> weights(inactiveBegin - node->children.begin());
 
     for (int i = 0; i < inactiveBegin - node->children.begin(); ++i) {
-      weights[i] = findWeight(node->children[i]);
+      weights[i] = getWeight(node->children[i]);
     }
 
     weightedShuffle(node->children.begin(), inactiveBegin, weights, generator);
@@ -537,15 +537,13 @@ size_t RandomSorter::count() const
 }
 
 
-double RandomSorter::findWeight(const Node* node) const
+double RandomSorter::getWeight(const Node* node) const
 {
-  Option<double> weight = weights.get(node->path);
-
-  if (weight.isNone()) {
-    return 1.0;
-  }
-
-  return weight.get();
+  // TODO(bmahler): It's expensive to have to hash the complete
+  // role path and re-lookup the weight each time we calculate
+  // the share, consider storing the weight directly in the
+  // node struct.
+  return weights.get(node->path).getOrElse(1.0);
 }
 
 
diff --git a/src/master/allocator/sorter/random/sorter.hpp b/src/master/allocator/sorter/random/sorter.hpp
index 7f6c0de..6bfeda0 100644
--- a/src/master/allocator/sorter/random/sorter.hpp
+++ b/src/master/allocator/sorter/random/sorter.hpp
@@ -117,7 +117,7 @@ private:
   // Returns the weight associated with the node. If no weight has
   // been configured for the node's path, the default weight (1.0) is
   // returned.
-  double findWeight(const Node* node) const;
+  double getWeight(const Node* node) const;
 
   // Returns the client associated with the given path. Returns
   // nullptr if the path is not found or if the path identifies an


[mesos] 04/10: Cached weights in the sorters nodes.

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit a41731b0facd1a61301a7d0b99f10e28f3b03a48
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Sun Sep 16 19:18:49 2018 -0700

    Cached weights in the sorters nodes.
    
    This avoids making excessive map lookups each time we calculate the
    share for a node in the tree. Now, when the weight is needed, the
    value is cached. If the weight gets updated, we update the cached
    value. This approach proved cleaner than trying to ensure freshly
    constructed nodes have the right weight.
    
    Review: https://reviews.apache.org/r/68732
---
 src/master/allocator/sorter/drf/sorter.cpp    | 28 ++++++++++++++++++++++-----
 src/master/allocator/sorter/drf/sorter.hpp    |  6 ++++++
 src/master/allocator/sorter/random/sorter.cpp | 28 ++++++++++++++++++++++-----
 src/master/allocator/sorter/random/sorter.hpp |  6 ++++++
 4 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index a45f66f..b04d684 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -312,6 +312,24 @@ void DRFSorter::updateWeight(const string& path, double weight)
 
   // TODO(neilc): Avoid dirtying the tree in some circumstances.
   dirty = true;
+
+  // Update the weight of the corresponding internal node,
+  // if it exists (this client may not exist despite there
+  // being a weight).
+  Node* node = find(path);
+
+  if (node == nullptr) {
+    return;
+  }
+
+  // If there is a virtual leaf, we need to move up one level.
+  if (node->name == ".") {
+    node = CHECK_NOTNULL(node->parent);
+  }
+
+  CHECK_EQ(path, node->path);
+
+  node->weight = weight;
 }
 
 
@@ -625,11 +643,11 @@ double DRFSorter::calculateShare(const Node* node) const
 
 double DRFSorter::getWeight(const Node* node) const
 {
-  // TODO(bmahler): It's expensive to have to hash the complete
-  // role path and re-lookup the weight each time we calculate
-  // the share, consider storing the weight directly in the
-  // node struct.
-  return weights.get(node->path).getOrElse(1.0);
+  if (node->weight.isNone()) {
+    node->weight = weights.get(node->path).getOrElse(1.0);
+  }
+
+  return CHECK_NOTNONE(node->weight);
 }
 
 
diff --git a/src/master/allocator/sorter/drf/sorter.hpp b/src/master/allocator/sorter/drf/sorter.hpp
index 2957a2e..084df82 100644
--- a/src/master/allocator/sorter/drf/sorter.hpp
+++ b/src/master/allocator/sorter/drf/sorter.hpp
@@ -243,6 +243,12 @@ struct DRFSorter::Node
 
   double share;
 
+  // Cached weight of the node, access this through `getWeight()`.
+  // The value is cached by `getWeight()` and updated by
+  // `updateWeight()`. Marked mutable since the caching writes
+  // to this are logically const.
+  mutable Option<double> weight;
+
   Kind kind;
 
   Node* parent;
diff --git a/src/master/allocator/sorter/random/sorter.cpp b/src/master/allocator/sorter/random/sorter.cpp
index fc47756..f578ef1 100644
--- a/src/master/allocator/sorter/random/sorter.cpp
+++ b/src/master/allocator/sorter/random/sorter.cpp
@@ -285,6 +285,24 @@ void RandomSorter::deactivate(const string& clientPath)
 void RandomSorter::updateWeight(const string& path, double weight)
 {
   weights[path] = weight;
+
+  // Update the weight of the corresponding internal node,
+  // if it exists (this client may not exist despite there
+  // being a weight).
+  Node* node = find(path);
+
+  if (node == nullptr) {
+    return;
+  }
+
+  // If there is a virtual leaf, we need to move up one level.
+  if (node->name == ".") {
+    node = CHECK_NOTNULL(node->parent);
+  }
+
+  CHECK_EQ(path, node->path);
+
+  node->weight = weight;
 }
 
 
@@ -542,11 +560,11 @@ size_t RandomSorter::count() const
 
 double RandomSorter::getWeight(const Node* node) const
 {
-  // TODO(bmahler): It's expensive to have to hash the complete
-  // role path and re-lookup the weight each time we calculate
-  // the share, consider storing the weight directly in the
-  // node struct.
-  return weights.get(node->path).getOrElse(1.0);
+  if (node->weight.isNone()) {
+    node->weight = weights.get(node->path).getOrElse(1.0);
+  }
+
+  return CHECK_NOTNONE(node->weight);
 }
 
 
diff --git a/src/master/allocator/sorter/random/sorter.hpp b/src/master/allocator/sorter/random/sorter.hpp
index 825c158..800b22c 100644
--- a/src/master/allocator/sorter/random/sorter.hpp
+++ b/src/master/allocator/sorter/random/sorter.hpp
@@ -235,6 +235,12 @@ struct RandomSorter::Node
   // label for virtual leaf nodes.
   std::string path;
 
+  // Cached weight of the node, access this through `getWeight()`.
+  // The value is cached by `getWeight()` and updated by
+  // `updateWeight()`. Marked mutable since the caching writes
+  // to this are logically const.
+  mutable Option<double> weight;
+
   Kind kind;
 
   Node* parent;


[mesos] 06/10: Use total cluster resources as framework sorter "total".

Posted by bm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit a7e339f547f6f01a28730caa6b44e42dec228fa3
Author: Benjamin Mahler <bm...@apache.org>
AuthorDate: Fri Sep 21 13:48:46 2018 -0700

    Use total cluster resources as framework sorter "total".
    
    The role sorter and framework sorters differed in that the role
    sorter used the total cluster resources as the "total" in which
    the allocation is divided to compute shares, whereas the framework
    sorters used the role's allocation as the "total".
    
    Per a discussion on the mailing list:
    
    https://s.apache.org/differing-DRF
    
    This patch makes the two types of sorters use the total cluster
    as the "total". While this provides slightly different behavior,
    it's unlikely any users were relying on this.
    
    Review: https://reviews.apache.org/r/68833
---
 src/master/allocator/mesos/hierarchical.cpp | 30 ++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 0b13d04..fd9e819 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -553,6 +553,10 @@ void HierarchicalAllocatorProcess::addSlave(
 
   roleSorter->add(slaveId, total);
 
+  foreachvalue (const Owned<Sorter>& sorter, frameworkSorters) {
+    sorter->add(slaveId, total);
+  }
+
   // See comment at `quotaRoleSorter` declaration regarding non-revocable.
   quotaRoleSorter->add(slaveId, total.nonRevocable());
 
@@ -620,6 +624,10 @@ void HierarchicalAllocatorProcess::removeSlave(
 
   roleSorter->remove(slaveId, slaves.at(slaveId).getTotal());
 
+  foreachvalue (const Owned<Sorter>& sorter, frameworkSorters) {
+    sorter->remove(slaveId, slaves.at(slaveId).getTotal());
+  }
+
   // See comment at `quotaRoleSorter` declaration regarding non-revocable.
   quotaRoleSorter->remove(
       slaveId, slaves.at(slaveId).getTotal().nonRevocable());
@@ -945,10 +953,6 @@ void HierarchicalAllocatorProcess::updateAllocation(
 
   updateSlaveTotal(slaveId, updatedTotal.get());
 
-  // Update the total resources in the framework sorter.
-  frameworkSorter->remove(slaveId, offeredResources);
-  frameworkSorter->add(slaveId, updatedOfferedResources);
-
   const Resources updatedFrameworkAllocation =
     frameworkSorter->allocation(frameworkId.value(), slaveId);
 
@@ -2532,6 +2536,11 @@ void HierarchicalAllocatorProcess::trackFrameworkUnderRole(
     CHECK(!frameworkSorters.contains(role));
     frameworkSorters.insert({role, Owned<Sorter>(frameworkSorterFactory())});
     frameworkSorters.at(role)->initialize(fairnessExcludeResourceNames);
+
+    foreachvalue (const Slave& slave, slaves) {
+      frameworkSorters.at(role)->add(slave.info.id(), slave.getTotal());
+    }
+
     metrics.addRole(role);
   }
 
@@ -2637,14 +2646,15 @@ bool HierarchicalAllocatorProcess::updateSlaveTotal(
     trackReservations(newReservations);
   }
 
-  // Currently `roleSorter` and `quotaRoleSorter`, being the root-level
-  // sorters, maintain all of `slaves[slaveId].total` (or the `nonRevocable()`
-  // portion in the case of `quotaRoleSorter`) in their own totals (which
-  // don't get updated in the allocation runs or during recovery of allocated
-  // resources). So, we update them using the resources in `slave.total`.
+  // Update the totals in the sorters.
   roleSorter->remove(slaveId, oldTotal);
   roleSorter->add(slaveId, total);
 
+  foreachvalue (const Owned<Sorter>& sorter, frameworkSorters) {
+    sorter->remove(slaveId, oldTotal);
+    sorter->add(slaveId, total);
+  }
+
   // See comment at `quotaRoleSorter` declaration regarding non-revocable.
   quotaRoleSorter->remove(slaveId, oldTotal.nonRevocable());
   quotaRoleSorter->add(slaveId, total.nonRevocable());
@@ -2768,7 +2778,6 @@ void HierarchicalAllocatorProcess::trackAllocatedResources(
     CHECK(frameworkSorters.at(role)->contains(frameworkId.value()));
 
     roleSorter->allocated(role, slaveId, allocation);
-    frameworkSorters.at(role)->add(slaveId, allocation);
     frameworkSorters.at(role)->allocated(
         frameworkId.value(), slaveId, allocation);
 
@@ -2804,7 +2813,6 @@ void HierarchicalAllocatorProcess::untrackAllocatedResources(
 
     frameworkSorters.at(role)->unallocated(
         frameworkId.value(), slaveId, allocation);
-    frameworkSorters.at(role)->remove(slaveId, allocation);
 
     roleSorter->unallocated(role, slaveId, allocation);