You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by mz...@apache.org on 2019/04/25 21:02:19 UTC

[mesos] branch master updated (6ef38d4 -> 5108f07)

This is an automated email from the ASF dual-hosted git repository.

mzhu pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from 6ef38d4  Simplified Sorter::add(client) implementations.
     new 5e52c68  Added a random sorter helper to find active internal nodes.
     new 5a75640  Fixed a bug in the random sorter.
     new 5108f07  Avoided some recalculation in the random sorter.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/master/allocator/sorter/random/sorter.cpp | 192 ++++++++++++++++++--------
 src/master/allocator/sorter/random/sorter.hpp |  36 +++++
 src/tests/sorter_tests.cpp                    |   8 +-
 3 files changed, 172 insertions(+), 64 deletions(-)


[mesos] 03/03: Avoided some recalculation in the random sorter.

Posted by mz...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mzhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 5108f076e6a5c275cae6b124bbcb110bc6785f94
Author: Meng Zhu <mz...@mesosphere.io>
AuthorDate: Wed Apr 24 11:32:38 2019 -0700

    Avoided some recalculation in the random sorter.
    
    This patch keeps the sorting related information in the memory
    and accompanies a dirty bit with it. This helps to avoid
    unnecessary recalculation of this info in `sort()`.
    
    Review: https://reviews.apache.org/r/70430
---
 src/master/allocator/sorter/random/sorter.cpp | 22 ++++++++++++++++++++--
 src/master/allocator/sorter/random/sorter.hpp | 10 ++++++----
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/master/allocator/sorter/random/sorter.cpp b/src/master/allocator/sorter/random/sorter.cpp
index 8cf01e3..f4132bb 100644
--- a/src/master/allocator/sorter/random/sorter.cpp
+++ b/src/master/allocator/sorter/random/sorter.cpp
@@ -47,13 +47,13 @@ namespace allocator {
 
 
 RandomSorter::RandomSorter()
-  : root(new Node("", Node::INTERNAL, nullptr)) {}
+  : sortInfo(this), root(new Node("", Node::INTERNAL, nullptr)) {}
 
 
 RandomSorter::RandomSorter(
     const UPID& allocator,
     const string& metricsPrefix)
-  : root(new Node("", Node::INTERNAL, nullptr)) {}
+  : sortInfo(this), root(new Node("", Node::INTERNAL, nullptr)) {}
 
 
 RandomSorter::~RandomSorter()
@@ -70,6 +70,8 @@ void RandomSorter::add(const string& clientPath)
 {
   CHECK(!clients.contains(clientPath)) << clientPath;
 
+  sortInfo.dirty = true;
+
   // Adding a client is a two phase algorithm:
   //
   //            root
@@ -160,6 +162,8 @@ void RandomSorter::add(const string& clientPath)
 
 void RandomSorter::remove(const string& clientPath)
 {
+  sortInfo.dirty = true;
+
   Node* current = CHECK_NOTNULL(find(clientPath));
 
   // Save a copy of the leaf node's allocated resources, because we
@@ -237,6 +241,8 @@ void RandomSorter::remove(const string& clientPath)
 
 void RandomSorter::activate(const string& clientPath)
 {
+  sortInfo.dirty = true;
+
   Node* client = CHECK_NOTNULL(find(clientPath));
 
   if (client->kind == Node::INACTIVE_LEAF) {
@@ -254,6 +260,8 @@ void RandomSorter::activate(const string& clientPath)
 
 void RandomSorter::deactivate(const string& clientPath)
 {
+  sortInfo.dirty = true;
+
   Node* client = CHECK_NOTNULL(find(clientPath));
 
   if (client->kind == Node::ACTIVE_LEAF) {
@@ -271,6 +279,8 @@ void RandomSorter::deactivate(const string& clientPath)
 
 void RandomSorter::updateWeight(const string& path, double weight)
 {
+  sortInfo.dirty = true;
+
   weights[path] = weight;
 
   // Update the weight of the corresponding internal node,
@@ -562,6 +572,10 @@ RandomSorter::SortInfo::getClientsAndWeights()
 
 void RandomSorter::SortInfo::updateRelativeWeights()
 {
+  if (!dirty) {
+    return;
+  }
+
   hashset<Node*> activeInternalNodes = sorter->activeInternalNodes();
 
   auto isActive = [&activeInternalNodes](Node* node) {
@@ -569,6 +583,8 @@ void RandomSorter::SortInfo::updateRelativeWeights()
            activeInternalNodes.contains(node);
   };
 
+  // Note, though we reserve here, the size of the vector will always
+  // grow (as we add more roles).
   clients.reserve(sorter->clients.size());
   weights.reserve(sorter->clients.size());
 
@@ -612,6 +628,8 @@ void RandomSorter::SortInfo::updateRelativeWeights()
     };
 
   calculateRelativeWeights(sorter->root, 0.0, 1.0);
+
+  dirty = false;
 }
 
 } // namespace allocator {
diff --git a/src/master/allocator/sorter/random/sorter.hpp b/src/master/allocator/sorter/random/sorter.hpp
index 15bc75c..55e22d7 100644
--- a/src/master/allocator/sorter/random/sorter.hpp
+++ b/src/master/allocator/sorter/random/sorter.hpp
@@ -134,18 +134,20 @@ private:
   Node* find(const std::string& clientPath) const;
 
   // Sorting related info are kept in memory to avoid recalculations.
-  //
-  // TODO(mzhu): Keep this in the memory to avoid recalculations.
   struct SortInfo
   {
   public:
-    SortInfo(const RandomSorter* sorter_) : sorter(sorter_) {}
+    SortInfo(const RandomSorter* sorter_) : dirty(true), sorter(sorter_) {}
 
     // Returns a pair of vectors which are active clients and
     // their corresponding relative weights.
     std::pair<std::vector<std::string>, std::vector<double>>
     getClientsAndWeights();
 
+    // A dirty bit indicates whether the info is up-to-date
+    // and requires recalculation.
+    bool dirty;
+
   private:
     void updateRelativeWeights();
 
@@ -160,7 +162,7 @@ private:
     std::vector<double> weights;
 
     const RandomSorter* sorter;
-  };
+  } sortInfo;
 
   // Used for random number generation.
   std::mt19937 generator;


[mesos] 01/03: Added a random sorter helper to find active internal nodes.

Posted by mz...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mzhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 5e52c686c29819113f42c6bde7d90324673b42dc
Author: Meng Zhu <mz...@mesosphere.io>
AuthorDate: Tue Apr 23 18:44:33 2019 -0700

    Added a random sorter helper to find active internal nodes.
    
    Active internal nodes are defined as internal nodes that have
    at least one active leaf node.
    
    Review: https://reviews.apache.org/r/70542
---
 src/master/allocator/sorter/random/sorter.cpp | 39 +++++++++++++++++++++++++++
 src/master/allocator/sorter/random/sorter.hpp |  5 ++++
 2 files changed, 44 insertions(+)

diff --git a/src/master/allocator/sorter/random/sorter.cpp b/src/master/allocator/sorter/random/sorter.cpp
index 183a903..ad06b0b 100644
--- a/src/master/allocator/sorter/random/sorter.cpp
+++ b/src/master/allocator/sorter/random/sorter.cpp
@@ -545,6 +545,45 @@ double RandomSorter::getWeight(const Node* node) const
 }
 
 
+hashset<RandomSorter::Node*> RandomSorter::activeInternalNodes() const
+{
+  // Using post-order traversal to find all the internal nodes
+  // that have at least one active leaf descendant and store
+  // them in the input `result` hashset.
+  //
+  // Returns true if the subtree at the input `node` contains any
+  // active leaf node.
+  std::function<bool(Node*, hashset<Node*>&)> searchActiveInternal =
+    [&searchActiveInternal](Node* node, hashset<Node*>& result) {
+      switch (node->kind) {
+        case Node::ACTIVE_LEAF: return true;
+
+        case Node::INACTIVE_LEAF: return false;
+
+        case Node::INTERNAL: {
+          bool active = false;
+          foreach (Node* child, node->children) {
+            if (searchActiveInternal(child, result)) {
+              active = true;
+            }
+          }
+
+          if (active) {
+            result.insert(node);
+          }
+          return active;
+        }
+      }
+      UNREACHABLE();
+    };
+
+  hashset<Node*> result;
+  searchActiveInternal(root, result);
+
+  return result;
+}
+
+
 RandomSorter::Node* RandomSorter::find(const string& clientPath) const
 {
   Option<Node*> client_ = clients.get(clientPath);
diff --git a/src/master/allocator/sorter/random/sorter.hpp b/src/master/allocator/sorter/random/sorter.hpp
index 125ce84..0c2d645 100644
--- a/src/master/allocator/sorter/random/sorter.hpp
+++ b/src/master/allocator/sorter/random/sorter.hpp
@@ -29,6 +29,7 @@
 
 #include <stout/check.hpp>
 #include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
 #include <stout/option.hpp>
 
 #include "common/resource_quantities.hpp"
@@ -123,6 +124,10 @@ private:
   // returned.
   double getWeight(const Node* node) const;
 
+  // Get active internal nodes -- internal nodes that have at least
+  // one active leaf descendant.
+  hashset<Node*> activeInternalNodes() const;
+
   // Returns the client associated with the given path. Returns
   // nullptr if the path is not found or if the path identifies an
   // internal node in the tree (not a client).


[mesos] 02/03: Fixed a bug in the random sorter.

Posted by mz...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mzhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 5a756402ad15cedbc6ccb8fa5de096745967f36f
Author: Meng Zhu <mz...@mesosphere.io>
AuthorDate: Wed Apr 24 10:51:06 2019 -0700

    Fixed a bug in the random sorter.
    
    Currently, in the presence of hierarchical roles, the
    random sorter shuffles roles level by level and then pick
    the active leave nodes using DFS. This could generate
    non-uniform random result since active leaves in a subtree
    are always picked together.
    
    This patch fixes the issue by first calculating the relative
    weights of each active leaf node and shuffle all of them
    only once.
    
    Review: https://reviews.apache.org/r/70429
---
 src/master/allocator/sorter/random/sorter.cpp | 131 ++++++++++++++------------
 src/master/allocator/sorter/random/sorter.hpp |  29 ++++++
 src/tests/sorter_tests.cpp                    |   8 +-
 3 files changed, 106 insertions(+), 62 deletions(-)

diff --git a/src/master/allocator/sorter/random/sorter.cpp b/src/master/allocator/sorter/random/sorter.cpp
index ad06b0b..8cf01e3 100644
--- a/src/master/allocator/sorter/random/sorter.cpp
+++ b/src/master/allocator/sorter/random/sorter.cpp
@@ -33,6 +33,7 @@
 #include <stout/option.hpp>
 #include <stout/strings.hpp>
 
+using std::pair;
 using std::set;
 using std::string;
 using std::vector;
@@ -460,66 +461,16 @@ void RandomSorter::remove(const SlaveID& slaveId, const Resources& resources)
 
 vector<string> RandomSorter::sort()
 {
-  std::function<void (Node*)> shuffleTree = [this, &shuffleTree](Node* node) {
-    // Inactive leaves are always stored at the end of the
-    // `children` vector; this means that we should only shuffle
-    // the prefix of the vector before the first inactive leaf.
-    auto inactiveBegin = std::find_if(
-        node->children.begin(),
-        node->children.end(),
-        [](Node* n) { return n->kind == Node::INACTIVE_LEAF; });
-
-    vector<double> weights(inactiveBegin - node->children.begin());
-
-    for (int i = 0; i < inactiveBegin - node->children.begin(); ++i) {
-      weights[i] = getWeight(node->children[i]);
-    }
-
-    weightedShuffle(node->children.begin(), inactiveBegin, weights, generator);
-
-    foreach (Node* child, node->children) {
-      if (child->kind == Node::INTERNAL) {
-        shuffleTree(child);
-      } else if (child->kind == Node::INACTIVE_LEAF) {
-        break;
-      }
-    }
-  };
-
-  shuffleTree(root);
-
-  // Return all active leaves in the tree via pre-order traversal.
-  // The children of each node are already shuffled, with
-  // inactive leaves stored after active leaves and internal nodes.
-  vector<string> result;
-
-  // TODO(bmahler): This over-reserves where there are inactive
-  // clients, only reserve the number of active clients.
-  result.reserve(clients.size());
-
-  std::function<void (const Node*)> listClients =
-      [&listClients, &result](const Node* node) {
-    foreach (const Node* child, node->children) {
-      switch (child->kind) {
-        case Node::ACTIVE_LEAF:
-          result.push_back(child->clientPath());
-          break;
-
-        case Node::INACTIVE_LEAF:
-          // As soon as we see the first inactive leaf, we can stop
-          // iterating over the current node's list of children.
-          return;
-
-        case Node::INTERNAL:
-          listClients(child);
-          break;
-      }
-    }
-  };
+  pair<vector<string>, vector<double>> clientsAndWeights =
+    SortInfo(this).getClientsAndWeights();
 
-  listClients(root);
+  weightedShuffle(
+      clientsAndWeights.first.begin(),
+      clientsAndWeights.first.end(),
+      clientsAndWeights.second,
+      generator);
 
-  return result;
+  return std::move(clientsAndWeights.first);
 }
 
 
@@ -599,6 +550,70 @@ RandomSorter::Node* RandomSorter::find(const string& clientPath) const
   return client;
 }
 
+
+pair<vector<string>, vector<double>>
+RandomSorter::SortInfo::getClientsAndWeights()
+{
+  updateRelativeWeights();
+
+  return std::make_pair(clients, weights);
+}
+
+
+void RandomSorter::SortInfo::updateRelativeWeights()
+{
+  hashset<Node*> activeInternalNodes = sorter->activeInternalNodes();
+
+  auto isActive = [&activeInternalNodes](Node* node) {
+    return node->kind == Node::ACTIVE_LEAF ||
+           activeInternalNodes.contains(node);
+  };
+
+  clients.reserve(sorter->clients.size());
+  weights.reserve(sorter->clients.size());
+
+  // We use the following formula to compute the relative weights (Rw):
+  //
+  //                                    weight(node)
+  // Rw(node) = Rw(parent) * -------------------------------------------
+  //                         weight(node) + SUM(weight(active siblings))
+  //
+  // Using pre-order traversal to calculate node's relative weight.
+  // Active leaves and their relative weights are stored in `clients`
+  // and `weights`.
+  std::function<void(Node*, double, double)> calculateRelativeWeights =
+    [&](Node* node, double siblingWeights, double parentRelativeWeight) {
+      // We only calculate relative weights for active nodes.
+      if (!isActive(node)) {
+        return;
+      }
+      double relativeWeight = parentRelativeWeight * sorter->getWeight(node) /
+                              (sorter->getWeight(node) + siblingWeights);
+
+      // Store the result for active leaves.
+      if (node->kind == Node::ACTIVE_LEAF) {
+        clients.push_back(node->clientPath());
+        weights.push_back(relativeWeight);
+      }
+
+      // Calculate active children's total weights.
+      double totalWeights_ = 0.0;
+
+      foreach (Node* child, node->children) {
+        totalWeights_ += isActive(child) ? sorter->getWeight(child) : 0.0;
+      }
+
+      foreach (Node* child, node->children) {
+        if (isActive(child)) {
+          calculateRelativeWeights(
+              child, totalWeights_ - sorter->getWeight(child), relativeWeight);
+        }
+      }
+    };
+
+  calculateRelativeWeights(sorter->root, 0.0, 1.0);
+}
+
 } // namespace allocator {
 } // namespace master {
 } // namespace internal {
diff --git a/src/master/allocator/sorter/random/sorter.hpp b/src/master/allocator/sorter/random/sorter.hpp
index 0c2d645..15bc75c 100644
--- a/src/master/allocator/sorter/random/sorter.hpp
+++ b/src/master/allocator/sorter/random/sorter.hpp
@@ -133,6 +133,35 @@ private:
   // internal node in the tree (not a client).
   Node* find(const std::string& clientPath) const;
 
+  // Sorting related info are kept in memory to avoid recalculations.
+  //
+  // TODO(mzhu): Keep this in the memory to avoid recalculations.
+  struct SortInfo
+  {
+  public:
+    SortInfo(const RandomSorter* sorter_) : sorter(sorter_) {}
+
+    // Returns a pair of vectors which are active clients and
+    // their corresponding relative weights.
+    std::pair<std::vector<std::string>, std::vector<double>>
+    getClientsAndWeights();
+
+  private:
+    void updateRelativeWeights();
+
+    std::vector<std::string> clients;
+
+    // Relative weights of the `clients` above.
+    // Relative weight denotes the weight of an active leaf node relative to
+    // other active leaf nodes given their configured weights. The number here
+    // stands for the probability of a given node being shuffled to the 1st in
+    // all the nodes in a random shuffle. Intuitively, the sum of all relative
+    // weights should be one.
+    std::vector<double> weights;
+
+    const RandomSorter* sorter;
+  };
+
   // Used for random number generation.
   std::mt19937 generator;
 
diff --git a/src/tests/sorter_tests.cpp b/src/tests/sorter_tests.cpp
index 91bfde0..c9a0bda 100644
--- a/src/tests/sorter_tests.cpp
+++ b/src/tests/sorter_tests.cpp
@@ -149,10 +149,10 @@ TEST(RandomSorterTest, HierarchicalProbabilityDistribution)
   // This table was generated by running a weighted shuffle algorithm
   // for a large number of iterations.
   double expectedProbabilities[4][4] = {
-    {0.11, 0.22, 0.22, 0.44},
-    {0.22, 0.11, 0.44, 0.22},
-    {0.22, 0.44, 0.11, 0.22},
-    {0.44, 0.22, 0.22, 0.11},
+    {0.11, 0.15, 0.23, 0.51},
+    {0.22, 0.27, 0.30, 0.21},
+    {0.22, 0.27, 0.30, 0.21},
+    {0.44, 0.31, 0.18, 0.07},
   };
 
   double actualProbabilities[4][4];