Posted to commits@mesos.apache.org by ne...@apache.org on 2017/04/26 18:20:47 UTC

[01/11] mesos git commit: Updated quota handler logic for hierarchical roles.

Repository: mesos
Updated Branches:
  refs/heads/master 7bb3c0432 -> 044b72e8e


Updated quota handler logic for hierarchical roles.

The quota'd resources for a nested role are "included" within the
quota'd resources for that role's parent. Hence, the quota of a role
must always be greater than or equal to the sum of the quota'd resources
of its children. When creating and removing quota, we must ensure that
this invariant is not violated.

When computing the cluster capacity heuristic, we must ensure that we do
not "double-count" quota'd resources: e.g., if the cluster has a total
capacity of 100 CPUs, role "x" has a quota guarantee of 80 CPUs, and
role "x/y" has a quota guarantee of 40 CPUs, this does NOT violate the
cluster capacity heuristic, because the 40 CPUs guaranteed to "x/y" are
already counted within the 80 CPUs guaranteed to "x".
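
As a rough illustration of why this works: once the
"parent >= sum(children)" invariant holds, summing only the guarantees
of top-level roles is enough. The standalone C++ sketch below uses the
numbers from the example above; it is not the handler code (the actual
logic is the `QuotaTree` class in the diff below), and the names
`quotaCpus` and `clusterCpus` are illustrative assumptions.

#include <iostream>
#include <map>
#include <string>

int main()
{
  // Hypothetical quota guarantees (CPUs), keyed by role name.
  std::map<std::string, double> quotaCpus = {{"x", 80}, {"x/y", 40}};

  double total = 0;
  for (const auto& entry : quotaCpus) {
    // Only sum top-level roles; a nested role (name contains '/') is
    // already covered by its ancestor's guarantee, given the
    // "parent >= sum(children)" invariant.
    if (entry.first.find('/') == std::string::npos) {
      total += entry.second;
    }
  }

  const double clusterCpus = 100;  // Total (non-static) cluster capacity.
  std::cout << "Total quota'd CPUs: " << total << " -> "
            << (total <= clusterCpus ? "fits" : "exceeds capacity")
            << std::endl;
  return 0;
}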

Review: https://reviews.apache.org/r/57167


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d0f0f9d6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d0f0f9d6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d0f0f9d6

Branch: refs/heads/master
Commit: d0f0f9d6b87ca8f800910ac51f21279d8619795a
Parents: 7bb3c04
Author: Neil Conway <ne...@gmail.com>
Authored: Tue Jan 31 17:24:11 2017 -0800
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:01:40 2017 -0400

----------------------------------------------------------------------
 src/master/quota_handler.cpp               | 212 +++++++++++---
 src/tests/hierarchical_allocator_tests.cpp |  87 ++++++
 src/tests/master_quota_tests.cpp           | 355 ++++++++++++++++++++++++
 3 files changed, 621 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/d0f0f9d6/src/master/quota_handler.cpp
----------------------------------------------------------------------
diff --git a/src/master/quota_handler.cpp b/src/master/quota_handler.cpp
index 7ff43a0..281fa1d 100644
--- a/src/master/quota_handler.cpp
+++ b/src/master/quota_handler.cpp
@@ -16,6 +16,7 @@
 
 #include "master/master.hpp"
 
+#include <memory>
 #include <list>
 #include <vector>
 
@@ -63,12 +64,128 @@ using process::http::authentication::Principal;
 
 using std::list;
 using std::string;
+using std::unique_ptr;
 using std::vector;
 
 namespace mesos {
 namespace internal {
 namespace master {
 
+// Represents the tree of roles that have quota. The quota of a child
+// node is "contained" in the quota of its parent node. This has two
+// implications:
+//
+//   (1) The quota of a parent must be greater than or equal to the
+//       sum of the quota of its children.
+//
+//   (2) When computing the total resources guaranteed by quota, we
+//       don't want to double-count resource guarantees between a
+//       parent role and its children.
+class QuotaTree
+{
+public:
+  QuotaTree(const hashmap<string, Quota>& quotas)
+    : root(new Node(""))
+  {
+    foreachpair (const string& role, const Quota& quota, quotas) {
+      insert(role, quota);
+    }
+  }
+
+  void insert(const string& role, const Quota& quota)
+  {
+    // Create the path from root->leaf in the tree. Any missing nodes
+    // are created implicitly.
+    vector<string> components = strings::tokenize(role, "/");
+    CHECK(!components.empty());
+
+    Node* current = root.get();
+    foreach (const string& component, components) {
+      if (!current->children.contains(component)) {
+        current->children[component] = unique_ptr<Node>(new Node(component));
+      }
+
+      current = current->children.at(component).get();
+    }
+
+    // Update `current` with the guaranteed quota resources for this
+    // role. A path in the tree should be associated with at most one
+    // quota guarantee, so the current guarantee should be empty.
+    CHECK(current->quota.info.guarantee().empty());
+    current->quota = quota;
+  }
+
+  // Check whether the tree satisfies the "parent >= sum(children)"
+  // constraint described above.
+  Option<Error> validate() const
+  {
+    // Don't check the root node because it does not have quota set.
+    foreachvalue (const unique_ptr<Node>& child, root->children) {
+      Option<Error> error = child->validate();
+      if (error.isSome()) {
+        return error;
+      }
+    }
+
+    return None();
+  }
+
+  // Returns the total resources requested by all quotas in the
+  // tree. Since a role's quota must be greater than or equal to the
+  // sum of the quota of its children, we can just sum the quota of
+  // the top-level roles.
+  Resources total() const
+  {
+    Resources result;
+
+    // Don't include the root node because it does not have quota set.
+    foreachvalue (const unique_ptr<Node>& child, root->children) {
+      result += child->quota.info.guarantee();
+    }
+
+    return result;
+  }
+
+private:
+  struct Node
+  {
+    Node(const string& _name) : name(_name) {}
+
+    Option<Error> validate() const
+    {
+      foreachvalue (const unique_ptr<Node>& child, children) {
+        Option<Error> error = child->validate();
+        if (error.isSome()) {
+          return error;
+        }
+      }
+
+      Resources childResources;
+      foreachvalue (const unique_ptr<Node>& child, children) {
+        childResources += child->quota.info.guarantee();
+      }
+
+      Resources selfResources = quota.info.guarantee();
+
+      if (!selfResources.contains(childResources)) {
+        return Error("Invalid quota configuration. Parent role '" +
+                     name + "' with quota " + stringify(selfResources) +
+                     " does not contain the sum of its children's" +
+                     " resources (" + stringify(childResources) + ")");
+      }
+
+      return None();
+    }
+
+    const string name;
+    Quota quota;
+    hashmap<string, unique_ptr<Node>> children;
+  };
+
+  unique_ptr<Node> root;
+};
+
+
 Option<Error> Master::QuotaHandler::capacityHeuristic(
     const QuotaInfo& request) const
 {
@@ -78,19 +195,21 @@ Option<Error> Master::QuotaHandler::capacityHeuristic(
   CHECK(master->isWhitelistedRole(request.role()));
   CHECK(!master->quotas.contains(request.role()));
 
-  // Calculate the total amount of resources requested by all quotas
-  // (including the request) in the cluster.
-  // NOTE: We have validated earlier that the quota for the role in the
-  // request does not exist, hence `master->quotas` is guaranteed not to
-  // contain the request role's quota yet.
-  // TODO(alexr): Relax this constraint once we allow updating quotas.
-  Resources totalQuota = request.guarantee();
-  foreachvalue (const Quota& quota, master->quotas) {
-    totalQuota += quota.info.guarantee();
-  }
+  hashmap<string, Quota> quotaMap = master->quotas;
+
+  // Check that adding the requested quota to the existing quotas does
+  // not violate the capacity heuristic.
+  quotaMap[request.role()] = Quota{request};
+
+  QuotaTree quotaTree(quotaMap);
+
+  CHECK_NONE(quotaTree.validate());
+
+  Resources totalQuota = quotaTree.total();
 
   // Determine whether the total quota, including the new request, does
   // not exceed the sum of non-static cluster resources.
+  //
   // NOTE: We do not necessarily calculate the full sum of non-static
   // cluster resources. We apply the early termination logic as it can
   // reduce the cost of the function significantly. This early exit does
@@ -354,11 +473,12 @@ Future<http::Response> Master::QuotaHandler::_set(
   QuotaInfo quotaInfo = create.get();
 
   // Check that the `QuotaInfo` is a valid quota request.
-  Option<Error> validateError = quota::validation::quotaInfo(quotaInfo);
-  if (validateError.isSome()) {
-    return BadRequest(
-        "Failed to validate set quota request: " +
-        validateError.get().message);
+  {
+    Option<Error> error = quota::validation::quotaInfo(quotaInfo);
+    if (error.isSome()) {
+      return BadRequest(
+          "Failed to validate set quota request: " + error->message);
+    }
   }
 
   // Check that the role is on the role whitelist, if it exists.
@@ -376,6 +496,22 @@ Future<http::Response> Master::QuotaHandler::_set(
         " for role '" + quotaInfo.role() + "' which already has quota");
   }
 
+  hashmap<string, Quota> quotaMap = master->quotas;
+
+  // Validate that adding this quota does not violate the hierarchical
+  // relationship between quotas.
+  quotaMap[quotaInfo.role()] = Quota{quotaInfo};
+
+  QuotaTree quotaTree(quotaMap);
+
+  {
+    Option<Error> error = quotaTree.validate();
+    if (error.isSome()) {
+      return BadRequest(
+          "Failed to validate set quota request: " + error->message);
+    }
+  }
+
   // The force flag is used to overwrite the `capacityHeuristic` check.
   const bool forced = quotaRequest.force();
 
@@ -466,31 +602,26 @@ Future<http::Response> Master::QuotaHandler::remove(
   // Check that the request type is DELETE which is guaranteed by the master.
   CHECK_EQ("DELETE", request.method);
 
-  // Extract role from url.
-  vector<string> tokens = strings::tokenize(request.url.path, "/");
-
-  // Check that there are exactly 3 parts: {master,quota,'role'}.
-  if (tokens.size() != 3u) {
-    return BadRequest(
-        "Failed to parse request path '" + request.url.path +
-        "': 3 tokens ('master', 'quota', 'role') required, found " +
-        stringify(tokens.size()) + " token(s)");
-  }
-
-  // Check that "quota" is the second to last token.
-  if (tokens.end()[-2] != "quota") {
-    return BadRequest(
-        "Failed to parse request path '" + request.url.path +
-        "': Missing 'quota' endpoint");
+  // Extract role from url. We expect the request path to have the
+  // format "/master/quota/role", where "role" is a role name. The
+  // role name itself may contain one or more slashes. Note that
+  // `strings::tokenize` returns the remainder of the string when the
+  // specified maximum number of tokens is reached.
+  vector<string> components = strings::tokenize(request.url.path, "/", 3u);
+  if (components.size() < 3u) {
+    return BadRequest("Failed to parse remove quota request for path '" +
+                      request.url.path + "': expected 3 tokens, found " +
+                      stringify(components.size()) + " tokens");
   }
 
-  const string& role = tokens.back();
+  CHECK_EQ(3u, components.size());
+  string role = components.back();
 
   // Check that the role is on the role whitelist, if it exists.
   if (!master->isWhitelistedRole(role)) {
     return BadRequest(
         "Failed to validate remove quota request for path '" +
-        request.url.path +"': Unknown role '" + role + "'");
+        request.url.path + "': Unknown role '" + role + "'");
   }
 
   // Check that we are removing an existing quota.
@@ -500,6 +631,21 @@ Future<http::Response> Master::QuotaHandler::remove(
         "': Role '" + role + "' has no quota set");
   }
 
+  hashmap<string, Quota> quotaMap = master->quotas;
+
+  // Validate that removing the quota for `role` does not violate the
+  // hierarchical relationship between quotas.
+  quotaMap.erase(role);
+
+  QuotaTree quotaTree(quotaMap);
+
+  Option<Error> error = quotaTree.validate();
+  if (error.isSome()) {
+    return BadRequest(
+        "Failed to remove quota for path '" + request.url.path +
+        "': " + error->message);
+  }
+
   return _remove(role, principal);
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/d0f0f9d6/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index 33e7b45..e2cd66d 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -4297,6 +4297,93 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
 }
 
 
+// This test checks that quota guarantees work correctly when a nested
+// role is created as a child of an existing role that has quota.
+TEST_F(HierarchicalAllocatorTest, NestedRoleQuota)
+{
+  // Pausing the clock is not necessary, but ensures that the test
+  // doesn't rely on the batch allocation in the allocator, which
+  // would slow down the test.
+  Clock::pause();
+
+  initialize();
+
+  const string PARENT_ROLE = "a/b";
+  const string CHILD_ROLE1 = "a/b/c";
+  const string CHILD_ROLE2 = "a/b/d";
+
+  // Create `framework1` and set quota for its role.
+  FrameworkInfo framework1 = createFrameworkInfo({PARENT_ROLE});
+  allocator->addFramework(framework1.id(), framework1, {}, true);
+
+  const Quota parentQuota = createQuota(PARENT_ROLE, "cpus:2;mem:1024");
+  allocator->setQuota(PARENT_ROLE, parentQuota);
+
+  SlaveInfo agent1 = createSlaveInfo("cpus:1;mem:512;disk:0");
+  allocator->addSlave(
+      agent1.id(),
+      agent1,
+      AGENT_CAPABILITIES(),
+      None(),
+      agent1.resources(),
+      {});
+
+  // `framework1` will be offered all of `agent1`'s resources because
+  // it is the only framework in the only role with unsatisfied quota.
+  {
+    Allocation expected = Allocation(
+        framework1.id(),
+        {{PARENT_ROLE, {{agent1.id(), agent1.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocations.get());
+  }
+
+  // `framework1` declines the resources on `agent1` for the duration
+  // of the test.
+  Filters longFilter;
+  longFilter.set_refuse_seconds(flags.allocation_interval.secs() * 10);
+
+  allocator->recoverResources(
+      framework1.id(),
+      agent1.id(),
+      allocatedResources(agent1.resources(), PARENT_ROLE),
+      longFilter);
+
+  // Register a framework in CHILD_ROLE1, which is a child role of
+  // PARENT_ROLE. In the current implementation, because CHILD_ROLE1
+  // does not itself have quota, it will not be offered any of
+  // PARENT_ROLE's quota'd resources. This behavior may change in the
+  // future (MESOS-7150).
+  FrameworkInfo framework2 = createFrameworkInfo({CHILD_ROLE1});
+  allocator->addFramework(framework2.id(), framework2, {}, true);
+
+  // Trigger a batch allocation for good measure; we do not expect
+  // either framework to be offered resources.
+  Clock::advance(flags.allocation_interval);
+  Clock::settle();
+
+  Future<Allocation> allocation = allocations.get();
+  EXPECT_TRUE(allocation.isPending());
+
+  // Register a framework in CHILD_ROLE2, which is a child role of
+  // PARENT_ROLE. Because CHILD_ROLE2 has quota, it will be offered
+  // resources.
+  FrameworkInfo framework3 = createFrameworkInfo({CHILD_ROLE2});
+  allocator->addFramework(framework3.id(), framework3, {}, true);
+
+  const Quota childQuota = createQuota(CHILD_ROLE2, "cpus:1;mem:512");
+  allocator->setQuota(CHILD_ROLE2, childQuota);
+
+  {
+    Allocation expected = Allocation(
+        framework3.id(),
+        {{CHILD_ROLE2, {{agent1.id(), agent1.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocation);
+  }
+}
+
+
 class HierarchicalAllocatorTestWithParam
   : public HierarchicalAllocatorTestBase,
     public WithParamInterface<bool> {};

http://git-wip-us.apache.org/repos/asf/mesos/blob/d0f0f9d6/src/tests/master_quota_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_quota_tests.cpp b/src/tests/master_quota_tests.cpp
index 1714ba1..7f94b92 100644
--- a/src/tests/master_quota_tests.cpp
+++ b/src/tests/master_quota_tests.cpp
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <map>
 #include <string>
 #include <vector>
 
@@ -50,6 +51,7 @@ using mesos::internal::slave::Slave;
 
 using mesos::master::detector::MasterDetector;
 
+using mesos::quota::QuotaInfo;
 using mesos::quota::QuotaRequest;
 using mesos::quota::QuotaStatus;
 
@@ -64,6 +66,7 @@ using process::http::OK;
 using process::http::Response;
 using process::http::Unauthorized;
 
+using std::map;
 using std::string;
 using std::vector;
 
@@ -1475,6 +1478,358 @@ TEST_F(MasterQuotaTest, AuthorizeGetUpdateQuotaRequestsWithoutPrincipal)
   }
 }
 
+
+// This test checks that quota can be successfully set, queried, and
+// removed on a child role.
+TEST_F(MasterQuotaTest, ChildRole)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Use the force flag for setting quota that cannot be satisfied in
+  // this empty cluster without any agents.
+  const bool FORCE = true;
+
+  const string PARENT_ROLE = "eng";
+  const string CHILD_ROLE = "eng/dev";
+
+  // Set quota for the parent role.
+  Resources parentQuotaResources = Resources::parse("cpus:2;mem:1024").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(PARENT_ROLE, parentQuotaResources, FORCE));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response) << response->body;
+  }
+
+  // Set quota for the child role.
+  Resources childQuotaResources = Resources::parse("cpus:1;mem:768").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(CHILD_ROLE, childQuotaResources, FORCE));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response) << response->body;
+  }
+
+  // Query the configured quota.
+  {
+    Future<Response> response = process::http::get(
+        master.get()->pid,
+        "quota",
+        None(),
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response)
+      << response->body;
+
+    EXPECT_SOME_EQ(
+        "application/json",
+        response->headers.get("Content-Type"));
+
+    const Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
+
+    ASSERT_SOME(parse);
+
+    // Convert JSON response to `QuotaStatus` protobuf.
+    const Try<QuotaStatus> status = ::protobuf::parse<QuotaStatus>(parse.get());
+    ASSERT_FALSE(status.isError());
+    ASSERT_EQ(2, status->infos().size());
+
+    // Don't assume that the quota for child and parent are returned
+    // in any particular order.
+    map<string, Resources> expected = {{PARENT_ROLE, parentQuotaResources},
+                                       {CHILD_ROLE, childQuotaResources}};
+
+    map<string, Resources> actual = {
+      {status->infos(0).role(), status->infos(0).guarantee()},
+      {status->infos(1).role(), status->infos(1).guarantee()}
+    };
+
+    EXPECT_EQ(expected, actual);
+  }
+
+  // Remove quota for the child role.
+  {
+    Future<Response> response = process::http::requestDelete(
+        master.get()->pid,
+        "quota/" + CHILD_ROLE,
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response) << response->body;
+  }
+}
+
+
+// This test checks that attempting to set quota on a child role is
+// rejected if the child's parent does not have quota set.
+TEST_F(MasterQuotaTest, ChildRoleWithNoParentQuota)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Use the force flag for setting quota that cannot be satisfied in
+  // this empty cluster without any agents.
+  const bool FORCE = true;
+
+  const string CHILD_ROLE = "eng/dev";
+
+  // Set quota for the child role.
+  Resources childQuotaResources = Resources::parse("cpus:1;mem:768").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(CHILD_ROLE, childQuotaResources, FORCE));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response)
+      << response->body;
+  }
+}
+
+
+// This test checks that a request to set quota for a child role is
+// rejected if it exceeds the parent role's quota.
+TEST_F(MasterQuotaTest, ChildRoleExceedsParentQuota)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Use the force flag for setting quota that cannot be satisfied in
+  // this empty cluster without any agents.
+  const bool FORCE = true;
+
+  const string PARENT_ROLE = "eng";
+  const string CHILD_ROLE = "eng/dev";
+
+  // Set quota for the parent role.
+  Resources parentQuotaResources = Resources::parse("cpus:2;mem:768").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(PARENT_ROLE, parentQuotaResources, FORCE));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response) << response->body;
+  }
+
+  // Attempt to set quota for the child role. Because the child role's
+  // quota exceeds the parent role's quota, this should not succeed.
+  Resources childQuotaResources = Resources::parse("cpus:1;mem:1024").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(CHILD_ROLE, childQuotaResources, FORCE));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response)
+      << response->body;
+  }
+}
+
+
+// This test checks that a request to set quota for a child role is
+// rejected if it would result in the parent role's quota being
+// smaller than the sum of the quota of its children.
+TEST_F(MasterQuotaTest, ChildRoleSumExceedsParentQuota)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Use the force flag for setting quota that cannot be satisfied in
+  // this empty cluster without any agents.
+  const bool FORCE = true;
+
+  const string PARENT_ROLE = "eng";
+  const string CHILD_ROLE1 = "eng/dev";
+  const string CHILD_ROLE2 = "eng/prod";
+
+  // Set quota for the parent role.
+  Resources parentQuotaResources = Resources::parse("cpus:2;mem:768").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(PARENT_ROLE, parentQuotaResources, FORCE));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response) << response->body;
+  }
+
+  // Set quota for the first child role. This should succeed.
+  Resources childQuotaResources = Resources::parse("cpus:1;mem:512").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(CHILD_ROLE1, childQuotaResources, FORCE));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response)
+      << response->body;
+  }
+
+  // Attempt to set quota for the second child role. This should fail,
+  // because the sum of the quotas of the children of PARENT_ROLE
+  // would now exceed the quota of PARENT_ROLE.
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(CHILD_ROLE2, childQuotaResources, FORCE));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response)
+      << response->body;
+  }
+}
+
+
+// This test checks that a request to delete quota for a parent role
+// is rejected since this would result in the child role's quota
+// exceeding the parent role's quota.
+TEST_F(MasterQuotaTest, ChildRoleDeleteParentQuota)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Use the force flag for setting quota that cannot be satisfied in
+  // this empty cluster without any agents.
+  const bool FORCE = true;
+
+  const string PARENT_ROLE = "eng";
+  const string CHILD_ROLE = "eng/dev";
+
+  // Set quota for the parent role.
+  Resources parentQuotaResources = Resources::parse("cpus:2;mem:1024").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(PARENT_ROLE, parentQuotaResources, FORCE));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response) << response->body;
+  }
+
+  // Set quota for the child role.
+  Resources childQuotaResources = Resources::parse("cpus:1;mem:512").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(CHILD_ROLE, childQuotaResources, FORCE));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response)
+      << response->body;
+  }
+
+  // Attempt to remove the quota for the parent role. This should not
+  // succeed.
+  {
+    Future<Response> response = process::http::requestDelete(
+        master.get()->pid,
+        "quota/" + PARENT_ROLE,
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(BadRequest().status, response)
+      << response->body;
+  }
+}
+
+
+// This test checks that the cluster capacity heuristic correctly
+// interprets quota set on hierarchical roles. Specifically, quota on
+// child roles should not be double-counted with the quota on the
+// child's parent role. In other words, the total quota'd resources in
+// the cluster is the sum of the quota on the top-level roles.
+TEST_F(MasterQuotaTest, ClusterCapacityWithNestedRoles)
+{
+  TestAllocator<> allocator;
+  EXPECT_CALL(allocator, initialize(_, _, _, _));
+
+  Try<Owned<cluster::Master>> master = StartMaster(&allocator);
+  ASSERT_SOME(master);
+
+  // Start an agent and wait until its resources are available.
+  Future<Resources> agentTotalResources;
+  EXPECT_CALL(allocator, addSlave(_, _, _, _, _, _))
+    .WillOnce(DoAll(InvokeAddSlave(&allocator),
+                    FutureArg<4>(&agentTotalResources)));
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(agentTotalResources);
+  EXPECT_EQ(defaultAgentResources, agentTotalResources.get());
+
+  const string PARENT_ROLE1 = "eng";
+  const string PARENT_ROLE2 = "sales";
+  const string CHILD_ROLE = "eng/dev";
+
+  // Set quota for the first parent role.
+  Resources parent1QuotaResources = Resources::parse("cpus:1;mem:768").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(PARENT_ROLE1, parent1QuotaResources));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response) << response->body;
+  }
+
+  // Set quota for the child role. This should succeed, even though
+  // naively summing the parent and child quota would result in
+  // violating the cluster capacity heuristic.
+  Resources childQuotaResources = Resources::parse("cpus:1;mem:512").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(CHILD_ROLE, childQuotaResources));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response) << response->body;
+  }
+
+  // Set quota for the second parent role. This should succeed, even
+  // though naively summing the quota of the subtree rooted at
+  // PARENT_ROLE1 would violate the cluster capacity check.
+  Resources parent2QuotaResources = Resources::parse("cpus:1;mem:256").get();
+
+  {
+    Future<Response> response = process::http::post(
+        master.get()->pid,
+        "quota",
+        createBasicAuthHeaders(DEFAULT_CREDENTIAL),
+        createRequestBody(PARENT_ROLE2, parent2QuotaResources));
+
+    AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response) << response->body;
+  }
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {


[10/11] mesos git commit: Disabled support for setting quota on nested roles.

Posted by ne...@apache.org.
Disabled support for setting quota on nested roles.

Correct support for quota on nested roles will require further work in
the allocator (see MESOS-7402 for details). For now, setting quota on
nested roles is disabled until MESOS-7402 can be fixed. This commit
disables any tests that rely on setting quota on nested roles; it also
adds a (disabled) test to cover the behavior that will be fixed as part
of MESOS-7402.

Review: https://reviews.apache.org/r/58584


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/50a71c03
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/50a71c03
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/50a71c03

Branch: refs/heads/master
Commit: 50a71c03a374ef9ff834ab5dd3fe0a4f195cd380
Parents: cfd1b48
Author: Neil Conway <ne...@gmail.com>
Authored: Wed Apr 19 16:28:43 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:19:27 2017 -0400

----------------------------------------------------------------------
 src/master/quota_handler.cpp               |  9 +++
 src/tests/hierarchical_allocator_tests.cpp | 93 +++++++++++++++++++++++++
 src/tests/master_quota_tests.cpp           | 24 +++++--
 3 files changed, 120 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/50a71c03/src/master/quota_handler.cpp
----------------------------------------------------------------------
diff --git a/src/master/quota_handler.cpp b/src/master/quota_handler.cpp
index 281fa1d..7fe5580 100644
--- a/src/master/quota_handler.cpp
+++ b/src/master/quota_handler.cpp
@@ -512,6 +512,15 @@ Future<http::Response> Master::QuotaHandler::_set(
     }
   }
 
+  // Setting quota on a nested role is temporarily disabled.
+  //
+  // TODO(neilc): Remove this check when MESOS-7402 is fixed.
+  bool nestedRole = strings::contains(quotaInfo.role(), "/");
+  if (nestedRole) {
+    return BadRequest("Setting quota on nested role '" +
+                      quotaInfo.role() + "' is not supported yet");
+  }
+
   // The force flag is used to overwrite the `capacityHeuristic` check.
   const bool forced = quotaRequest.force();
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/50a71c03/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index ad03e17..84bb6f3 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -4594,6 +4594,99 @@ TEST_F(HierarchicalAllocatorTest, NestedRoleQuotaAllocateToParent)
 }
 
 
+// This test checks that when quota resources are allocated to a
+// nested role, those resources are also counted against the quota of
+// the parent role.
+//
+// TODO(neilc): Re-enable this test when MESOS-7402 is fixed.
+TEST_F(HierarchicalAllocatorTest, DISABLED_NestedQuotaAccounting)
+{
+  Clock::pause();
+
+  initialize();
+
+  const string PARENT_ROLE = "x/b";
+  const string CHILD_ROLE = "x/b/c";
+  const string NON_QUOTA_ROLE = "aaa";
+
+  // Create `framework1` in the non-quota role.
+  FrameworkInfo framework1 = createFrameworkInfo({NON_QUOTA_ROLE});
+  allocator->addFramework(framework1.id(), framework1, {}, true);
+
+  // Set quota for parent role.
+  const Quota parentQuota = createQuota(PARENT_ROLE, "cpus:3;mem:300");
+  allocator->setQuota(PARENT_ROLE, parentQuota);
+
+  // Create `framework2` in the parent role.
+  FrameworkInfo framework2 = createFrameworkInfo({PARENT_ROLE});
+  allocator->addFramework(framework2.id(), framework2, {}, true);
+
+  SlaveInfo agent1 = createSlaveInfo("cpus:2;mem:200");
+  allocator->addSlave(
+      agent1.id(),
+      agent1,
+      AGENT_CAPABILITIES(),
+      None(),
+      agent1.resources(),
+      {});
+
+  // `framework2` will be offered all of the resources on `agent1`.
+  {
+    Allocation expected = Allocation(
+        framework2.id(),
+        {{PARENT_ROLE, {{agent1.id(), agent1.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocations.get());
+  }
+
+  // Set quota for child role.
+  const Quota childQuota = createQuota(CHILD_ROLE, "cpus:1;mem:100");
+  allocator->setQuota(CHILD_ROLE, childQuota);
+
+  // Create `framework3` in the child role.
+  FrameworkInfo framework3 = createFrameworkInfo({CHILD_ROLE});
+  allocator->addFramework(framework3.id(), framework3, {}, true);
+
+  SlaveInfo agent2 = createSlaveInfo("cpus:1;mem:100");
+  allocator->addSlave(
+      agent2.id(),
+      agent2,
+      AGENT_CAPABILITIES(),
+      None(),
+      agent2.resources(),
+      {});
+
+  // `framework3` will be offered all of the resources on `agent2`.
+  {
+    Allocation expected = Allocation(
+        framework3.id(),
+        {{CHILD_ROLE, {{agent2.id(), agent2.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocations.get());
+  }
+
+  SlaveInfo agent3 = createSlaveInfo("cpus:1;mem:100");
+  allocator->addSlave(
+      agent3.id(),
+      agent3,
+      AGENT_CAPABILITIES(),
+      None(),
+      agent3.resources(),
+      {});
+
+  // The quotas of both frameworks' roles are satisfied at this point;
+  // therefore, the resources of agent3 should follow the rule of fair
+  // share and be offered to `framework1`.
+  {
+    Allocation expected = Allocation(
+        framework1.id(),
+        {{PARENT_ROLE, {{agent3.id(), agent3.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocations.get());
+  }
+}
+
+
 class HierarchicalAllocatorTestWithParam
   : public HierarchicalAllocatorTestBase,
     public WithParamInterface<bool> {};

http://git-wip-us.apache.org/repos/asf/mesos/blob/50a71c03/src/tests/master_quota_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_quota_tests.cpp b/src/tests/master_quota_tests.cpp
index 7f94b92..2c7ec5a 100644
--- a/src/tests/master_quota_tests.cpp
+++ b/src/tests/master_quota_tests.cpp
@@ -1481,7 +1481,9 @@ TEST_F(MasterQuotaTest, AuthorizeGetUpdateQuotaRequestsWithoutPrincipal)
 
 // This test checks that quota can be successfully set, queried, and
 // removed on a child role.
-TEST_F(MasterQuotaTest, ChildRole)
+//
+// TODO(neilc): Re-enable this test when MESOS-7402 is fixed.
+TEST_F(MasterQuotaTest, DISABLED_ChildRole)
 {
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
@@ -1570,7 +1572,9 @@ TEST_F(MasterQuotaTest, ChildRole)
 
 // This test checks that attempting to set quota on a child role is
 // rejected if the child's parent does not have quota set.
-TEST_F(MasterQuotaTest, ChildRoleWithNoParentQuota)
+//
+// TODO(neilc): Re-enable this test when MESOS-7402 is fixed.
+TEST_F(MasterQuotaTest, DISABLED_ChildRoleWithNoParentQuota)
 {
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
@@ -1599,7 +1603,9 @@ TEST_F(MasterQuotaTest, ChildRoleWithNoParentQuota)
 
 // This test checks that a request to set quota for a child role is
 // rejected if it exceeds the parent role's quota.
-TEST_F(MasterQuotaTest, ChildRoleExceedsParentQuota)
+//
+// TODO(neilc): Re-enable this test when MESOS-7402 is fixed.
+TEST_F(MasterQuotaTest, DISABLED_ChildRoleExceedsParentQuota)
 {
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
@@ -1644,7 +1650,9 @@ TEST_F(MasterQuotaTest, ChildRoleExceedsParentQuota)
 // This test checks that a request to set quota for a child role is
 // rejected if it would result in the parent role's quota being
 // smaller than the sum of the quota of its children.
-TEST_F(MasterQuotaTest, ChildRoleSumExceedsParentQuota)
+//
+// TODO(neilc): Re-enable this test when MESOS-7402 is fixed.
+TEST_F(MasterQuotaTest, DISABLED_ChildRoleSumExceedsParentQuota)
 {
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
@@ -1703,7 +1711,9 @@ TEST_F(MasterQuotaTest, ChildRoleSumExceedsParentQuota)
 // This test checks that a request to delete quota for a parent role
 // is rejected since this would result in the child role's quota
 // exceeding the parent role's quota.
-TEST_F(MasterQuotaTest, ChildRoleDeleteParentQuota)
+//
+// TODO(neilc): Re-enable this test when MESOS-7402 is fixed.
+TEST_F(MasterQuotaTest, DISABLED_ChildRoleDeleteParentQuota)
 {
   Try<Owned<cluster::Master>> master = StartMaster();
   ASSERT_SOME(master);
@@ -1761,7 +1771,9 @@ TEST_F(MasterQuotaTest, ChildRoleDeleteParentQuota)
 // child roles should not be double-counted with the quota on the
 // child's parent role. In other words, the total quota'd resources in
 // the cluster is the sum of the quota on the top-level roles.
-TEST_F(MasterQuotaTest, ClusterCapacityWithNestedRoles)
+//
+// TODO(neilc): Re-enable this test when MESOS-7402 is fixed.
+TEST_F(MasterQuotaTest, DISABLED_ClusterCapacityWithNestedRoles)
 {
   TestAllocator<> allocator;
   EXPECT_CALL(allocator, initialize(_, _, _, _));


[09/11] mesos git commit: Updated agent for hierarchical roles.

Posted by ne...@apache.org.
Updated agent for hierarchical roles.

This commit adjusts the way persistent volumes are stored on the
agent. Instead of interpreting the role of the volume as a literal
path, we replace `/` with ` ` when creating the path. This prevents
subdirectories from being created for volumes with hierarchical roles.
Directly interpreting the role as a path is undesirable as it can lead
to volumes overlapping (e.g., a volume with role `a/b/c/d` and id `id`
would be visible as `id` in a volume with role `a/b/c` and id `d`).
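
For illustration only, the standalone sketch below shows how encoding
`/` as a space keeps the two example volumes on disjoint paths. It is
not the agent code (the actual implementation is
`getPersistentVolumePath` in the diff below), and the helper name
`volumePath` is hypothetical.

#include <iostream>
#include <string>

// Hypothetical stand-in for the agent's path construction; in Mesos this
// is done with stout's `strings::replace` and `path::join`.
std::string volumePath(const std::string& role, const std::string& id)
{
  std::string encoded = role;
  for (char& c : encoded) {
    if (c == '/') {
      c = ' ';
    }
  }
  return "volumes/roles/" + encoded + "/" + id;
}

int main()
{
  // With the role interpreted as a literal path these two volumes would
  // overlap ("a/b/c/d/id" vs. "a/b/c/d"); with the encoding they do not.
  std::cout << volumePath("a/b/c/d", "id") << std::endl;  // volumes/roles/a b c d/id
  std::cout << volumePath("a/b/c", "d") << std::endl;     // volumes/roles/a b c/d
  return 0;
}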

Review: https://reviews.apache.org/r/57190/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/cfd1b482
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/cfd1b482
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/cfd1b482

Branch: refs/heads/master
Commit: cfd1b482fd2dd9809f5c8169cce7443a10e5ee5a
Parents: e5ef199
Author: Benjamin Bannier <be...@mesosphere.io>
Authored: Wed Apr 26 14:05:19 2017 -0400
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:19:27 2017 -0400

----------------------------------------------------------------------
 src/slave/paths.cpp      |  19 ++++-
 src/tests/role_tests.cpp | 188 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/cfd1b482/src/slave/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/paths.cpp b/src/slave/paths.cpp
index d5903b8..b2709ad 100644
--- a/src/slave/paths.cpp
+++ b/src/slave/paths.cpp
@@ -450,7 +450,24 @@ string getPersistentVolumePath(
     const string& role,
     const string& persistenceId)
 {
-  return path::join(rootDir, "volumes", "roles", role, persistenceId);
+  // Role names might contain literal `/` if the role is part of a
+  // role hierarchy. Since `/` is not allowed in a directory name
+  // under Linux, we could either represent such sub-roles with
+  // sub-directories, or encode the `/` with some other identifier.
+  // To clearly distinguish artifacts in a volume from subroles we
+  // choose to encode `/` in role names as ` ` (literal space) as
+  // opposed to using subdirectories. Whitespace is not allowed as
+  // part of a role name. Also, practically all modern filesystems can
+  // use ` ` in filenames. There are some limitations in auxiliary
+  // tooling which are not relevant here, e.g., many shell constructs
+  // require quotes around filenames containing ` `; containers using
+  // persistent volumes would not see the ` ` as the role-related part
+  // of the path would not be part of a mapping into the container
+  // sandbox.
+  string serializableRole = strings::replace(role, "/", " ");
+
+  return path::join(
+      rootDir, "volumes", "roles", serializableRole, persistenceId);
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/cfd1b482/src/tests/role_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/role_tests.cpp b/src/tests/role_tests.cpp
index 8930b7e..7b47526 100644
--- a/src/tests/role_tests.cpp
+++ b/src/tests/role_tests.cpp
@@ -21,9 +21,15 @@
 #include <mesos/roles.hpp>
 
 #include <process/clock.hpp>
+#include <process/future.hpp>
+#include <process/gtest.hpp>
+#include <process/http.hpp>
 #include <process/owned.hpp>
 #include <process/pid.hpp>
 
+#include <stout/none.hpp>
+#include <stout/nothing.hpp>
+
 #include "tests/containerizer.hpp"
 #include "tests/mesos.hpp"
 #include "tests/resources_utils.hpp"
@@ -39,10 +45,13 @@ using std::vector;
 using google::protobuf::RepeatedPtrField;
 
 using process::Clock;
+using process::Failure;
 using process::Future;
 using process::Owned;
 using process::PID;
 
+using process::http::Accepted;
+using process::http::Headers;
 using process::http::OK;
 using process::http::Response;
 using process::http::Unauthorized;
@@ -868,6 +877,185 @@ TEST_F(RoleTest, EndpointBadAuthentication)
   AWAIT_EXPECT_RESPONSE_STATUS_EQ(Unauthorized({}).status, response);
 }
 
+
+// This test confirms that our handling of persistent volumes from hierarchical
+// roles does not cause leaking of volumes. Since hierarchical roles contain
+// a literal `/`, an implementation not taking this into account could map the name
+// of a hierarchical role `A/B` onto a directory hierarchy `A/B`. If the
+// persistent volume with persistence id `ID` and role `ROLE` is mapped to a
+// path `ROLE/ID`, it becomes impossible to distinguish the last component of a
+// hierarchical role from a persistence id.
+//
+// This performs the following checks:
+//
+// 1) Run two tasks with volumes whose role and id overlap on the file system in
+// the naive implementation. The tasks should not be able to see each others
+// volumes.
+//
+// 2) Destroy the previously created volumes in an order such that the volume
+// which is less deeply nested in the naive implementation is destroyed first.
+// This should not destroy the more deeply nested volume (e.g., since it is not
+// created as a subdirectory).
+TEST_F(RoleTest, VolumesInOverlappingHierarchies)
+{
+  constexpr char PATH[] = "path";
+  constexpr Megabytes DISK_SIZE(1);
+
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Capture the SlaveID so we can use it in create/destroy volumes API calls.
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get());
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(slaveRegisteredMessage);
+  const SlaveID slaveId = slaveRegisteredMessage->slave_id();
+
+  // Helper function which starts a framework in a role and creates a
+  // persistent volume with the given id. The framework creates a task
+  // using the volume and makes sure that no volumes from other roles
+  // are leaked into the volume.
+  auto runTask = [&master, &PATH, DISK_SIZE](
+      const string& role, const string& id) {
+    FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+    frameworkInfo.set_role(role);
+
+    MockScheduler sched;
+    MesosSchedulerDriver driver(
+        &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+    EXPECT_CALL(sched, registered(&driver, _, _));
+
+    Future<vector<Offer>> offers;
+    EXPECT_CALL(sched, resourceOffers(&driver, _))
+      .WillOnce(FutureArg<1>(&offers))
+      .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+    driver.start();
+
+    AWAIT_READY(offers);
+
+    ASSERT_FALSE(offers->empty());
+
+    // Create a reserved disk. We only create a small disk so
+    // we have remaining disk to offer to other frameworks.
+    Resource unreservedDisk = Resources::parse("disk", "1", "*").get();
+    Resource reservedDisk = unreservedDisk;
+    reservedDisk.set_role(role);
+    reservedDisk.mutable_reservation()->CopyFrom(
+        createReservationInfo(DEFAULT_CREDENTIAL.principal()));
+
+    // Create a persistent volume on the reserved disk.
+    Resource volume = createPersistentVolume(
+        createDiskResource(
+            stringify(DISK_SIZE.megabytes()), role, None(), None()),
+        id,
+        PATH,
+        None(),
+        frameworkInfo.principal());
+    volume.mutable_reservation()->CopyFrom(reservedDisk.reservation());
+    volume.set_role(reservedDisk.role());
+
+    // Create a task which uses the volume and checks that
+    // it contains no files leaked from another role.
+    const Offer& offer = offers.get()[0];
+
+    Resources cpusMem =
+      Resources(offer.resources()).filter([](const Resource& r) {
+        return r.name() == "cpus" || r.name() == "mem";
+      });
+
+    Resources taskResources = cpusMem + volume;
+
+    // Create a task confirming that the directory `path` is empty.
+    // Note that we do not explicitly confirm that `path` exists here.
+    TaskInfo task = createTask(
+        offer.slave_id(),
+        taskResources,
+        "! (ls -Av path | grep -q .)");
+
+    // We expect two status updates for the task.
+    Future<TaskStatus> status1, status2;
+    EXPECT_CALL(sched, statusUpdate(&driver, _))
+      .WillOnce(FutureArg<1>(&status1))
+      .WillOnce(FutureArg<1>(&status2));
+
+    // Accept the offer.
+    driver.acceptOffers(
+        {offer.id()},
+        {RESERVE(reservedDisk), CREATE(volume), LAUNCH({task})});
+
+    AWAIT_READY(status1);
+
+    EXPECT_EQ(task.task_id(), status1->task_id());
+    EXPECT_EQ(TASK_RUNNING, status1->state());
+
+    AWAIT_READY(status2);
+
+    EXPECT_EQ(task.task_id(), status2->task_id());
+    EXPECT_EQ(TASK_FINISHED, status2->state())
+      << "Task for role '" << role << "' and id '" << id << "' did not succeed";
+
+    driver.stop();
+    driver.join();
+  };
+
+  // Helper function to destroy a volume with given role and id.
+  auto destroyVolume = [&slaveId, &master, &PATH, DISK_SIZE](
+      const string& role, const string& id) {
+    Resource volume = createPersistentVolume(
+        DISK_SIZE,
+        role,
+        id,
+        PATH,
+        None(),
+        None(),
+        DEFAULT_CREDENTIAL.principal());
+
+    volume.mutable_reservation()->set_principal(DEFAULT_CREDENTIAL.principal());
+
+    v1::master::Call destroyVolumesCall;
+    destroyVolumesCall.set_type(v1::master::Call::DESTROY_VOLUMES);
+
+    v1::master::Call::DestroyVolumes* destroyVolumes =
+      destroyVolumesCall.mutable_destroy_volumes();
+    destroyVolumes->add_volumes()->CopyFrom(evolve(volume));
+    destroyVolumes->mutable_agent_id()->CopyFrom(evolve(slaveId));
+
+    constexpr ContentType CONTENT_TYPE = ContentType::PROTOBUF;
+
+    Headers headers = createBasicAuthHeaders(DEFAULT_CREDENTIAL);
+    headers["Accept"] = stringify(CONTENT_TYPE);
+
+    return process::http::post(
+        master.get()->pid,
+        "api/v1",
+        headers,
+        serialize(CONTENT_TYPE, destroyVolumesCall),
+        stringify(CONTENT_TYPE));
+  };
+
+  // Run two tasks. In the naive storage scheme of volumes from hierarchical
+  // role frameworks, the first volume would be created under paths
+  // `a/b/c/d/id/` and the second one under `a/b/c/d/`. The second task would in
+  // that case incorrectly see a directory `id` in its persistent volume.
+  runTask("a/b/c/d", "id");
+  runTask("a/b/c", "d");
+
+  // Destroy both volumes. Even though the role `a/b/c` is a prefix of the role
+  // `a/b/c/d`, destroying the former role's volume `d` should not interfere
+  // with the latter's volume `id`.
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(
+      Accepted().status, destroyVolume("a/b/c", "d"));
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(
+      Accepted().status, destroyVolume("a/b/c/d", "id"));
+}
+
 }  // namespace tests {
 }  // namespace internal {
 }  // namespace mesos {


[04/11] mesos git commit: Reordered DRFSorter member function.

Posted by ne...@apache.org.
Reordered DRFSorter member function.

Ensure that member functions appear in the same order in the header file
as in the implementation file.

Review: https://reviews.apache.org/r/57528


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/bbe2c6c6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/bbe2c6c6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/bbe2c6c6

Branch: refs/heads/master
Commit: bbe2c6c65647ddbe91608d370ea27de3ec50740a
Parents: b7f70ca
Author: Neil Conway <ne...@gmail.com>
Authored: Fri Mar 10 20:19:27 2017 -0500
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:01:57 2017 -0400

----------------------------------------------------------------------
 src/master/allocator/sorter/drf/sorter.cpp | 76 ++++++++++++-------------
 1 file changed, 38 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/bbe2c6c6/src/master/allocator/sorter/drf/sorter.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index d2952b8..85dbff1 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -228,6 +228,44 @@ void DRFSorter::update(
 }
 
 
+void DRFSorter::unallocated(
+    const string& name,
+    const SlaveID& slaveId,
+    const Resources& resources)
+{
+  CHECK(contains(name));
+  CHECK(allocations.at(name).resources.contains(slaveId));
+  CHECK(allocations.at(name).resources.at(slaveId).contains(resources));
+
+  allocations[name].resources[slaveId] -= resources;
+
+  // Remove shared resources from the allocated quantities when there
+  // are no instances of same resources left in the allocation.
+  const Resources absentShared = resources.shared()
+    .filter([this, name, slaveId](const Resource& resource) {
+      return !allocations[name].resources[slaveId].contains(resource);
+    });
+
+  const Resources scalarQuantities =
+    (resources.nonShared() + absentShared).createStrippedScalarQuantity();
+
+  foreach (const Resource& resource, scalarQuantities) {
+    allocations[name].totals[resource.name()] -= resource.scalar();
+  }
+
+  CHECK(allocations[name].scalarQuantities.contains(scalarQuantities));
+  allocations[name].scalarQuantities -= scalarQuantities;
+
+  if (allocations[name].resources[slaveId].empty()) {
+    allocations[name].resources.erase(slaveId);
+  }
+
+  if (!dirty) {
+    updateShare(name);
+  }
+}
+
+
 const hashmap<SlaveID, Resources>& DRFSorter::allocation(
     const string& name) const
 {
@@ -286,44 +324,6 @@ const Resources& DRFSorter::totalScalarQuantities() const
 }
 
 
-void DRFSorter::unallocated(
-    const string& name,
-    const SlaveID& slaveId,
-    const Resources& resources)
-{
-  CHECK(contains(name));
-  CHECK(allocations.at(name).resources.contains(slaveId));
-  CHECK(allocations.at(name).resources.at(slaveId).contains(resources));
-
-  allocations[name].resources[slaveId] -= resources;
-
-  // Remove shared resources from the allocated quantities when there
-  // are no instances of same resources left in the allocation.
-  const Resources absentShared = resources.shared()
-    .filter([this, name, slaveId](const Resource& resource) {
-      return !allocations[name].resources[slaveId].contains(resource);
-    });
-
-  const Resources scalarQuantities =
-    (resources.nonShared() + absentShared).createStrippedScalarQuantity();
-
-  foreach (const Resource& resource, scalarQuantities) {
-    allocations[name].totals[resource.name()] -= resource.scalar();
-  }
-
-  CHECK(allocations[name].scalarQuantities.contains(scalarQuantities));
-  allocations[name].scalarQuantities -= scalarQuantities;
-
-  if (allocations[name].resources[slaveId].empty()) {
-    allocations[name].resources.erase(slaveId);
-  }
-
-  if (!dirty) {
-    updateShare(name);
-  }
-}
-
-
 void DRFSorter::add(const SlaveID& slaveId, const Resources& resources)
 {
   if (!resources.empty()) {


[08/11] mesos git commit: Added support for hierarchical roles to DRFSorter.

Posted by ne...@apache.org.
Added support for hierarchical roles to DRFSorter.

This commit replaces the sorter's flat list of clients with a tree; the
tree represents the hierarchical relationship between sorter clients.
Each node in the tree contains a vector of pointers to child nodes. The
tree might contain nodes that do not correspond directly to sorter
clients. For example, adding clients "a/b" and "c/d" results in the
following tree:

root
 -> a
   -> b
 -> c
   -> d

The `sort` member function still only returns one result for each active
client in the sorter. This is implemented by ensuring that each sorter
client is associated with a leaf node in the tree (i.e., internal nodes
are not returned by `sort`). Note that it is possible for a leaf node to
be transformed into an internal node by a subsequent insertion; to
handle this case, we "implicitly" create an extra child node, which
maintains the invariant that each client is associated with a leaf
node. For example, if the client "a/b/x" is added to the tree above, the
result is:

root
 -> a
   -> b
     -> .
     -> x
 -> c
   -> d

The "." leaf node holds the allocation that has been made to the "a/b"
client itself; the "a/b" node holds the sum of all the allocations that
have been made to the subtree rooted at "a/b", which also includes
"a/b/x". The "." node is called a "virtual leaf node".

This commit also introduces a new approach to sorting: rather than
keeping a `std::set` of sorter clients, we now keep a tree of
`std::vector`, which is sorted explicitly via `std::sort` when
necessary. The previous implementation tried to optimize the sorting
process by updating the sort order incrementally when a single sorter
client was updated; this commit removes that optimization, and instead
re-sorts the entire tree whenever a change is made that might alter the
sort order. Re-introducing a version of this optimization would make
sense in the future (MESOS-7390), but benchmarking suggests that this
commit results in a net improvement in sorter performance for
non-hierarchical clients, anyway. The performance improvement is likely
due to the introduction of a secondary hashmap that allows the leaf node
associated with a client name to be found efficiently; the previous
implementation required a linear scan of a `std::set`.
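
As a rough sketch of this approach (not the actual DRFSorter code; the
`ClientEntry` type and its fields are assumptions), children at each
level can be kept in a `std::vector` and re-ordered with `std::sort`
using the same tie-breaking rules the removed `DRFComparator` applied:
share, then allocation count, then name.

#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

// Illustrative stand-in for a sorter client entry.
struct ClientEntry
{
  std::string name;
  double share;            // Dominant share of this client.
  size_t allocationCount;  // Number of allocations made to this client.
};

// Sort clients by share, breaking ties by allocation count and then name.
void sortClients(std::vector<ClientEntry>& clients)
{
  std::sort(
      clients.begin(),
      clients.end(),
      [](const ClientEntry& a, const ClientEntry& b) {
        if (a.share != b.share) {
          return a.share < b.share;
        }
        if (a.allocationCount != b.allocationCount) {
          return a.allocationCount < b.allocationCount;
        }
        return a.name < b.name;
      });
}

int main()
{
  std::vector<ClientEntry> clients = {{"a/b", 0.5, 3}, {"c/d", 0.25, 1}};
  sortClients(clients);  // "c/d" now sorts first (lower share).
  return 0;
}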

Review: https://reviews.apache.org/r/57254


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/e5ef1992
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/e5ef1992
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/e5ef1992

Branch: refs/heads/master
Commit: e5ef1992b2b8e84b5d1487f1578f18f2291cd082
Parents: 5bf32be
Author: Neil Conway <ne...@gmail.com>
Authored: Mon Mar 13 10:18:36 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:02:22 2017 -0400

----------------------------------------------------------------------
 src/master/allocator/sorter/drf/metrics.cpp |  12 +-
 src/master/allocator/sorter/drf/sorter.cpp  | 430 +++++++++++------
 src/master/allocator/sorter/drf/sorter.hpp  | 393 ++++++++++------
 src/master/allocator/sorter/sorter.hpp      |  15 +-
 src/tests/hierarchical_allocator_tests.cpp  | 251 +++++++++-
 src/tests/master_allocator_tests.cpp        | 152 ++++++
 src/tests/sorter_tests.cpp                  | 573 ++++++++++++++++++++++-
 7 files changed, 1497 insertions(+), 329 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/e5ef1992/src/master/allocator/sorter/drf/metrics.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/metrics.cpp b/src/master/allocator/sorter/drf/metrics.cpp
index 94acb86..ff63fba 100644
--- a/src/master/allocator/sorter/drf/metrics.cpp
+++ b/src/master/allocator/sorter/drf/metrics.cpp
@@ -16,8 +16,6 @@
 
 #include "master/allocator/sorter/drf/metrics.hpp"
 
-#include <set>
-
 #include <process/defer.hpp>
 
 #include <process/metrics/metrics.hpp>
@@ -27,7 +25,6 @@
 
 #include "master/allocator/sorter/drf/sorter.hpp"
 
-using std::set;
 using std::string;
 
 using process::UPID;
@@ -67,12 +64,13 @@ void Metrics::add(const string& client)
         // The client may have been removed if the dispatch
         // occurs after the client is removed but before the
         // metric is removed.
-        if (sorter->contains(client)) {
-          set<Client, DRFComparator>::iterator it = sorter->find(client);
-          return sorter->calculateShare(*it);
+        DRFSorter::Node* sorterClient = sorter->find(client);
+
+        if (sorterClient == nullptr) {
+          return 0.0;
         }
 
-        return 0.0;
+        return sorter->calculateShare(sorterClient);
       }));
 
   dominantShares.put(client, gauge);

http://git-wip-us.apache.org/repos/asf/mesos/blob/e5ef1992/src/master/allocator/sorter/drf/sorter.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index 7e91c85..73b8e8c 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -30,6 +30,7 @@
 #include <stout/foreach.hpp>
 #include <stout/hashmap.hpp>
 #include <stout/option.hpp>
+#include <stout/strings.hpp>
 
 using std::set;
 using std::string;
@@ -42,24 +43,22 @@ namespace internal {
 namespace master {
 namespace allocator {
 
-bool DRFComparator::operator()(const Client& client1, const Client& client2)
-{
-  if (client1.share != client2.share) {
-    return client1.share < client2.share;
-  }
 
-  if (client1.allocation.count != client2.allocation.count) {
-    return client1.allocation.count < client2.allocation.count;
-  }
-
-  return client1.name < client2.name;
-}
+DRFSorter::DRFSorter()
+  : root(new Node("", nullptr)) {}
 
 
 DRFSorter::DRFSorter(
     const UPID& allocator,
     const string& metricsPrefix)
-  : metrics(Metrics(allocator, *this, metricsPrefix)) {}
+  : root(new Node("", nullptr)),
+    metrics(Metrics(allocator, *this, metricsPrefix)) {}
+
+
+DRFSorter::~DRFSorter()
+{
+  delete root;
+}
 
 
 void DRFSorter::initialize(
@@ -69,96 +68,231 @@ void DRFSorter::initialize(
 }
 
 
-void DRFSorter::add(const string& name)
+void DRFSorter::add(const string& clientPath)
 {
-  CHECK(!contains(name));
+  vector<string> pathElements = strings::tokenize(clientPath, "/");
+  CHECK(!pathElements.empty());
+
+  Node* current = root;
+  Node* lastCreatedNode = nullptr;
+
+  // Traverse the tree to add new nodes for each element of the path,
+  // if that node doesn't already exist (similar to `mkdir -p`).
+  foreach (const string& element, pathElements) {
+    Node* node = nullptr;
+
+    foreach (Node* child, current->children) {
+      if (child->name == element) {
+        node = child;
+        break;
+      }
+    }
 
-  Client client(name);
-  clients.insert(client);
+    if (node != nullptr) {
+      current = node;
+      continue;
+    }
 
-  if (metrics.isSome()) {
-    metrics->add(name);
+    // We didn't find `element`, so add a new child to `current`.
+    //
+    // If adding this child would result in turning `current` from a
+    // leaf node into an internal node, we need to create an
+    // additional child node: `current` must have been associated with
+    // a client and clients must always be associated with leaf nodes.
+    //
+    // There are two exceptions: if `current` is the root node or it
+    // was just created by the current `add()` call, it does not
+    // correspond to a client, so we don't create an extra child.
+    if (current->children.empty() &&
+        current != root &&
+        current != lastCreatedNode) {
+      Node* parent = CHECK_NOTNULL(current->parent);
+
+      parent->removeChild(current);
+
+      // Create a node under `parent`. This internal node will take
+      // the place of `current` in the tree.
+      Node* internal = new Node(current->name, parent);
+      parent->addChild(internal);
+      internal->allocation = current->allocation;
+
+      CHECK_EQ(current->path, internal->path);
+
+      // Update `current` to become a virtual leaf node and a child of
+      // `internal`.
+      current->name = ".";
+      current->parent = internal;
+      internal->addChild(current);
+      current->path = strings::join("/", parent->path, current->name);
+
+      CHECK_EQ(internal->path, current->clientPath());
+
+      current = internal;
+    }
+
+    // Now actually add a new child to `current`.
+    Node* newChild = new Node(element, current);
+    current->addChild(newChild);
+
+    current = newChild;
+    lastCreatedNode = newChild;
   }
-}
 
+  // `current` is the node associated with the last element of the
+  // path. If we didn't add `current` to the tree above, create a leaf
+  // node now. For example, if the tree contains "a/b" and we add a
+  // new client "a", we want to create a new leaf node "a/." here.
+  if (current != lastCreatedNode) {
+    Node* newChild = new Node(".", current);
+    current->addChild(newChild);
+    current = newChild;
+  }
 
-void DRFSorter::remove(const string& name)
-{
-  set<Client, DRFComparator>::iterator it = find(name);
-  CHECK(it != clients.end());
+  // `current` is the newly created node associated with the last
+  // element of the path. `current` should be an inactive node with no
+  // children; activate it now.
+  CHECK(current->children.empty());
+  CHECK(!current->active);
+  current->active = true;
+
+  // Add a new entry to the lookup table. The full path of the newly
+  // added client should not already exist in `clients`.
+  CHECK_EQ(clientPath, current->clientPath());
+  CHECK(!clients.contains(clientPath));
+
+  clients[clientPath] = current;
 
-  clients.erase(it);
+  // TODO(neilc): Avoid dirtying the tree in some circumstances.
+  dirty = true;
 
   if (metrics.isSome()) {
-    metrics->remove(name);
+    metrics->add(clientPath);
   }
 }
 
 
-void DRFSorter::activate(const string& name)
+void DRFSorter::remove(const string& clientPath)
 {
-  set<Client, DRFComparator>::iterator it = find(name);
-  CHECK(it != clients.end());
+  Node* current = CHECK_NOTNULL(find(clientPath));
+
+  // Save a copy of the leaf node's allocated resources, because we
+  // destroy the leaf node below.
+  const hashmap<SlaveID, Resources> leafAllocation =
+    current->allocation.resources;
+
+  // Remove the lookup table entry for the client.
+  CHECK(clients.contains(clientPath));
+  clients.erase(clientPath);
+
+  // To remove a client from the tree, we have to do two things:
+  //
+  //   (1) Update the tree structure to reflect the removal of the
+  //       client. This means removing the client's leaf node, then
+  //       walking back up the tree to remove any internal nodes that
+  //       are now unnecessary.
+  //
+  //   (2) Update allocations of ancestor nodes to reflect the removal
+  //       of the client.
+  //
+  // We do both things at once: find the leaf node, remove it, and
+  // walk up the tree, updating ancestor allocations and removing
+  // ancestors when possible.
+  while (current != root) {
+    Node* parent = CHECK_NOTNULL(current->parent);
+
+    // Update `parent` to reflect the fact that the resources in the
+    // leaf node are no longer allocated to the subtree rooted at
+    // `parent`. We skip `root`, because we never update the
+    // allocation made to the root node.
+    if (parent != root) {
+      foreachpair (const SlaveID& slaveId,
+                   const Resources& resources,
+                   leafAllocation) {
+        parent->allocation.subtract(slaveId, resources);
+      }
+    }
+
+    if (current->children.empty()) {
+      parent->removeChild(current);
+      delete current;
+    } else if (current->children.size() == 1) {
+      // If `current` has only one child that was created to
+      // accommodate inserting `clientPath` (see `DRFSorter::add()`),
+      // we can remove the child node and turn `current` back into a
+      // leaf node.
+      Node* child = *(current->children.begin());
+
+      if (child->name == ".") {
+        CHECK(child->children.empty());
+        CHECK(clients.contains(current->path));
+        CHECK_EQ(child, clients.at(current->path));
 
-  if (!it->active) {
-    Client client(*it);
-    client.active = true;
+        current->active = child->active;
+        current->removeChild(child);
 
-    clients.erase(it);
-    clients.insert(client);
+        clients[current->path] = current;
+
+        delete child;
+      }
+    }
+
+    current = parent;
+  }
+
+  // TODO(neilc): Avoid dirtying the tree in some circumstances.
+  dirty = true;
+
+  if (metrics.isSome()) {
+    metrics->remove(clientPath);
   }
 }
 
 
-void DRFSorter::deactivate(const string& name)
+void DRFSorter::activate(const string& clientPath)
 {
-  set<Client, DRFComparator>::iterator it = find(name);
-  CHECK(it != clients.end());
+  Node* client = CHECK_NOTNULL(find(clientPath));
+  client->active = true;
+}
 
-  if (it->active) {
-    Client client(*it);
-    client.active = false;
 
-    clients.erase(it);
-    clients.insert(client);
-  }
+void DRFSorter::deactivate(const string& clientPath)
+{
+  Node* client = CHECK_NOTNULL(find(clientPath));
+  client->active = false;
 }
 
 
-void DRFSorter::updateWeight(const string& name, double weight)
+void DRFSorter::updateWeight(const string& path, double weight)
 {
-  weights[name] = weight;
+  weights[path] = weight;
 
-  // It would be possible to avoid dirtying the tree here (in some
-  // cases), but it doesn't seem worth the complexity.
+  // TODO(neilc): Avoid dirtying the tree in some circumstances.
   dirty = true;
 }
 
 
 void DRFSorter::allocated(
-    const string& name,
+    const string& clientPath,
     const SlaveID& slaveId,
     const Resources& resources)
 {
-  set<Client, DRFComparator>::iterator it = find(name);
-  CHECK(it != clients.end());
-
-  Client client(*it);
-  client.allocation.add(slaveId, resources);
-
-  clients.erase(it);
-  clients.insert(client);
-
-  // If the total resources have changed, we're going to recalculate
-  // all the shares, so don't bother just updating this client.
-  if (!dirty) {
-    updateShare(client.name);
+  Node* current = CHECK_NOTNULL(find(clientPath));
+
+  // NOTE: We don't currently update the `allocation` for the root
+  // node. This is debatable, but the current implementation doesn't
+  // require looking at the allocation of the root node.
+  while (current != root) {
+    current->allocation.add(slaveId, resources);
+    current = CHECK_NOTNULL(current->parent);
   }
+
+  // TODO(neilc): Avoid dirtying the tree in some circumstances.
+  dirty = true;
 }
 
 
 void DRFSorter::update(
-    const string& name,
+    const string& clientPath,
     const SlaveID& slaveId,
     const Resources& oldAllocation,
     const Resources& newAllocation)
@@ -168,14 +302,15 @@ void DRFSorter::update(
   // Otherwise, we need to ensure we re-calculate the shares, as
   // is being currently done, for safety.
 
-  set<Client, DRFComparator>::iterator it = find(name);
-  CHECK(it != clients.end());
+  Node* current = CHECK_NOTNULL(find(clientPath));
 
-  Client client(*it);
-  client.allocation.update(slaveId, oldAllocation, newAllocation);
-
-  clients.erase(it);
-  clients.insert(client);
+  // NOTE: We don't currently update the `allocation` for the root
+  // node. This is debatable, but the current implementation doesn't
+  // require looking at the allocation of the root node.
+  while (current != root) {
+    current->allocation.update(slaveId, oldAllocation, newAllocation);
+    current = CHECK_NOTNULL(current->parent);
+  }
 
   // Just assume the total has changed, per the TODO above.
   dirty = true;
@@ -183,60 +318,60 @@ void DRFSorter::update(
 
 
 void DRFSorter::unallocated(
-    const string& name,
+    const string& clientPath,
     const SlaveID& slaveId,
     const Resources& resources)
 {
-  set<Client, DRFComparator>::iterator it = find(name);
-  CHECK(it != clients.end());
-
-  Client client(*it);
-  client.allocation.subtract(slaveId, resources);
-
-  clients.erase(it);
-  clients.insert(client);
-
-  // If the total resources have changed, we're going to recalculate
-  // all the shares, so don't bother just updating this client.
-  if (!dirty) {
-    updateShare(client.name);
+  Node* current = CHECK_NOTNULL(find(clientPath));
+
+  // NOTE: We don't currently update the `allocation` for the root
+  // node. This is debatable, but the current implementation doesn't
+  // require looking at the allocation of the root node.
+  while (current != root) {
+    current->allocation.subtract(slaveId, resources);
+    current = CHECK_NOTNULL(current->parent);
   }
+
+  // TODO(neilc): Avoid dirtying the tree in some circumstances.
+  dirty = true;
 }
 
 
 const hashmap<SlaveID, Resources>& DRFSorter::allocation(
-    const string& name) const
+    const string& clientPath) const
 {
-  set<Client, DRFComparator>::iterator it = find(name);
-  CHECK(it != clients.end());
-
-  return it->allocation.resources;
+  const Node* client = CHECK_NOTNULL(find(clientPath));
+  return client->allocation.resources;
 }
 
 
 const Resources& DRFSorter::allocationScalarQuantities(
-    const string& name) const
+    const string& clientPath) const
 {
-  set<Client, DRFComparator>::iterator it = find(name);
-  CHECK(it != clients.end());
-
-  return it->allocation.scalarQuantities;
+  const Node* client = CHECK_NOTNULL(find(clientPath));
+  return client->allocation.scalarQuantities;
 }
 
 
 hashmap<string, Resources> DRFSorter::allocation(const SlaveID& slaveId) const
 {
-  // TODO(jmlvanre): We can index the allocation by slaveId to make this faster.
-  // It is a tradeoff between speed vs. memory. For now we use existing data
-  // structures.
-
   hashmap<string, Resources> result;
 
-  foreach (const Client& client, clients) {
-    if (client.allocation.resources.contains(slaveId)) {
+  // We want to find the allocation that has been made to each client
+  // on a particular `slaveId`. Rather than traversing the tree
+  // looking for leaf nodes (clients), we can instead just iterate
+  // over the `clients` hashmap.
+  //
+  // TODO(jmlvanre): We can index the allocation by slaveId to make
+  // this faster.  It is a tradeoff between speed vs. memory. For now
+  // we use existing data structures.
+  foreachvalue (const Node* client, clients) {
+    if (client->allocation.resources.contains(slaveId)) {
       // It is safe to use `at()` here because we've just checked the
-      // existence of the key. This avoid un-necessary copies.
-      result.emplace(client.name, client.allocation.resources.at(slaveId));
+      // existence of the key. This avoids unnecessary copies.
+      string path = client->clientPath();
+      CHECK(!result.contains(path));
+      result.emplace(path, client->allocation.resources.at(slaveId));
     }
   }
 
@@ -245,14 +380,13 @@ hashmap<string, Resources> DRFSorter::allocation(const SlaveID& slaveId) const
 
 
 Resources DRFSorter::allocation(
-    const string& name,
+    const string& clientPath,
     const SlaveID& slaveId) const
 {
-  set<Client, DRFComparator>::iterator it = find(name);
-  CHECK(it != clients.end());
+  const Node* client = CHECK_NOTNULL(find(clientPath));
 
-  if (it->allocation.resources.contains(slaveId)) {
-    return it->allocation.resources.at(slaveId);
+  if (client->allocation.resources.contains(slaveId)) {
+    return client->allocation.resources.at(slaveId);
   }
 
   return Resources();
@@ -287,7 +421,7 @@ void DRFSorter::add(const SlaveID& slaveId, const Resources& resources)
     }
 
     // We have to recalculate all shares when the total resources
-    // change, but we put it off until sort is called so that if
+    // change, but we put it off until `sort` is called so that if
     // something else changes before the next allocation we don't
     // recalculate everything twice.
     dirty = true;
@@ -333,38 +467,49 @@ void DRFSorter::remove(const SlaveID& slaveId, const Resources& resources)
 vector<string> DRFSorter::sort()
 {
   if (dirty) {
-    set<Client, DRFComparator> temp;
+    std::function<void (Node*)> sortTree = [this, &sortTree](Node* node) {
+      foreach (Node* child, node->children) {
+        child->share = calculateShare(child);
+      }
 
-    foreach (Client client, clients) {
-      // Update the 'share' to get proper sorting.
-      client.share = calculateShare(client);
+      std::sort(node->children.begin(),
+                node->children.end(),
+                DRFSorter::Node::compareDRF);
 
-      temp.insert(client);
-    }
+      foreach (Node* child, node->children) {
+        sortTree(child);
+      }
+    };
 
-    clients = temp;
+    sortTree(root);
 
-    // Reset dirty to false so as not to re-calculate *all*
-    // shares unless another dirtying operation occurs.
     dirty = false;
   }
 
+  // Return the leaf nodes in the tree. The children of each node are
+  // already sorted in DRF order.
   vector<string> result;
 
-  foreach (const Client& client, clients) {
-    if (client.active) {
-      result.push_back(client.name);
+  std::function<void (const Node*)> listClients =
+      [this, &listClients, &result](const Node* node) {
+    if (node->active) {
+      result.push_back(node->clientPath());
     }
-  }
+
+    foreach (Node* child, node->children) {
+      listClients(child);
+    }
+  };
+
+  listClients(root);
 
   return result;
 }
 
 
-bool DRFSorter::contains(const string& name) const
+bool DRFSorter::contains(const string& clientPath) const
 {
-  set<Client, DRFComparator>::iterator it = find(name);
-  return it != clients.end();
+  return find(clientPath) != nullptr;
 }
 
 
@@ -374,23 +519,7 @@ int DRFSorter::count() const
 }
 
 
-void DRFSorter::updateShare(const string& name)
-{
-  set<Client, DRFComparator>::iterator it = find(name);
-  CHECK(it != clients.end());
-
-  Client client(*it);
-
-  // Update the 'share' to get proper sorting.
-  client.share = calculateShare(client);
-
-  // Remove and reinsert it to update the ordering appropriately.
-  clients.erase(it);
-  clients.insert(client);
-}
-
-
-double DRFSorter::calculateShare(const Client& client) const
+double DRFSorter::calculateShare(const Node* node) const
 {
   double share = 0.0;
 
@@ -408,21 +537,21 @@ double DRFSorter::calculateShare(const Client& client) const
     }
 
     if (scalar.value() > 0.0 &&
-        client.allocation.totals.contains(resourceName)) {
+        node->allocation.totals.contains(resourceName)) {
       const double allocation =
-        client.allocation.totals.at(resourceName).value();
+        node->allocation.totals.at(resourceName).value();
 
       share = std::max(share, allocation / scalar.value());
     }
   }
 
-  return share / clientWeight(client.name);
+  return share / findWeight(node);
 }
 
 
-double DRFSorter::clientWeight(const string& name) const
+double DRFSorter::findWeight(const Node* node) const
 {
-  Option<double> weight = weights.get(name);
+  Option<double> weight = weights.get(node->path);
 
   if (weight.isNone()) {
     return 1.0;
@@ -432,16 +561,15 @@ double DRFSorter::clientWeight(const string& name) const
 }
 
 
-set<Client, DRFComparator>::iterator DRFSorter::find(const string& name) const
+DRFSorter::Node* DRFSorter::find(const string& clientPath) const
 {
-  set<Client, DRFComparator>::iterator it;
-  for (it = clients.begin(); it != clients.end(); it++) {
-    if (name == it->name) {
-      break;
-    }
+  Option<Node*> client = clients.get(clientPath);
+
+  if (client.isNone()) {
+    return nullptr;
   }
 
-  return it;
+  return client.get();
 }
 
 } // namespace allocator {

http://git-wip-us.apache.org/repos/asf/mesos/blob/e5ef1992/src/master/allocator/sorter/drf/sorter.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.hpp b/src/master/allocator/sorter/drf/sorter.hpp
index 2ef2eb8..fee58d6 100644
--- a/src/master/allocator/sorter/drf/sorter.hpp
+++ b/src/master/allocator/sorter/drf/sorter.hpp
@@ -17,6 +17,7 @@
 #ifndef __MASTER_ALLOCATOR_SORTER_DRF_SORTER_HPP__
 #define __MASTER_ALLOCATOR_SORTER_DRF_SORTER_HPP__
 
+#include <algorithm>
 #include <set>
 #include <string>
 #include <vector>
@@ -25,6 +26,7 @@
 #include <mesos/resources.hpp>
 #include <mesos/values.hpp>
 
+#include <stout/check.hpp>
 #include <stout/hashmap.hpp>
 #include <stout/option.hpp>
 
@@ -38,20 +40,248 @@ namespace internal {
 namespace master {
 namespace allocator {
 
-struct Client
+class DRFSorter : public Sorter
 {
-  explicit Client(const std::string& _name)
-    : name(_name), share(0), active(true) {}
+public:
+  DRFSorter();
+
+  explicit DRFSorter(
+      const process::UPID& allocator,
+      const std::string& metricsPrefix);
+
+  virtual ~DRFSorter();
+
+  virtual void initialize(
+      const Option<std::set<std::string>>& fairnessExcludeResourceNames);
+
+  virtual void add(const std::string& clientPath);
+
+  virtual void remove(const std::string& clientPath);
+
+  virtual void activate(const std::string& clientPath);
+
+  virtual void deactivate(const std::string& clientPath);
+
+  virtual void updateWeight(const std::string& path, double weight);
+
+  virtual void allocated(
+      const std::string& clientPath,
+      const SlaveID& slaveId,
+      const Resources& resources);
+
+  virtual void update(
+      const std::string& clientPath,
+      const SlaveID& slaveId,
+      const Resources& oldAllocation,
+      const Resources& newAllocation);
+
+  virtual void unallocated(
+      const std::string& clientPath,
+      const SlaveID& slaveId,
+      const Resources& resources);
+
+  virtual const hashmap<SlaveID, Resources>& allocation(
+      const std::string& clientPath) const;
+
+  virtual const Resources& allocationScalarQuantities(
+      const std::string& clientPath) const;
+
+  virtual hashmap<std::string, Resources> allocation(
+      const SlaveID& slaveId) const;
+
+  virtual Resources allocation(
+      const std::string& clientPath,
+      const SlaveID& slaveId) const;
+
+  virtual const Resources& totalScalarQuantities() const;
+
+  virtual void add(const SlaveID& slaveId, const Resources& resources);
+
+  virtual void remove(const SlaveID& slaveId, const Resources& resources);
+
+  virtual std::vector<std::string> sort();
+
+  virtual bool contains(const std::string& clientPath) const;
+
+  virtual int count() const;
+
+private:
+  // A node in the sorter's tree.
+  struct Node;
 
+  // Returns the dominant resource share for the node.
+  double calculateShare(const Node* node) const;
+
+  // Returns the weight associated with the node. If no weight has
+  // been configured for the node's path, the default weight (1.0) is
+  // returned.
+  double findWeight(const Node* node) const;
+
+  // Returns the client associated with the given path. Returns
+  // nullptr if the path is not found or if the path identifies an
+  // internal node in the tree (not a client).
+  Node* find(const std::string& clientPath) const;
+
+  // Resources (by name) that will be excluded from fair sharing.
+  Option<std::set<std::string>> fairnessExcludeResourceNames;
+
+  // If true, sort() will recalculate all shares.
+  bool dirty = false;
+
+  // The root node in the sorter tree.
+  Node* root;
+
+  // To speed lookups, we keep a map from client paths to the leaf
+  // node associated with that client. There is an entry in this map
+  // for every leaf node in the client tree (except for the root when
+  // the tree is empty). Paths in this map do NOT contain the trailing
+  // "." label we use for leaf nodes.
+  hashmap<std::string, Node*> clients;
+
+  // Weights associated with role paths. Setting the weight for a path
+  // influences the share of all nodes in the subtree rooted at that
+  // path. This hashmap might include weights for paths that are not
+  // currently in the sorter tree.
+  hashmap<std::string, double> weights;
+
+  // Total resources.
+  struct Total
+  {
+    // We need to keep track of the resources (and not just scalar
+    // quantities) to account for multiple copies of the same shared
+    // resources. We need to ensure that we do not update the scalar
+    // quantities for shared resources when the change is only in the
+    // number of copies in the sorter.
+    hashmap<SlaveID, Resources> resources;
+
+    // NOTE: Scalars can be safely aggregated across slaves. We keep
+    // that to speed up the calculation of shares. See MESOS-2891 for
+    // the reasons why we want to do that.
+    //
+    // NOTE: We omit information about dynamic reservations and
+    // persistent volumes here to enable resources to be aggregated
+    // across slaves more effectively. See MESOS-4833 for more
+    // information.
+    //
+    // Sharedness info is also stripped out when resource identities
+    // are omitted because sharedness inherently refers to the
+    // identities of resources and not quantities.
+    Resources scalarQuantities;
+
+    // We also store a map version of `scalarQuantities`, mapping
+    // the `Resource::name` to aggregated scalar. This improves the
+    // performance of calculating shares. See MESOS-4694.
+    //
+    // TODO(bmahler): Ideally we do not store `scalarQuantities`
+    // redundantly here, investigate performance improvements to
+    // `Resources` to make this unnecessary.
+    hashmap<std::string, Value::Scalar> totals;
+  } total_;
+
+  // Metrics are optionally exposed by the sorter.
+  friend Metrics;
+  Option<Metrics> metrics;
+};
+
+
+// Represents a node in the sorter's tree. The structure of the tree
+// reflects the hierarchical relationships between the clients of the
+// sorter. Some (but not all) nodes correspond to sorter clients; some
+// nodes only exist to represent the structure of the sorter
+// tree. Clients are always associated with leaf nodes.
+//
+// For example, if there are two sorter clients "a/b" and "c/d", the
+// tree will contain five nodes: the root node, internal nodes for "a"
+// and "c", and leaf nodes for the clients "a/b" and "c/d".
+struct DRFSorter::Node
+{
+  Node(const std::string& _name, Node* _parent)
+    : name(_name), share(0), active(false), parent(_parent)
+  {
+    // Compute the node's path. Three cases:
+    //
+    //  (1) If the root node, use the empty string
+    //  (2) If a child of the root node, use the child's name
+    //  (3) Otherwise, use the parent's name, "/", and the child's name.
+    if (parent == nullptr) {
+      path = "";
+    } else if (parent->parent == nullptr) {
+      path = name;
+    } else {
+      path = strings::join("/", parent->path, name);
+    }
+  }
+
+  ~Node()
+  {
+    foreach (Node* child, children) {
+      delete child;
+    }
+  }
+
+  // The label of the edge from this node's parent to the
+  // node. "Implicit" leaf nodes are always named ".".
+  //
+  // TODO(neilc): Consider naming implicit leaf nodes in a clearer
+  // way, e.g., by making `name` an Option?
   std::string name;
+
+  // Complete path from root to node. This includes the trailing "."
+  // label for virtual leaf nodes.
+  std::string path;
+
   double share;
+
+  // True if this node represents an active sorter client. False if
+  // this node represents an inactive sorter client or an internal node.
+  //
+  // TODO(neilc): Replace this with a three-valued enum?
   bool active;
 
-  // Allocation for a client.
-  struct Allocation {
+  Node* parent;
+  std::vector<Node*> children;
+
+  // If this node represents a sorter client, this returns the path of
+  // that client. Unlike the `path` field, this does NOT include the
+  // trailing "." label for virtual leaf nodes.
+  //
+  // For example, if the sorter contains two clients "a" and "a/b",
+  // the tree will contain four nodes: the root node, "a", "a/."
+  // (virtual leaf), and "a/b". The `clientPath()` of "a/." is "a",
+  // because that is the name of the client associated with that
+  // virtual leaf node.
+  std::string clientPath() const
+  {
+    if (name == ".") {
+      return CHECK_NOTNULL(parent)->path;
+    }
+
+    return path;
+  }
+
+  void removeChild(const Node* child)
+  {
+    auto it = std::find(children.begin(), children.end(), child);
+    CHECK(it != children.end());
+
+    children.erase(it);
+  }
+
+  void addChild(Node* child)
+  {
+    auto it = std::find(children.begin(), children.end(), child);
+    CHECK(it == children.end());
+
+    children.push_back(child);
+  }
+
+  // Allocation for a node.
+  struct Allocation
+  {
     Allocation() : count(0) {}
 
-    void add(const SlaveID& slaveId, const Resources& toAdd) {
+    void add(const SlaveID& slaveId, const Resources& toAdd)
+    {
       // Add shared resources to the allocated quantities when the same
       // resources don't already exist in the allocation.
       const Resources sharedToAdd = toAdd.shared()
@@ -72,7 +302,8 @@ struct Client
       count++;
     }
 
-    void subtract(const SlaveID& slaveId, const Resources& toRemove) {
+    void subtract(const SlaveID& slaveId, const Resources& toRemove)
+    {
       CHECK(resources.contains(slaveId));
       CHECK(resources.at(slaveId).contains(toRemove));
 
@@ -103,7 +334,8 @@ struct Client
     void update(
         const SlaveID& slaveId,
         const Resources& oldAllocation,
-        const Resources& newAllocation) {
+        const Resources& newAllocation)
+    {
       const Resources oldAllocationQuantity =
         oldAllocation.createStrippedScalarQuantity();
       const Resources newAllocationQuantity =
@@ -156,143 +388,20 @@ struct Client
     // `Resources` to make this unnecessary.
     hashmap<std::string, Value::Scalar> totals;
   } allocation;
-};
-
-
-struct DRFComparator
-{
-  virtual ~DRFComparator() {}
-  virtual bool operator()(const Client& client1, const Client& client2);
-};
-
-
-class DRFSorter : public Sorter
-{
-public:
-  DRFSorter() = default;
-
-  explicit DRFSorter(
-      const process::UPID& allocator,
-      const std::string& metricsPrefix);
-
-  virtual ~DRFSorter() {}
-
-  virtual void initialize(
-      const Option<std::set<std::string>>& fairnessExcludeResourceNames);
-
-  virtual void add(const std::string& name);
-
-  virtual void remove(const std::string& name);
-
-  virtual void activate(const std::string& name);
-
-  virtual void deactivate(const std::string& name);
-
-  virtual void updateWeight(const std::string& name, double weight);
-
-  virtual void allocated(
-      const std::string& name,
-      const SlaveID& slaveId,
-      const Resources& resources);
-
-  virtual void update(
-      const std::string& name,
-      const SlaveID& slaveId,
-      const Resources& oldAllocation,
-      const Resources& newAllocation);
-
-  virtual void unallocated(
-      const std::string& name,
-      const SlaveID& slaveId,
-      const Resources& resources);
-
-  virtual const hashmap<SlaveID, Resources>& allocation(
-      const std::string& name) const;
-
-  virtual const Resources& allocationScalarQuantities(
-      const std::string& name) const;
-
-  virtual hashmap<std::string, Resources> allocation(
-      const SlaveID& slaveId) const;
-
-  virtual Resources allocation(
-      const std::string& name,
-      const SlaveID& slaveId) const;
-
-  virtual const Resources& totalScalarQuantities() const;
-
-  virtual void add(const SlaveID& slaveId, const Resources& resources);
-
-  virtual void remove(const SlaveID& slaveId, const Resources& resources);
-
-  virtual std::vector<std::string> sort();
-
-  virtual bool contains(const std::string& name) const;
-
-  virtual int count() const;
-
-private:
-  // Recalculates the share for the client and moves
-  // it in 'clients' accordingly.
-  void updateShare(const std::string& name);
-
-  // Returns the dominant resource share for the client.
-  double calculateShare(const Client& client) const;
-
-  // Resources (by name) that will be excluded from fair sharing.
-  Option<std::set<std::string>> fairnessExcludeResourceNames;
-
-  // Returns the weight associated with the given path. If no weight
-  // has been configured, the default weight (1.0) is returned.
-  double clientWeight(const std::string& name) const;
-
-  // Returns an iterator to the specified client, if
-  // it exists in this Sorter.
-  std::set<Client, DRFComparator>::iterator find(const std::string& name) const;
-
-  // If true, sort() will recalculate all shares.
-  bool dirty = false;
-
-  // The set of clients, sorted by share.
-  std::set<Client, DRFComparator> clients;
 
-  // Maps client names to the weights that should be applied to their shares.
-  hashmap<std::string, double> weights;
-
-  // Total resources.
-  struct Total {
-    // We need to keep track of the resources (and not just scalar quantities)
-    // to account for multiple copies of the same shared resources. We need to
-    // ensure that we do not update the scalar quantities for shared resources
-    // when the change is only in the number of copies in the sorter.
-    hashmap<SlaveID, Resources> resources;
-
-    // NOTE: Scalars can be safely aggregated across slaves. We keep
-    // that to speed up the calculation of shares. See MESOS-2891 for
-    // the reasons why we want to do that.
-    //
-    // NOTE: We omit information about dynamic reservations and persistent
-    // volumes here to enable resources to be aggregated across slaves
-    // more effectively. See MESOS-4833 for more information.
-    //
-    // Sharedness info is also stripped out when resource identities are
-    // omitted because sharedness inherently refers to the identities of
-    // resources and not quantities.
-    Resources scalarQuantities;
+  // Compares two nodes according to DRF share.
+  static bool compareDRF(const Node* left, const Node* right)
+  {
+    if (left->share != right->share) {
+      return left->share < right->share;
+    }
 
-    // We also store a map version of `scalarQuantities`, mapping
-    // the `Resource::name` to aggregated scalar. This improves the
-    // performance of calculating shares. See MESOS-4694.
-    //
-    // TODO(bmahler): Ideally we do not store `scalarQuantities`
-    // redundantly here, investigate performance improvements to
-    // `Resources` to make this unnecessary.
-    hashmap<std::string, Value::Scalar> totals;
-  } total_;
+    if (left->allocation.count != right->allocation.count) {
+      return left->allocation.count < right->allocation.count;
+    }
 
-  // Metrics are optionally exposed by the sorter.
-  friend Metrics;
-  Option<Metrics> metrics;
+    return left->path < right->path;
+  }
 };
 
 } // namespace allocator {

http://git-wip-us.apache.org/repos/asf/mesos/blob/e5ef1992/src/master/allocator/sorter/sorter.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/sorter.hpp b/src/master/allocator/sorter/sorter.hpp
index 4de249e..0c9f172 100644
--- a/src/master/allocator/sorter/sorter.hpp
+++ b/src/master/allocator/sorter/sorter.hpp
@@ -71,13 +71,14 @@ public:
   // It is a no-op if the client is already not in the sort.
   virtual void deactivate(const std::string& client) = 0;
 
-  // Updates the weight of a client name. The sorter will store this
-  // weight regardless of whether a client with this name has been
-  // added. If a client's weight is not changed, the default weight
-  // (1.0) is used. This interface does not support unsetting
-  // previously set weights; instead, a weight should be reset to the
-  // default value.
-  virtual void updateWeight(const std::string& client, double weight) = 0;
+  // Updates the weight of a client path. This changes the sorter's
+  // behavior for all clients in the subtree identified by this path
+  // (both clients currently in the sorter and any clients that may be
+  // added later). If a client's weight is not explicitly set, the
+  // default weight of 1.0 is used. This interface does not support
+  // unsetting previously set weights; instead, the weight should be
+  // reset to the default value.
+  virtual void updateWeight(const std::string& path, double weight) = 0;
 
   // Specify that resources have been allocated to the given client.
   virtual void allocated(

http://git-wip-us.apache.org/repos/asf/mesos/blob/e5ef1992/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index 1e2eb96..ad03e17 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -704,6 +704,121 @@ TEST_F(HierarchicalAllocatorTest, DRFWithFairnessExclusion)
 }
 
 
+// This test checks allocator behavior when offering resources to
+// frameworks that register using nested ("hierarchical") roles.
+TEST_F(HierarchicalAllocatorTest, NestedRoleDRF)
+{
+  // Pausing the clock is not necessary, but ensures that the test
+  // doesn't rely on the batch allocation in the allocator, which
+  // would slow down the test.
+  Clock::pause();
+
+  initialize();
+
+  // Total cluster resources will become cpus=2, mem=1024.
+  SlaveInfo slave1 = createSlaveInfo("cpus:2;mem:1024;disk:0");
+  allocator->addSlave(
+      slave1.id(),
+      slave1,
+      AGENT_CAPABILITIES(),
+      None(),
+      slave1.resources(),
+      {});
+
+  // framework1 will be offered all of slave1's resources since it is
+  // the only framework running so far.
+  FrameworkInfo framework1 = createFrameworkInfo({"a/b"});
+  allocator->addFramework(framework1.id(), framework1, {}, true);
+
+  {
+    Allocation expected = Allocation(
+        framework1.id(),
+        {{"a/b", {{slave1.id(), slave1.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocations.get());
+  }
+
+  // a share = 1 (cpus=2, mem=1024)
+  //   a/b share = 1 (cpus=2, mem=1024)
+  //     framework1 share = 1
+
+  // Add a new slave, along with two new frameworks in roles "a/c" and
+  // "d/e". We expect the new slave's resources to be offered to "d/e"
+  // rather than "a/c", since the role subtree under "a" has more
+  // resources than the "d" subtree.
+
+  // Total cluster resources will become cpus=3, mem=1536.
+  SlaveInfo slave2 = createSlaveInfo("cpus:1;mem:512;disk:0");
+  allocator->addSlave(
+      slave2.id(),
+      slave2,
+      AGENT_CAPABILITIES(),
+      None(),
+      slave2.resources(),
+      {});
+
+  FrameworkInfo framework2 = createFrameworkInfo({"a/c"});
+  allocator->addFramework(framework2.id(), framework2, {}, true);
+
+  FrameworkInfo framework3 = createFrameworkInfo({"d/e"});
+  allocator->addFramework(framework3.id(), framework3, {}, true);
+
+  {
+    Allocation expected = Allocation(
+        framework3.id(),
+        {{"d/e", {{slave2.id(), slave2.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocations.get());
+  }
+
+  // a share = 0.666667 (cpus=2, mem=1024)
+  //   a/b share = 0.666667 (cpus=2, mem=1024)
+  //     framework1 share = 1
+  //   a/c share = 0
+  //     framework2 share = 0
+  // d share = 0.333333 (cpus=1, mem=512)
+  //   d/e share = 0.333333 (cpus=1, mem=512)
+  //     framework3 share = 1
+
+  // Add a new slave and a new framework in the role "d/f". The new
+  // slave's resources should be allocated to the new framework (and
+  // not the framework in "a/c"), because the "d" subtree has fewer
+  // allocated resources than the "a" subtree.
+
+  // Total cluster resources will become cpus=5, mem=2560.
+  SlaveInfo slave3 = createSlaveInfo("cpus:2;mem:1024;disk:0");
+  allocator->addSlave(
+      slave3.id(),
+      slave3,
+      AGENT_CAPABILITIES(),
+      None(),
+      slave3.resources(),
+      {});
+
+  FrameworkInfo framework4 = createFrameworkInfo({"d/f"});
+  allocator->addFramework(framework4.id(), framework4, {}, true);
+
+  {
+    Allocation expected = Allocation(
+        framework4.id(),
+        {{"d/f", {{slave3.id(), slave3.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocations.get());
+  }
+
+  // a share = 0.4 (cpus=2, mem=1024)
+  //   a/b share = 0.4 (cpus=2, mem=1024)
+  //     framework1 share = 1
+  //   a/c share = 0
+  //     framework2 share = 0
+  // d share = 0.6 (cpus=3, mem=1536)
+  //   d/e share = 0.2 (cpus=1, mem=512)
+  //     framework3 share = 1
+  //   d/f share = 0.4 (cpus=2, mem=1024)
+  //     framework4 share = 1
+}
+
+
 // This test ensures that an offer filter larger than the
 // allocation interval effectively filters out resources.
 TEST_F(HierarchicalAllocatorTest, OfferFilter)
@@ -4298,8 +4413,8 @@ TEST_F(HierarchicalAllocatorTest, DisproportionateQuotaVsAllocation)
 }
 
 
-// This test checks that quota guarantees work correctly when a nested
-// role is created as a child of an existing role that has quota.
+// This test checks that quota guarantees work as expected when a
+// nested role is created as a child of an existing quota'd role.
 TEST_F(HierarchicalAllocatorTest, NestedRoleQuota)
 {
   // Pausing the clock is not necessary, but ensures that the test
@@ -4313,48 +4428,48 @@ TEST_F(HierarchicalAllocatorTest, NestedRoleQuota)
   const string CHILD_ROLE1 = "a/b/c";
   const string CHILD_ROLE2 = "a/b/d";
 
-  // Create `framework1` and set quota for its role.
+  // Create `framework1` in PARENT_ROLE and set quota for its role.
   FrameworkInfo framework1 = createFrameworkInfo({PARENT_ROLE});
   allocator->addFramework(framework1.id(), framework1, {}, true);
 
   const Quota parentQuota = createQuota(PARENT_ROLE, "cpus:2;mem:1024");
   allocator->setQuota(PARENT_ROLE, parentQuota);
 
-  SlaveInfo agent1 = createSlaveInfo("cpus:1;mem:512;disk:0");
+  SlaveInfo agent = createSlaveInfo("cpus:1;mem:512");
   allocator->addSlave(
-      agent1.id(),
-      agent1,
+      agent.id(),
+      agent,
       AGENT_CAPABILITIES(),
       None(),
-      agent1.resources(),
+      agent.resources(),
       {});
 
-  // `framework1` will be offered all of `agent1`'s resources because
+  // `framework1` will be offered all the resources on `agent` because
   // it is the only framework in the only role with unsatisfied quota.
   {
     Allocation expected = Allocation(
         framework1.id(),
-        {{PARENT_ROLE, {{agent1.id(), agent1.resources()}}}});
+        {{PARENT_ROLE, {{agent.id(), agent.resources()}}}});
 
     AWAIT_EXPECT_EQ(expected, allocations.get());
   }
 
-  // `framework1` declines the resources on `agent1` for the duration
+  // `framework1` declines the resources on `agent` for the duration
   // of the test.
   Filters longFilter;
   longFilter.set_refuse_seconds(flags.allocation_interval.secs() * 10);
 
   allocator->recoverResources(
       framework1.id(),
-      agent1.id(),
-      allocatedResources(agent1.resources(), PARENT_ROLE),
+      agent.id(),
+      allocatedResources(agent.resources(), PARENT_ROLE),
       longFilter);
 
-  // Register a framework in CHILD_ROLE1, which is a child role of
-  // PARENT_ROLE. In the current implementation, because CHILD_ROLE1
-  // does not itself have quota, it will not be offered any of
-  // PARENT_ROLE's quota'd resources. This behavior may change in the
-  // future (MESOS-7150).
+  // Create `framework2` in CHILD_ROLE1, which is a child role of
+  // PARENT_ROLE. CHILD_ROLE1 does not have quota. In the current
+  // implementation, because CHILD_ROLE1 does not itself have quota,
+  // it will not be offered any of PARENT_ROLE's quota'd resources.
+  // This behavior may change in the future (MESOS-7150).
   FrameworkInfo framework2 = createFrameworkInfo({CHILD_ROLE1});
   allocator->addFramework(framework2.id(), framework2, {}, true);
 
@@ -4366,9 +4481,9 @@ TEST_F(HierarchicalAllocatorTest, NestedRoleQuota)
   Future<Allocation> allocation = allocations.get();
   EXPECT_TRUE(allocation.isPending());
 
-  // Register a framework in CHILD_ROLE2, which is a child role of
-  // PARENT_ROLE. Because CHILD_ROLE2 has quota, it will be offered
-  // resources.
+  // Create `framework3` in CHILD_ROLE2, which is a child role of
+  // PARENT_ROLE. CHILD_ROLE2 has quota, so in the current
+  // implementation, it will be offered resources.
   FrameworkInfo framework3 = createFrameworkInfo({CHILD_ROLE2});
   allocator->addFramework(framework3.id(), framework3, {}, true);
 
@@ -4378,13 +4493,107 @@ TEST_F(HierarchicalAllocatorTest, NestedRoleQuota)
   {
     Allocation expected = Allocation(
         framework3.id(),
-        {{CHILD_ROLE2, {{agent1.id(), agent1.resources()}}}});
+        {{CHILD_ROLE2, {{agent.id(), agent.resources()}}}});
 
     AWAIT_EXPECT_EQ(expected, allocation);
   }
 }
 
 
+// This test checks that quota guarantees work as expected when a
+// nested role is created as a child of an existing quota'd role, and
+// the parent role has been allocated resources.
+TEST_F(HierarchicalAllocatorTest, NestedRoleQuotaAllocateToParent)
+{
+  // Pausing the clock is not necessary, but ensures that the test
+  // doesn't rely on the batch allocation in the allocator, which
+  // would slow down the test.
+  Clock::pause();
+
+  initialize();
+
+  const string PARENT_ROLE = "a/b";
+  const string CHILD_ROLE = "a/b/c";
+
+  // Set quota for parent role.
+  const Quota parentQuota = createQuota(PARENT_ROLE, "cpus:4;mem:2048");
+  allocator->setQuota(PARENT_ROLE, parentQuota);
+
+  // Create `framework1` in the parent role.
+  FrameworkInfo framework1 = createFrameworkInfo({PARENT_ROLE});
+  allocator->addFramework(framework1.id(), framework1, {}, true);
+
+  SlaveInfo agent1 = createSlaveInfo("cpus:2;mem:1024");
+  allocator->addSlave(
+      agent1.id(),
+      agent1,
+      AGENT_CAPABILITIES(),
+      None(),
+      agent1.resources(),
+      {});
+
+  // `framework1` will be offered all of the resources on `agent1`.
+  {
+    Allocation expected = Allocation(
+        framework1.id(),
+        {{PARENT_ROLE, {{agent1.id(), agent1.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocations.get());
+  }
+
+  // Create `framework2` in the child role.
+  FrameworkInfo framework2 = createFrameworkInfo({CHILD_ROLE});
+  allocator->addFramework(framework2.id(), framework2, {}, true);
+
+  const Quota childQuota = createQuota(CHILD_ROLE, "cpus:1;mem:512");
+  allocator->setQuota(CHILD_ROLE, childQuota);
+
+  SlaveInfo agent2 = createSlaveInfo("cpus:1;mem:512");
+  allocator->addSlave(
+      agent2.id(),
+      agent2,
+      AGENT_CAPABILITIES(),
+      None(),
+      agent2.resources(),
+      {});
+
+  // `framework2` will be offered all of the resources on `agent2`.
+  {
+    Allocation expected = Allocation(
+        framework2.id(),
+        {{CHILD_ROLE, {{agent2.id(), agent2.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocations.get());
+  }
+
+  SlaveInfo agent3 = createSlaveInfo("cpus:1;mem:512");
+  allocator->addSlave(
+      agent3.id(),
+      agent3,
+      AGENT_CAPABILITIES(),
+      None(),
+      agent3.resources(),
+      {});
+
+  // `framework1` will be offered all of the resources on `agent3`.
+  //
+  // NOTE: The quota on PARENT_ROLE actually applies to the entire
+  // subtree rooted at PARENT_ROLE, which includes CHILD_ROLE.
+  // Therefore, `framework1` and `framework2` should both be
+  // candidates to receive the resources at `agent3`. In the current
+  // implementation, we don't "delegate" the PARENT_ROLE quota to the
+  // entire subtree; rather, it can only be used by roles in the
+  // subtree that have quota set (MESOS-7150).
+  {
+    Allocation expected = Allocation(
+        framework1.id(),
+        {{PARENT_ROLE, {{agent3.id(), agent3.resources()}}}});
+
+    AWAIT_EXPECT_EQ(expected, allocations.get());
+  }
+}
+
+
 class HierarchicalAllocatorTestWithParam
   : public HierarchicalAllocatorTestBase,
     public WithParamInterface<bool> {};

http://git-wip-us.apache.org/repos/asf/mesos/blob/e5ef1992/src/tests/master_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_allocator_tests.cpp b/src/tests/master_allocator_tests.cpp
index 119e318..3b072b2 100644
--- a/src/tests/master_allocator_tests.cpp
+++ b/src/tests/master_allocator_tests.cpp
@@ -1780,6 +1780,158 @@ TYPED_TEST(MasterAllocatorTest, RebalancedForUpdatedWeights)
 }
 #endif // __WINDOWS__
 
+
+// Checks that accepting offers and launching tasks works as expected
+// with nested roles.
+TYPED_TEST(MasterAllocatorTest, NestedRoles)
+{
+  Clock::pause();
+
+  TestAllocator<TypeParam> allocator;
+
+  EXPECT_CALL(allocator, initialize(_, _, _, _));
+
+  master::Flags masterFlags = this->CreateMasterFlags();
+  Try<Owned<cluster::Master>> master =
+    this->StartMaster(&allocator, masterFlags);
+  ASSERT_SOME(master);
+
+  EXPECT_CALL(allocator, addSlave(_, _, _, _, _, _));
+
+  MockExecutor exec1(DEFAULT_EXECUTOR_ID);
+  TestContainerizer containerizer1(&exec1);
+
+  slave::Flags slaveFlags1 = this->CreateSlaveFlags();
+  slaveFlags1.resources = Some("cpus:2;mem:1024");
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave1 =
+    this->StartSlave(detector.get(), &containerizer1, slaveFlags1);
+  ASSERT_SOME(slave1);
+
+  // Advance clock to force agent to register.
+  Clock::advance(slaveFlags1.authentication_backoff_factor);
+  Clock::advance(slaveFlags1.registration_backoff_factor);
+
+  // Register a framework in the "a/b" role and launch a single task,
+  // consuming all the resources on `slave1`.
+  FrameworkInfo frameworkInfo1 = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo1.set_role("a/b");
+
+  MockScheduler sched1;
+  MesosSchedulerDriver driver1(
+      &sched1, frameworkInfo1, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(allocator, addFramework(_, _, _, _));
+
+  EXPECT_CALL(sched1, registered(_, _, _));
+
+  EXPECT_CALL(sched1, resourceOffers(&driver1, OfferEq(2, 1024)))
+    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 2, 1024, "a/b"));
+
+  EXPECT_CALL(exec1, registered(_, _, _, _));
+
+  EXPECT_CALL(exec1, launchTask(_, _))
+    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));
+
+  Future<TaskStatus> runningStatus1;
+  EXPECT_CALL(sched1, statusUpdate(&driver1, _))
+    .WillOnce(FutureArg<1>(&runningStatus1));
+
+  driver1.start();
+
+  AWAIT_READY(runningStatus1);
+  EXPECT_EQ(TASK_RUNNING, runningStatus1->state());
+
+  // Register a framework in the "a/c" role. It should not get any
+  // offers, because there are no unused resources.
+  FrameworkInfo frameworkInfo2 = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo2.set_role("a/c");
+
+  MockScheduler sched2;
+  MesosSchedulerDriver driver2(
+      &sched2, frameworkInfo2, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(allocator, addFramework(_, _, _, _));
+
+  Future<Nothing> framework2Registered;
+  EXPECT_CALL(sched2, registered(_, _, _))
+    .WillOnce(FutureSatisfy(&framework2Registered));
+
+  driver2.start();
+
+  AWAIT_READY(framework2Registered);
+
+  // Register a framework in the "b/x" role. It should not get any
+  // offers, because there are no unused resources.
+  FrameworkInfo frameworkInfo3 = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo3.set_role("b/x");
+
+  MockScheduler sched3;
+  MesosSchedulerDriver driver3(
+      &sched3, frameworkInfo3, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(allocator, addFramework(_, _, _, _));
+
+  Future<Nothing> framework3Registered;
+  EXPECT_CALL(sched3, registered(_, _, _))
+    .WillOnce(FutureSatisfy(&framework3Registered));
+
+  driver3.start();
+
+  AWAIT_READY(framework3Registered);
+
+  // Start a second agent. The resources on this agent should be
+  // offered to `sched3`, because the "b" role subtree is below its
+  // fair-share.
+  MockExecutor exec2(DEFAULT_EXECUTOR_ID);
+  TestContainerizer containerizer2(&exec2);
+
+  EXPECT_CALL(allocator, addSlave(_, _, _, _, _, _));
+
+  EXPECT_CALL(sched3, resourceOffers(&driver3, OfferEq(1, 512)))
+    .WillOnce(LaunchTasks(DEFAULT_EXECUTOR_INFO, 1, 1, 512, "b/x"));
+
+  EXPECT_CALL(exec2, registered(_, _, _, _));
+
+  EXPECT_CALL(exec2, launchTask(_, _))
+    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));
+
+  Future<TaskStatus> runningStatus2;
+  EXPECT_CALL(sched3, statusUpdate(&driver3, _))
+    .WillOnce(FutureArg<1>(&runningStatus2));
+
+  slave::Flags slaveFlags2 = this->CreateSlaveFlags();
+  slaveFlags2.resources = Some("cpus:1;mem:512");
+
+  Try<Owned<cluster::Slave>> slave2 =
+    this->StartSlave(detector.get(), &containerizer2, slaveFlags2);
+  ASSERT_SOME(slave2);
+
+  // Advance clock to force agent to register.
+  Clock::advance(slaveFlags2.authentication_backoff_factor);
+  Clock::advance(slaveFlags2.registration_backoff_factor);
+
+  AWAIT_READY(runningStatus2);
+  EXPECT_EQ(TASK_RUNNING, runningStatus2->state());
+
+  EXPECT_CALL(exec1, shutdown(_))
+    .Times(AtMost(1));
+
+  EXPECT_CALL(exec2, shutdown(_))
+    .Times(AtMost(1));
+
+  driver1.stop();
+  driver1.join();
+
+  driver2.stop();
+  driver2.join();
+
+  driver3.stop();
+  driver3.join();
+}
+
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/e5ef1992/src/tests/sorter_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/sorter_tests.cpp b/src/tests/sorter_tests.cpp
index 7ca6fca..2ddf64e 100644
--- a/src/tests/sorter_tests.cpp
+++ b/src/tests/sorter_tests.cpp
@@ -51,6 +51,8 @@ TEST(SorterTest, DRFSorter)
   Resources totalResources = Resources::parse("cpus:100;mem:100").get();
   sorter.add(slaveId, totalResources);
 
+  EXPECT_EQ(vector<string>({}), sorter.sort());
+
   sorter.add("a");
   Resources aResources = Resources::parse("cpus:5;mem:5").get();
   sorter.allocated("a", slaveId, aResources);
@@ -74,6 +76,7 @@ TEST(SorterTest, DRFSorter)
   EXPECT_EQ(vector<string>({"c", "d", "a", "b"}), sorter.sort());
 
   sorter.remove("a");
+
   Resources bUnallocated = Resources::parse("cpus:4;mem:4").get();
   sorter.unallocated("b", slaveId, bUnallocated);
 
@@ -284,6 +287,539 @@ TEST(SorterTest, CountAllocations)
 }
 
 
+// This test checks a simple case of hierarchical allocation: the same
+// sequence of operations happens as in the `DRFSorter` test, but all
+// client names are nested into disjoint branches of the tree. In this
+// case, the hierarchy should not change allocation behavior.
+TEST(SorterTest, ShallowHierarchy)
+{
+  DRFSorter sorter;
+
+  SlaveID slaveId;
+  slaveId.set_value("agentId");
+
+  Resources totalResources = Resources::parse("cpus:100;mem:100").get();
+  sorter.add(slaveId, totalResources);
+
+  sorter.add("a/a");
+
+  Resources aResources = Resources::parse("cpus:5;mem:5").get();
+  sorter.allocated("a/a", slaveId, aResources);
+
+  Resources bResources = Resources::parse("cpus:6;mem:6").get();
+  sorter.add("b/b");
+  sorter.allocated("b/b", slaveId, bResources);
+
+  // shares: a/a = .05, b/b = .06
+  EXPECT_EQ(vector<string>({"a/a", "b/b"}), sorter.sort());
+
+  Resources cResources = Resources::parse("cpus:1;mem:1").get();
+  sorter.add("c/c");
+  sorter.allocated("c/c", slaveId, cResources);
+
+  Resources dResources = Resources::parse("cpus:3;mem:1").get();
+  sorter.add("d/d");
+  sorter.allocated("d/d", slaveId, dResources);
+
+  // shares: a/a = .05, b/b = .06, c/c = .01, d/d = .03
+  EXPECT_EQ(vector<string>({"c/c", "d/d", "a/a", "b/b"}), sorter.sort());
+
+  sorter.remove("a/a");
+
+  Resources bUnallocated = Resources::parse("cpus:4;mem:4").get();
+  sorter.unallocated("b/b", slaveId, bUnallocated);
+
+  // shares: b/b = .02, c/c = .01, d/d = .03
+  EXPECT_EQ(vector<string>({"c/c", "b/b", "d/d"}), sorter.sort());
+
+  Resources eResources = Resources::parse("cpus:1;mem:5").get();
+  sorter.add("e/e");
+  sorter.allocated("e/e", slaveId, eResources);
+
+  Resources removedResources = Resources::parse("cpus:50;mem:0").get();
+  sorter.remove(slaveId, removedResources);
+  // total resources is now cpus = 50, mem = 100
+
+  // shares: b/b = .04, c/c = .02, d/d = .06, e/e = .05
+  EXPECT_EQ(vector<string>({"c/c", "b/b", "e/e", "d/d"}), sorter.sort());
+
+  Resources addedResources = Resources::parse("cpus:0;mem:100").get();
+  sorter.add(slaveId, addedResources);
+  // total resources is now cpus = 50, mem = 200
+
+  Resources fResources = Resources::parse("cpus:5;mem:1").get();
+  sorter.add("f/f");
+  sorter.allocated("f/f", slaveId, fResources);
+
+  Resources cResources2 = Resources::parse("cpus:0;mem:15").get();
+  sorter.allocated("c/c", slaveId, cResources2);
+
+  // shares: b = .04, c = .08, d = .06, e = .025, f = .1
+  EXPECT_EQ(vector<string>({"e/e", "b/b", "d/d", "c/c", "f/f"}), sorter.sort());
+
+  EXPECT_TRUE(sorter.contains("b/b"));
+
+  EXPECT_FALSE(sorter.contains("a/a"));
+
+  EXPECT_EQ(5, sorter.count());
+
+  sorter.deactivate("d/d");
+
+  EXPECT_TRUE(sorter.contains("d/d"));
+
+  EXPECT_EQ(vector<string>({"e/e", "b/b", "c/c", "f/f"}), sorter.sort());
+
+  EXPECT_EQ(5, sorter.count());
+
+  sorter.activate("d/d");
+
+  EXPECT_EQ(vector<string>({"e/e", "b/b", "d/d", "c/c", "f/f"}), sorter.sort());
+}
+
+
+// Analogous to `ShallowHierarchy` except the client names are nested
+// more deeply and different client names are at different depths in
+// the tree.
+TEST(SorterTest, DeepHierarchy)
+{
+  DRFSorter sorter;
+
+  SlaveID slaveId;
+  slaveId.set_value("agentId");
+
+  Resources totalResources = Resources::parse("cpus:100;mem:100").get();
+  sorter.add(slaveId, totalResources);
+
+  sorter.add("a/a/a/a/a");
+  Resources aResources = Resources::parse("cpus:5;mem:5").get();
+  sorter.allocated("a/a/a/a/a", slaveId, aResources);
+
+  Resources bResources = Resources::parse("cpus:6;mem:6").get();
+  sorter.add("b/b/b/b");
+  sorter.allocated("b/b/b/b", slaveId, bResources);
+
+  // shares: a/a/a/a/a = .05, b/b/b/b = .06
+  EXPECT_EQ(vector<string>({"a/a/a/a/a", "b/b/b/b"}), sorter.sort());
+
+  Resources cResources = Resources::parse("cpus:1;mem:1").get();
+  sorter.add("c/c/c");
+  sorter.allocated("c/c/c", slaveId, cResources);
+
+  Resources dResources = Resources::parse("cpus:3;mem:1").get();
+  sorter.add("d/d");
+  sorter.allocated("d/d", slaveId, dResources);
+
+  // shares: a/a/a/a/a = .05, b/b/b/b = .06, c/c/c = .01, d/d = .03
+  EXPECT_EQ(vector<string>({"c/c/c", "d/d", "a/a/a/a/a", "b/b/b/b"}),
+            sorter.sort());
+
+  sorter.remove("a/a/a/a/a");
+
+  Resources bUnallocated = Resources::parse("cpus:4;mem:4").get();
+  sorter.unallocated("b/b/b/b", slaveId, bUnallocated);
+
+  // shares: b/b/b/b = .02, c/c/c = .01, d/d = .03
+  EXPECT_EQ(vector<string>({"c/c/c", "b/b/b/b", "d/d"}), sorter.sort());
+
+  Resources eResources = Resources::parse("cpus:1;mem:5").get();
+  sorter.add("e/e/e/e/e/e");
+  sorter.allocated("e/e/e/e/e/e", slaveId, eResources);
+
+  Resources removedResources = Resources::parse("cpus:50;mem:0").get();
+  sorter.remove(slaveId, removedResources);
+  // total resources are now cpus = 50, mem = 100
+
+  // shares: b/b/b/b = .04, c/c/c = .02, d/d = .06, e/e/e/e/e/e = .05
+  EXPECT_EQ(vector<string>({"c/c/c", "b/b/b/b", "e/e/e/e/e/e", "d/d"}),
+            sorter.sort());
+
+  Resources addedResources = Resources::parse("cpus:0;mem:100").get();
+  sorter.add(slaveId, addedResources);
+  // total resources are now cpus = 50, mem = 200
+
+  Resources fResources = Resources::parse("cpus:5;mem:1").get();
+  sorter.add("f/f");
+  sorter.allocated("f/f", slaveId, fResources);
+
+  Resources cResources2 = Resources::parse("cpus:0;mem:15").get();
+  sorter.allocated("c/c/c", slaveId, cResources2);
+
+  // shares: b/b/b/b = .04, c/c/c = .08, d/d = .06, e/e/e/e/e/e = .025, f/f = .1
+  EXPECT_EQ(vector<string>({"e/e/e/e/e/e", "b/b/b/b", "d/d", "c/c/c", "f/f"}),
+            sorter.sort());
+
+  EXPECT_TRUE(sorter.contains("b/b/b/b"));
+
+  EXPECT_FALSE(sorter.contains("a/a/a/a/a"));
+
+  EXPECT_EQ(5, sorter.count());
+
+  sorter.deactivate("d/d");
+
+  EXPECT_TRUE(sorter.contains("d/d"));
+
+  EXPECT_EQ(vector<string>({"e/e/e/e/e/e", "b/b/b/b", "c/c/c", "f/f"}),
+            sorter.sort());
+
+  EXPECT_EQ(5, sorter.count());
+
+  sorter.activate("d/d");
+
+  EXPECT_EQ(vector<string>({"e/e/e/e/e/e", "b/b/b/b", "d/d", "c/c/c", "f/f"}),
+            sorter.sort());
+}
+
+
+TEST(SorterTest, HierarchicalAllocation)
+{
+  DRFSorter sorter;
+
+  SlaveID slaveId;
+  slaveId.set_value("agentId");
+
+  Resources totalResources = Resources::parse("cpus:100;mem:100").get();
+  sorter.add(slaveId, totalResources);
+
+  sorter.add("a");
+  sorter.add("b/c");
+  sorter.add("b/d");
+
+  EXPECT_EQ(3, sorter.count());
+  EXPECT_TRUE(sorter.contains("a"));
+  EXPECT_FALSE(sorter.contains("b"));
+  EXPECT_TRUE(sorter.contains("b/c"));
+  EXPECT_TRUE(sorter.contains("b/d"));
+
+  // Shares: a = 0, b/c = 0, b/d = 0.
+  EXPECT_EQ(vector<string>({"a", "b/c", "b/d"}), sorter.sort());
+
+  Resources aResources = Resources::parse("cpus:6;mem:6").get();
+  sorter.allocated("a", slaveId, aResources);
+
+  // Shares: a = 0.06, b/c = 0, b/d = 0.
+  EXPECT_EQ(vector<string>({"b/c", "b/d", "a"}), sorter.sort());
+
+  Resources cResources = Resources::parse("cpus:4;mem:4").get();
+  sorter.allocated("b/c", slaveId, cResources);
+
+  Resources dResources = Resources::parse("cpus:3;mem:3").get();
+  sorter.allocated("b/d", slaveId, dResources);
+
+  // Shares: a = 0.06, b/d = 0.03, b/c = 0.04.
+  EXPECT_EQ(vector<string>({"a", "b/d", "b/c"}), sorter.sort());
+
+  {
+    hashmap<string, Resources> agentAllocation =
+      sorter.allocation(slaveId);
+
+    EXPECT_EQ(3, agentAllocation.size());
+    EXPECT_EQ(aResources, agentAllocation.at("a"));
+    EXPECT_EQ(cResources, agentAllocation.at("b/c"));
+    EXPECT_EQ(dResources, agentAllocation.at("b/d"));
+
+    EXPECT_EQ(aResources, sorter.allocation("a", slaveId));
+    EXPECT_EQ(cResources, sorter.allocation("b/c", slaveId));
+    EXPECT_EQ(dResources, sorter.allocation("b/d", slaveId));
+  }
+
+  Resources aExtraResources = Resources::parse("cpus:2;mem:2").get();
+  sorter.allocated("a", slaveId, aExtraResources);
+
+  // Shares: b/d = 0.03, b/c = 0.04, a = 0.08.
+  EXPECT_EQ(vector<string>({"b/d", "b/c", "a"}), sorter.sort());
+
+  sorter.add("b/e/f");
+
+  EXPECT_FALSE(sorter.contains("b/e"));
+  EXPECT_TRUE(sorter.contains("b/e/f"));
+
+  // Shares: b/e/f = 0, b/d = 0.03, b/c = 0.04, a = 0.08.
+  EXPECT_EQ(vector<string>({"b/e/f", "b/d", "b/c", "a"}), sorter.sort());
+
+  Resources fResources = Resources::parse("cpus:3.5;mem:3.5").get();
+  sorter.allocated("b/e/f", slaveId, fResources);
+
+  // Shares: a = 0.08, b/d = 0.03, b/e/f = 0.035, b/c = 0.04.
+  EXPECT_EQ(vector<string>({"a", "b/d", "b/e/f", "b/c"}), sorter.sort());
+
+  // Removing a client should result in updating the fair-share for
+  // the subtree that contains the removed client.
+  sorter.remove("b/e/f");
+
+  EXPECT_FALSE(sorter.contains("b/e/f"));
+  EXPECT_EQ(3, sorter.count());
+
+  // Shares: b/d = 0.03, b/c = 0.04, a = 0.08.
+  EXPECT_EQ(vector<string>({"b/d", "b/c", "a"}), sorter.sort());
+
+  // Updating a client should result in updating the fair-share for
+  // the subtree that contains the updated client.
+  Resources cNewResources = Resources::parse("cpus:1;mem:1").get();
+  sorter.update("b/c", slaveId, cResources, cNewResources);
+
+  // Shares: b/c = 0.01, b/d = 0.03, a = 0.08.
+  EXPECT_EQ(vector<string>({"b/c", "b/d", "a"}), sorter.sort());
+
+  sorter.add("b/e/f");
+  sorter.allocated("b/e/f", slaveId, fResources);
+
+  // Shares: b/c = 0.01, b/d = 0.03, b/e/f = 0.035, a = 0.08.
+  EXPECT_EQ(vector<string>({"b/c", "b/d", "b/e/f", "a"}), sorter.sort());
+
+  EXPECT_EQ(4, sorter.count());
+}
+
+
+// This test checks what happens when a new sorter client is added as
+// a child of what was previously a leaf node.
+TEST(SorterTest, AddChildToLeaf)
+{
+  DRFSorter sorter;
+
+  SlaveID slaveId;
+  slaveId.set_value("agentId");
+
+  sorter.add(slaveId, Resources::parse("cpus:100;mem:100").get());
+
+  sorter.add("a");
+  sorter.allocated(
+      "a", slaveId, Resources::parse("cpus:10;mem:10").get());
+
+  sorter.add("b");
+  sorter.allocated(
+      "b", slaveId, Resources::parse("cpus:6;mem:6").get());
+
+  EXPECT_EQ(vector<string>({"b", "a"}), sorter.sort());
+
+  // Add a new client "a/c". The "a" subtree should now compete against
+  // the "b" subtree; within the "a" subtree, "a" should compete (as a
+  // sibling) against "a/c".
+
+  sorter.add("a/c");
+  sorter.allocated(
+      "a/c", slaveId, Resources::parse("cpus:5;mem:5").get());
+
+  EXPECT_EQ(vector<string>({"b", "a/c", "a"}), sorter.sort());
+
+  // Remove the "a" client; the "a/c" client should remain. Note that
+  // "a/c" now appears before "b" in the sort order, because the "a"
+  // subtree is now farther below its fair-share than the "b" subtree.
+
+  sorter.remove("a");
+
+  EXPECT_FALSE(sorter.contains("a"));
+  EXPECT_EQ(vector<string>({"a/c", "b"}), sorter.sort());
+
+  // Re-add the "a" client with the same resources. The client order
+  // should revert to its previous value.
+  sorter.add("a");
+  sorter.allocated(
+      "a", slaveId, Resources::parse("cpus:10;mem:10").get());
+
+  EXPECT_TRUE(sorter.contains("a"));
+  EXPECT_EQ(vector<string>({"b", "a/c", "a"}), sorter.sort());
+
+  // Check that "a" is considered to have a weight of 1 when it
+  // competes against "a/c".
+  sorter.updateWeight("a/c", 0.2);
+
+  EXPECT_EQ(vector<string>({"b", "a", "a/c"}), sorter.sort());
+
+  // Changing the weight "a" should change how it competes against its
+  // siblings (e.g., "b"), not its children (e.g., "a/c").
+  sorter.updateWeight("a", 3);
+
+  EXPECT_EQ(vector<string>({"a", "a/c", "b"}), sorter.sort());
+
+  sorter.updateWeight("a/c", 1);
+
+  EXPECT_EQ(vector<string>({"a/c", "a", "b"}), sorter.sort());
+}
+
+
+// This test checks what happens when a new sorter client is added as
+// a child of what was previously an internal node.
+TEST(SorterTest, AddChildToInternal)
+{
+  DRFSorter sorter;
+
+  SlaveID slaveId;
+  slaveId.set_value("agentId");
+
+  sorter.add(slaveId, Resources::parse("cpus:100;mem:100").get());
+
+  sorter.add("x/a");
+  sorter.allocated(
+      "x/a", slaveId, Resources::parse("cpus:10;mem:10").get());
+
+  sorter.add("x/b");
+  sorter.allocated(
+      "x/b", slaveId, Resources::parse("cpus:6;mem:6").get());
+
+  EXPECT_EQ(vector<string>({"x/b", "x/a"}), sorter.sort());
+
+  sorter.add("x");
+  sorter.allocated(
+      "x", slaveId, Resources::parse("cpus:7;mem:7").get());
+
+  EXPECT_EQ(vector<string>({"x/b", "x", "x/a"}), sorter.sort());
+
+  sorter.add("z");
+  sorter.allocated(
+      "z", slaveId, Resources::parse("cpus:20;mem:20").get());
+
+  EXPECT_EQ(vector<string>({"z", "x/b", "x", "x/a"}), sorter.sort());
+
+  sorter.remove("x");
+
+  EXPECT_EQ(vector<string>({"x/b", "x/a", "z"}), sorter.sort());
+}
+
+
+// This test checks what happens when a new sorter client is added as
+// a child of what was previously an inactive leaf node.
+TEST(SorterTest, AddChildToInactiveLeaf)
+{
+  DRFSorter sorter;
+
+  SlaveID slaveId;
+  slaveId.set_value("agentId");
+
+  sorter.add(slaveId, Resources::parse("cpus:100;mem:100").get());
+
+  sorter.add("a");
+  sorter.allocated(
+      "a", slaveId, Resources::parse("cpus:10;mem:10").get());
+
+  sorter.add("b");
+  sorter.allocated(
+      "b", slaveId, Resources::parse("cpus:6;mem:6").get());
+
+  sorter.deactivate("a");
+
+  EXPECT_EQ(vector<string>({"b"}), sorter.sort());
+
+  sorter.add("a/c");
+  sorter.allocated(
+      "a/c", slaveId, Resources::parse("cpus:5;mem:5").get());
+
+  EXPECT_EQ(vector<string>({"b", "a/c"}), sorter.sort());
+}
+
+
+// This test checks what happens when a sorter client is removed,
+// which allows a leaf node to be collapsed into its parent node. This
+// is basically the inverse situation to `AddChildToLeaf`.
+TEST(SorterTest, RemoveLeafCollapseParent)
+{
+  DRFSorter sorter;
+
+  SlaveID slaveId;
+  slaveId.set_value("agentId");
+
+  sorter.add(slaveId, Resources::parse("cpus:100;mem:100").get());
+
+  sorter.add("a");
+  sorter.allocated(
+      "a", slaveId, Resources::parse("cpus:10;mem:10").get());
+
+  sorter.add("b");
+  sorter.allocated(
+      "b", slaveId, Resources::parse("cpus:6;mem:6").get());
+
+  sorter.add("a/c");
+  sorter.allocated(
+      "a/c", slaveId, Resources::parse("cpus:5;mem:5").get());
+
+  EXPECT_EQ(vector<string>({"b", "a/c", "a"}), sorter.sort());
+
+  sorter.remove("a/c");
+
+  EXPECT_EQ(vector<string>({"b", "a"}), sorter.sort());
+}
+
+
+// This test checks that when a sorter client is removed and a leaf
+// node is collapsed into its parent node, we correctly propagate the
+// `inactive` flag from leaf to parent.
+TEST(SorterTest, RemoveLeafCollapseParentInactive)
+{
+  DRFSorter sorter;
+
+  SlaveID slaveId;
+  slaveId.set_value("agentId");
+
+  sorter.add(slaveId, Resources::parse("cpus:100;mem:100").get());
+
+  sorter.add("a");
+  sorter.allocated(
+      "a", slaveId, Resources::parse("cpus:10;mem:10").get());
+
+  sorter.add("b");
+  sorter.allocated(
+      "b", slaveId, Resources::parse("cpus:6;mem:6").get());
+
+  sorter.add("a/c");
+  sorter.allocated(
+      "a/c", slaveId, Resources::parse("cpus:5;mem:5").get());
+
+  sorter.deactivate("a");
+
+  EXPECT_EQ(vector<string>({"b", "a/c"}), sorter.sort());
+
+  sorter.remove("a/c");
+
+  EXPECT_EQ(vector<string>({"b"}), sorter.sort());
+}
+
+
+// This test checks that setting a weight on an internal node works
+// correctly.
+TEST(SorterTest, ChangeWeightOnSubtree)
+{
+  DRFSorter sorter;
+
+  SlaveID slaveId;
+  slaveId.set_value("agentId");
+
+  sorter.add(slaveId, Resources::parse("cpus:100;mem:100").get());
+
+  sorter.updateWeight("b", 3);
+  sorter.updateWeight("a", 2);
+
+  sorter.add("a/x");
+  sorter.add("b/y");
+
+  EXPECT_EQ(vector<string>({"a/x", "b/y"}), sorter.sort());
+
+  sorter.allocated(
+      "a/x", slaveId, Resources::parse("cpus:10;mem:10").get());
+
+  sorter.allocated(
+      "b/y", slaveId, Resources::parse("cpus:10;mem:10").get());
+
+  EXPECT_EQ(vector<string>({"b/y", "a/x"}), sorter.sort());
+
+  sorter.add("b/z");
+  sorter.allocated(
+      "b/z", slaveId, Resources::parse("cpus:5;mem:5").get());
+
+  EXPECT_EQ(vector<string>({"b/z", "b/y", "a/x"}), sorter.sort());
+
+  sorter.add("b");
+  sorter.allocated(
+      "b", slaveId, Resources::parse("cpus:4;mem:4").get());
+
+  EXPECT_EQ(vector<string>({"a/x", "b", "b/z", "b/y"}), sorter.sort());
+
+  sorter.add("a/zz");
+  sorter.allocated(
+      "a/zz", slaveId, Resources::parse("cpus:2;mem:2").get());
+
+  EXPECT_EQ(vector<string>({"a/zz", "a/x", "b", "b/z", "b/y"}), sorter.sort());
+}
+
+
 // Some resources are split across multiple resource objects (e.g.
 // persistent volumes). This test ensures that the shares for these
 // are accounted correctly.
@@ -350,11 +886,46 @@ TEST(SorterTest, UpdateAllocation)
 
   hashmap<SlaveID, Resources> allocation = sorter.allocation("a");
   EXPECT_EQ(1u, allocation.size());
-  EXPECT_EQ(newAllocation.get(), allocation[slaveId]);
+  EXPECT_EQ(newAllocation.get(), allocation.at(slaveId));
   EXPECT_EQ(newAllocation.get(), sorter.allocation("a", slaveId));
 }
 
 
+TEST(SorterTest, UpdateAllocationNestedClient)
+{
+  DRFSorter sorter;
+
+  SlaveID slaveId;
+  slaveId.set_value("agentId");
+
+  sorter.add("a/x");
+  sorter.add("b/y");
+
+  sorter.add(slaveId, Resources::parse("cpus:10;mem:10;disk:10").get());
+
+  sorter.allocated(
+      "a/x", slaveId, Resources::parse("cpus:10;mem:10;disk:10").get());
+
+  // Construct an offer operation.
+  Resource volume = Resources::parse("disk", "5", "*").get();
+  volume.mutable_disk()->mutable_persistence()->set_id("ID");
+  volume.mutable_disk()->mutable_volume()->set_container_path("data");
+
+  // Compute the updated allocation.
+  Resources oldAllocation = sorter.allocation("a/x", slaveId);
+  Try<Resources> newAllocation = oldAllocation.apply(CREATE(volume));
+  ASSERT_SOME(newAllocation);
+
+  // Update the resources for the client.
+  sorter.update("a/x", slaveId, oldAllocation, newAllocation.get());
+
+  hashmap<SlaveID, Resources> allocation = sorter.allocation("a/x");
+  EXPECT_EQ(1u, allocation.size());
+  EXPECT_EQ(newAllocation.get(), allocation.at(slaveId));
+  EXPECT_EQ(newAllocation.get(), sorter.allocation("a/x", slaveId));
+}
+
+
 // We aggregate resources from multiple slaves into the sorter.
 // Since non-scalar resources don't aggregate well across slaves,
 // we need to keep track of the SlaveIDs of the resources. This


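For readers cross-checking the share figures quoted in the test comments
above, here is a small self-contained sketch of the dominant-share
arithmetic (plain DRF, ignoring weights). For example, in `ShallowHierarchy`,
after the agent's total shrinks to cpus=50 and grows to mem=200, client
"c/c" holds cpus:1 and mem:16, so its dominant share is
max(1/50, 16/200) = 0.08, matching the `.08` noted in the comment.

    #include <algorithm>
    #include <iostream>

    int main()
    {
      // Agent totals after the remove/add operations in the test above.
      const double totalCpus = 50.0;
      const double totalMem = 200.0;

      // Allocation held by client "c/c" at that point: cpus:1, mem:16.
      const double cpus = 1.0;
      const double mem = 16.0;

      // Dominant-resource share: the largest per-resource fraction.
      const double share = std::max(cpus / totalCpus, mem / totalMem);
      std::cout << share << std::endl;  // Prints 0.08.

      return 0;
    }
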
[05/11] mesos git commit: Changed DRFSorter's representation of inactive clients.

Posted by ne...@apache.org.
Changed DRFSorter's representation of inactive clients.

DRFSorter previously removed inactive clients from the `clients`
collection, and then re-added clients when they were reactivated. This
resulted in resetting the allocation count for the client, which is
unfortunate. This scheme would also be more difficult to adapt to
hierarchical sorting.

This commit changes DRFSorter to continue storing inactive clients in
the `clients` collection; inactive clients are indicated by a new
`active` field in the `Client` struct and are omitted from the return
value of `DRFSorter::sort`.
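
A minimal standalone sketch of the idea, for illustration only (the
`Client` type and `sortActive` helper below are simplified stand-ins,
not the actual sorter code): deactivation flips a flag instead of
erasing the entry, so state such as the allocation count survives
reactivation.

    #include <cstdint>
    #include <set>
    #include <string>
    #include <vector>

    struct Client
    {
      std::string name;
      double share = 0.0;
      bool active = true;        // Toggled by activate()/deactivate().
      uint64_t allocations = 0;  // Survives deactivation.

      // Order by share, breaking ties by allocation count, then name.
      bool operator<(const Client& other) const
      {
        if (share != other.share) return share < other.share;
        if (allocations != other.allocations) {
          return allocations < other.allocations;
        }
        return name < other.name;
      }
    };

    // Inactive clients stay in the set; they are simply skipped here,
    // so re-activating a client does not reset its allocation count.
    std::vector<std::string> sortActive(const std::set<Client>& clients)
    {
      std::vector<std::string> result;
      for (const Client& client : clients) {
        if (client.active) {
          result.push_back(client.name);
        }
      }
      return result;
    }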

Review: https://reviews.apache.org/r/57564


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/93216467
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/93216467
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/93216467

Branch: refs/heads/master
Commit: 932164675908bf83224b6d420d61cfc4a2c0e875
Parents: bbe2c6c
Author: Neil Conway <ne...@gmail.com>
Authored: Mon Mar 13 10:18:17 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:02:04 2017 -0400

----------------------------------------------------------------------
 src/master/allocator/sorter/drf/sorter.cpp | 60 +++++++++++++------------
 src/master/allocator/sorter/drf/sorter.hpp | 12 ++---
 src/tests/sorter_tests.cpp                 | 34 ++++++++++----
 3 files changed, 65 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/93216467/src/master/allocator/sorter/drf/sorter.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index 85dbff1..9563f58 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -87,10 +87,8 @@ void DRFSorter::remove(const string& name)
   CHECK(contains(name));
 
   set<Client, DRFComparator>::iterator it = find(name);
-
-  if (it != clients.end()) {
-    clients.erase(it);
-  }
+  CHECK(it != clients.end());
+  clients.erase(it);
 
   allocations.erase(name);
 
@@ -105,8 +103,13 @@ void DRFSorter::activate(const string& name)
   CHECK(contains(name));
 
   set<Client, DRFComparator>::iterator it = find(name);
-  if (it == clients.end()) {
-    Client client(name, calculateShare(name), 0);
+  CHECK(it != clients.end());
+
+  if (!it->active) {
+    Client client(*it);
+    client.active = true;
+
+    clients.erase(it);
     clients.insert(client);
   }
 }
@@ -117,13 +120,14 @@ void DRFSorter::deactivate(const string& name)
   CHECK(contains(name));
 
   set<Client, DRFComparator>::iterator it = find(name);
+  CHECK(it != clients.end());
+
+  if (it->active) {
+    Client client(*it);
+    client.active = false;
 
-  if (it != clients.end()) {
-    // TODO(benh): Removing the client is an unfortunate strategy
-    // because we lose information such as the number of allocations
-    // for this client which means the fairness can be gamed by a
-    // framework disconnecting and reconnecting.
     clients.erase(it);
+    clients.insert(client);
   }
 }
 
@@ -145,14 +149,14 @@ void DRFSorter::allocated(
 {
   CHECK(contains(name));
 
-  set<Client, DRFComparator>::iterator it = find(name);
+  // Update the number of allocations that have been made to this
+  // client. Note that the client might currently be inactive.
+  //
+  // TODO(benh): Refactor 'updateShare' to be able to reuse it here.
+  {
+    set<Client, DRFComparator>::iterator it = find(name);
+    CHECK(it != clients.end());
 
-  // The allocator might notify us about an allocation that has been
-  // made to an inactive sorter client. For example, this happens when
-  // an agent re-registers that is running tasks for a framework that
-  // has not yet re-registered.
-  if (it != clients.end()) {
-    // TODO(benh): Refactor 'updateShare' to be able to reuse it here.
     Client client(*it);
 
     // Update the 'allocations' to reflect the allocator decision.
@@ -409,10 +413,11 @@ vector<string> DRFSorter::sort()
   }
 
   vector<string> result;
-  result.reserve(clients.size());
 
   foreach (const Client& client, clients) {
-    result.push_back(client.name);
+    if (client.active) {
+      result.push_back(client.name);
+    }
   }
 
   return result;
@@ -434,17 +439,16 @@ int DRFSorter::count() const
 void DRFSorter::updateShare(const string& name)
 {
   set<Client, DRFComparator>::iterator it = find(name);
+  CHECK(it != clients.end());
 
-  if (it != clients.end()) {
-    Client client(*it);
+  Client client(*it);
 
-    // Update the 'share' to get proper sorting.
-    client.share = calculateShare(client.name);
+  // Update the 'share' to get proper sorting.
+  client.share = calculateShare(client.name);
 
-    // Remove and reinsert it to update the ordering appropriately.
-    clients.erase(it);
-    clients.insert(client);
-  }
+  // Remove and reinsert it to update the ordering appropriately.
+  clients.erase(it);
+  clients.insert(client);
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/93216467/src/master/allocator/sorter/drf/sorter.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.hpp b/src/master/allocator/sorter/drf/sorter.hpp
index 19fa41b..665eeb0 100644
--- a/src/master/allocator/sorter/drf/sorter.hpp
+++ b/src/master/allocator/sorter/drf/sorter.hpp
@@ -41,10 +41,11 @@ namespace allocator {
 struct Client
 {
   Client(const std::string& _name, double _share, uint64_t _allocations)
-    : name(_name), share(_share), allocations(_allocations) {}
+    : name(_name), share(_share), active(true), allocations(_allocations) {}
 
   std::string name;
   double share;
+  bool active;
 
   // We store the number of times this client has been chosen for
   // allocation so that we can fairly share the resources across
@@ -151,7 +152,7 @@ private:
   // If true, sort() will recalculate all shares.
   bool dirty = false;
 
-  // The set of active clients (names and shares), sorted by share.
+  // The set of clients, sorted by share.
   std::set<Client, DRFComparator> clients;
 
   // Maps client names to the weights that should be applied to their shares.
@@ -211,9 +212,10 @@ private:
     hashmap<std::string, Value::Scalar> totals;
   };
 
-  // Maps client names to the resources they have been allocated. Note
-  // that `allocations` might contain entries for deactivated clients
-  // not currently in `clients`.
+  // Maps client names to the resources they have been allocated.
+  //
+  // TODO(neilc): It would be cleaner to store a client's allocation
+  // in the `Client` struct instead.
   hashmap<std::string, Allocation> allocations;
 
   // Metrics are optionally exposed by the sorter.

http://git-wip-us.apache.org/repos/asf/mesos/blob/93216467/src/tests/sorter_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/sorter_tests.cpp b/src/tests/sorter_tests.cpp
index 8e86e4c..7ca6fca 100644
--- a/src/tests/sorter_tests.cpp
+++ b/src/tests/sorter_tests.cpp
@@ -227,8 +227,9 @@ TEST(SorterTest, CountAllocations)
   sorter.add("d");
   sorter.add("e");
 
-  // Everyone is allocated the same resources; "c" gets three distinct
-  // allocations, "d" gets two, and all other clients get one.
+  // Everyone is allocated the same amount of resources; "c" gets
+  // three distinct allocations, "d" gets two, and all other clients
+  // get one.
   sorter.allocated("a", slaveId, Resources::parse("cpus:3;mem:3").get());
   sorter.allocated("b", slaveId, Resources::parse("cpus:3;mem:3").get());
   sorter.allocated("c", slaveId, Resources::parse("cpus:1;mem:1").get());
@@ -238,6 +239,7 @@ TEST(SorterTest, CountAllocations)
   sorter.allocated("d", slaveId, Resources::parse("cpus:1;mem:1").get());
   sorter.allocated("e", slaveId, Resources::parse("cpus:3;mem:3").get());
 
+  // Allocation count: {a,b,e} = 1, {d} = 2, {c} = 3.
   EXPECT_EQ(vector<string>({"a", "b", "e", "d", "c"}), sorter.sort());
 
   // Check that unallocating and re-allocating to a client does not
@@ -248,21 +250,37 @@ TEST(SorterTest, CountAllocations)
 
   sorter.allocated("c", slaveId, Resources::parse("cpus:3;mem:3").get());
 
+  // Allocation count: {a,b,e} = 1, {d} = 2, {c} = 4.
   EXPECT_EQ(vector<string>({"a", "b", "e", "d", "c"}), sorter.sort());
 
-  // Deactivating and then re-activating a client currently resets the
-  // allocation count to zero.
-  //
-  // TODO(neilc): Consider changing this behavior.
+  // Check that deactivating and then re-activating a client does not
+  // reset the allocation count.
   sorter.deactivate("c");
   sorter.activate("c");
 
-  EXPECT_EQ(vector<string>({"c", "a", "b", "e", "d"}), sorter.sort());
+  // Allocation count: {a,b,e} = 1, {d} = 2, {c} = 4.
+  EXPECT_EQ(vector<string>({"a", "b", "e", "d", "c"}), sorter.sort());
 
   sorter.unallocated("c", slaveId, Resources::parse("cpus:3;mem:3").get());
   sorter.allocated("c", slaveId, Resources::parse("cpus:3;mem:3").get());
 
-  EXPECT_EQ(vector<string>({"a", "b", "c", "e", "d"}), sorter.sort());
+  // Allocation count: {a,b,e} = 1, {d} = 2, {c} = 5.
+  EXPECT_EQ(vector<string>({"a", "b", "e", "d", "c"}), sorter.sort());
+
+  // Check that allocations to an inactive client increase the
+  // allocation count.
+  sorter.deactivate("a");
+
+  sorter.unallocated("a", slaveId, Resources::parse("cpus:1;mem:3").get());
+  sorter.allocated("a", slaveId, Resources::parse("cpus:1;mem:3").get());
+
+  // Allocation count: {b,e} = 1, {d} = 2, {c} = 5.
+  EXPECT_EQ(vector<string>({"b", "e", "d", "c"}), sorter.sort());
+
+  sorter.activate("a");
+
+  // Allocation count: {b,e} = 1, {a,d} = 2, {c} = 5.
+  EXPECT_EQ(vector<string>({"b", "e", "a", "d", "c"}), sorter.sort());
 }
 
 


[06/11] mesos git commit: Removed `allocations` map from DRFSorter.

Posted by ne...@apache.org.
Removed `allocations` map from DRFSorter.

The sorter previously managed three collections: a set of active
clients, a map from client names to allocations, and a map from client
names to weights. Because the set previously contained only active
clients, each client's allocation needed to be stored separately, so
that the sorter could remember the allocations made to inactive clients.

Now that the set of clients includes both active and inactive clients,
we can dispense with the additional `allocations` map and store a
client's allocation as a nested struct in the `Client` struct. This
avoids the need to keep the two collections synchronized; the logic for
manipulating allocations is also more properly written as a member
function of the Client::Allocation struct, rather than inline in a
member function of DRFSorter.

Note that this change significantly increases the size of the `Client`
struct. Since we copy clients by value in several places (e.g., when
erasing and re-inserting a `Client` into `std::set`), this change
regresses sorter performance. This will be fixed in the next review in
this chain: we'll move from `std::set<Client>` to `std::set<Client*>`,
which will avoid the need to copy the `Client` on updates.
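
As a rough sketch of why the struct size matters (simplified stand-in
types, not the actual Mesos classes): `std::set` elements cannot be
mutated in place, so every update is an erase, a full copy of the
`Client` (now including its nested allocation state), a mutation, and
a re-insert.

    #include <cstdint>
    #include <set>
    #include <string>

    struct Client
    {
      std::string name;
      double share = 0.0;

      // Nested allocation state; in the real sorter this holds
      // per-agent resource maps, so copying it is not cheap.
      struct Allocation
      {
        uint64_t count = 0;
        double cpus = 0.0;  // Stand-in for the real resource bookkeeping.
      } allocation;

      bool operator<(const Client& other) const
      {
        if (share != other.share) return share < other.share;
        if (allocation.count != other.allocation.count) {
          return allocation.count < other.allocation.count;
        }
        return name < other.name;
      }
    };

    // Updating a client: erase + copy + modify + re-insert. Moving to
    // std::set<Client*> (the next review) avoids the copy.
    void allocated(std::set<Client>& clients,
                   std::set<Client>::iterator it,
                   double cpus)
    {
      Client client(*it);  // Full copy, including the Allocation.
      client.allocation.cpus += cpus;
      client.allocation.count++;

      clients.erase(it);
      clients.insert(client);
    }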

Review: https://reviews.apache.org/r/58110


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3d8faf59
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3d8faf59
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3d8faf59

Branch: refs/heads/master
Commit: 3d8faf5910843843e107f81639811e7720eadf0c
Parents: 9321646
Author: Neil Conway <ne...@gmail.com>
Authored: Thu Mar 30 14:28:26 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:02:09 2017 -0400

----------------------------------------------------------------------
 src/master/allocator/sorter/drf/metrics.cpp |   6 +-
 src/master/allocator/sorter/drf/sorter.cpp  | 168 +++++++----------------
 src/master/allocator/sorter/drf/sorter.hpp  | 154 +++++++++++++++------
 3 files changed, 169 insertions(+), 159 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/3d8faf59/src/master/allocator/sorter/drf/metrics.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/metrics.cpp b/src/master/allocator/sorter/drf/metrics.cpp
index 15aab32..94acb86 100644
--- a/src/master/allocator/sorter/drf/metrics.cpp
+++ b/src/master/allocator/sorter/drf/metrics.cpp
@@ -16,6 +16,8 @@
 
 #include "master/allocator/sorter/drf/metrics.hpp"
 
+#include <set>
+
 #include <process/defer.hpp>
 
 #include <process/metrics/metrics.hpp>
@@ -25,6 +27,7 @@
 
 #include "master/allocator/sorter/drf/sorter.hpp"
 
+using std::set;
 using std::string;
 
 using process::UPID;
@@ -65,7 +68,8 @@ void Metrics::add(const string& client)
         // occurs after the client is removed but before the
         // metric is removed.
         if (sorter->contains(client)) {
-          return sorter->calculateShare(client);
+          set<Client, DRFComparator>::iterator it = sorter->find(client);
+          return sorter->calculateShare(*it);
         }
 
         return 0.0;

http://git-wip-us.apache.org/repos/asf/mesos/blob/3d8faf59/src/master/allocator/sorter/drf/sorter.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index 9563f58..ee5b8c0 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -45,10 +45,10 @@ namespace allocator {
 bool DRFComparator::operator()(const Client& client1, const Client& client2)
 {
   if (client1.share == client2.share) {
-    if (client1.allocations == client2.allocations) {
+    if (client1.allocation.count == client2.allocation.count) {
       return client1.name < client2.name;
     }
-    return client1.allocations < client2.allocations;
+    return client1.allocation.count < client2.allocation.count;
   }
   return client1.share < client2.share;
 }
@@ -71,11 +71,9 @@ void DRFSorter::add(const string& name)
 {
   CHECK(!contains(name));
 
-  Client client(name, 0, 0);
+  Client client(name);
   clients.insert(client);
 
-  allocations[name] = Allocation();
-
   if (metrics.isSome()) {
     metrics->add(name);
   }
@@ -84,13 +82,10 @@ void DRFSorter::add(const string& name)
 
 void DRFSorter::remove(const string& name)
 {
-  CHECK(contains(name));
-
   set<Client, DRFComparator>::iterator it = find(name);
   CHECK(it != clients.end());
-  clients.erase(it);
 
-  allocations.erase(name);
+  clients.erase(it);
 
   if (metrics.isSome()) {
     metrics->remove(name);
@@ -100,8 +95,6 @@ void DRFSorter::remove(const string& name)
 
 void DRFSorter::activate(const string& name)
 {
-  CHECK(contains(name));
-
   set<Client, DRFComparator>::iterator it = find(name);
   CHECK(it != clients.end());
 
@@ -117,8 +110,6 @@ void DRFSorter::activate(const string& name)
 
 void DRFSorter::deactivate(const string& name)
 {
-  CHECK(contains(name));
-
   set<Client, DRFComparator>::iterator it = find(name);
   CHECK(it != clients.end());
 
@@ -147,47 +138,19 @@ void DRFSorter::allocated(
     const SlaveID& slaveId,
     const Resources& resources)
 {
-  CHECK(contains(name));
-
-  // Update the number of allocations that have been made to this
-  // client. Note that the client might currently be inactive.
-  //
-  // TODO(benh): Refactor 'updateShare' to be able to reuse it here.
-  {
-    set<Client, DRFComparator>::iterator it = find(name);
-    CHECK(it != clients.end());
-
-    Client client(*it);
-
-    // Update the 'allocations' to reflect the allocator decision.
-    client.allocations++;
-
-    // Remove and reinsert it to update the ordering appropriately.
-    clients.erase(it);
-    clients.insert(client);
-  }
-
-  // Add shared resources to the allocated quantities when the same
-  // resources don't already exist in the allocation.
-  const Resources newShared = resources.shared()
-    .filter([this, name, slaveId](const Resource& resource) {
-      return !allocations[name].resources[slaveId].contains(resource);
-    });
-
-  const Resources scalarQuantities =
-    (resources.nonShared() + newShared).createStrippedScalarQuantity();
+  set<Client, DRFComparator>::iterator it = find(name);
+  CHECK(it != clients.end());
 
-  allocations[name].resources[slaveId] += resources;
-  allocations[name].scalarQuantities += scalarQuantities;
+  Client client(*it);
+  client.allocation.add(slaveId, resources);
 
-  foreach (const Resource& resource, scalarQuantities) {
-    allocations[name].totals[resource.name()] += resource.scalar();
-  }
+  clients.erase(it);
+  clients.insert(client);
 
   // If the total resources have changed, we're going to recalculate
   // all the shares, so don't bother just updating this client.
   if (!dirty) {
-    updateShare(name);
+    updateShare(client.name);
   }
 }
 
@@ -198,34 +161,19 @@ void DRFSorter::update(
     const Resources& oldAllocation,
     const Resources& newAllocation)
 {
-  CHECK(contains(name));
-
   // TODO(bmahler): Check invariants between old and new allocations.
   // Namely, the roles and quantities of resources should be the same!
   // Otherwise, we need to ensure we re-calculate the shares, as
   // is being currently done, for safety.
 
-  const Resources oldAllocationQuantity =
-    oldAllocation.createStrippedScalarQuantity();
-  const Resources newAllocationQuantity =
-    newAllocation.createStrippedScalarQuantity();
-
-  CHECK(allocations[name].resources[slaveId].contains(oldAllocation));
-  CHECK(allocations[name].scalarQuantities.contains(oldAllocationQuantity));
-
-  allocations[name].resources[slaveId] -= oldAllocation;
-  allocations[name].resources[slaveId] += newAllocation;
-
-  allocations[name].scalarQuantities -= oldAllocationQuantity;
-  allocations[name].scalarQuantities += newAllocationQuantity;
+  set<Client, DRFComparator>::iterator it = find(name);
+  CHECK(it != clients.end());
 
-  foreach (const Resource& resource, oldAllocationQuantity) {
-    allocations[name].totals[resource.name()] -= resource.scalar();
-  }
+  Client client(*it);
+  client.allocation.update(slaveId, oldAllocation, newAllocation);
 
-  foreach (const Resource& resource, newAllocationQuantity) {
-    allocations[name].totals[resource.name()] += resource.scalar();
-  }
+  clients.erase(it);
+  clients.insert(client);
 
   // Just assume the total has changed, per the TODO above.
   dirty = true;
@@ -237,35 +185,19 @@ void DRFSorter::unallocated(
     const SlaveID& slaveId,
     const Resources& resources)
 {
-  CHECK(contains(name));
-  CHECK(allocations.at(name).resources.contains(slaveId));
-  CHECK(allocations.at(name).resources.at(slaveId).contains(resources));
-
-  allocations[name].resources[slaveId] -= resources;
-
-  // Remove shared resources from the allocated quantities when there
-  // are no instances of same resources left in the allocation.
-  const Resources absentShared = resources.shared()
-    .filter([this, name, slaveId](const Resource& resource) {
-      return !allocations[name].resources[slaveId].contains(resource);
-    });
-
-  const Resources scalarQuantities =
-    (resources.nonShared() + absentShared).createStrippedScalarQuantity();
-
-  foreach (const Resource& resource, scalarQuantities) {
-    allocations[name].totals[resource.name()] -= resource.scalar();
-  }
+  set<Client, DRFComparator>::iterator it = find(name);
+  CHECK(it != clients.end());
 
-  CHECK(allocations[name].scalarQuantities.contains(scalarQuantities));
-  allocations[name].scalarQuantities -= scalarQuantities;
+  Client client(*it);
+  client.allocation.subtract(slaveId, resources);
 
-  if (allocations[name].resources[slaveId].empty()) {
-    allocations[name].resources.erase(slaveId);
-  }
+  clients.erase(it);
+  clients.insert(client);
 
+  // If the total resources have changed, we're going to recalculate
+  // all the shares, so don't bother just updating this client.
   if (!dirty) {
-    updateShare(name);
+    updateShare(client.name);
   }
 }
 
@@ -273,18 +205,20 @@ void DRFSorter::unallocated(
 const hashmap<SlaveID, Resources>& DRFSorter::allocation(
     const string& name) const
 {
-  CHECK(contains(name));
+  set<Client, DRFComparator>::iterator it = find(name);
+  CHECK(it != clients.end());
 
-  return allocations.at(name).resources;
+  return it->allocation.resources;
 }
 
 
 const Resources& DRFSorter::allocationScalarQuantities(
     const string& name) const
 {
-  CHECK(contains(name));
+  set<Client, DRFComparator>::iterator it = find(name);
+  CHECK(it != clients.end());
 
-  return allocations.at(name).scalarQuantities;
+  return it->allocation.scalarQuantities;
 }
 
 
@@ -296,11 +230,11 @@ hashmap<string, Resources> DRFSorter::allocation(const SlaveID& slaveId) const
 
   hashmap<string, Resources> result;
 
-  foreachpair (const string& name, const Allocation& allocation, allocations) {
-    if (allocation.resources.contains(slaveId)) {
-      // It is safe to use `at()` here because we've just checked the existence
-      // of the key. This avoid un-necessary copies.
-      result.emplace(name, allocation.resources.at(slaveId));
+  foreach (const Client& client, clients) {
+    if (client.allocation.resources.contains(slaveId)) {
+      // It is safe to use `at()` here because we've just checked the
+      // existence of the key. This avoids unnecessary copies.
+      result.emplace(client.name, client.allocation.resources.at(slaveId));
     }
   }
 
@@ -312,10 +246,11 @@ Resources DRFSorter::allocation(
     const string& name,
     const SlaveID& slaveId) const
 {
-  CHECK(contains(name));
+  set<Client, DRFComparator>::iterator it = find(name);
+  CHECK(it != clients.end());
 
-  if (allocations.at(name).resources.contains(slaveId)) {
-    return allocations.at(name).resources.at(slaveId);
+  if (it->allocation.resources.contains(slaveId)) {
+    return it->allocation.resources.at(slaveId);
   }
 
   return Resources();
@@ -400,7 +335,7 @@ vector<string> DRFSorter::sort()
 
     foreach (Client client, clients) {
       // Update the 'share' to get proper sorting.
-      client.share = calculateShare(client.name);
+      client.share = calculateShare(client);
 
       temp.insert(client);
     }
@@ -426,13 +361,14 @@ vector<string> DRFSorter::sort()
 
 bool DRFSorter::contains(const string& name) const
 {
-  return allocations.contains(name);
+  set<Client, DRFComparator>::iterator it = find(name);
+  return it != clients.end();
 }
 
 
 int DRFSorter::count() const
 {
-  return allocations.size();
+  return clients.size();
 }
 
 
@@ -444,7 +380,7 @@ void DRFSorter::updateShare(const string& name)
   Client client(*it);
 
   // Update the 'share' to get proper sorting.
-  client.share = calculateShare(client.name);
+  client.share = calculateShare(client);
 
   // Remove and reinsert it to update the ordering appropriately.
   clients.erase(it);
@@ -452,10 +388,8 @@ void DRFSorter::updateShare(const string& name)
 }
 
 
-double DRFSorter::calculateShare(const string& name) const
+double DRFSorter::calculateShare(const Client& client) const
 {
-  CHECK(contains(name));
-
   double share = 0.0;
 
   // TODO(benh): This implementation of "dominant resource fairness"
@@ -472,15 +406,15 @@ double DRFSorter::calculateShare(const string& name) const
     }
 
     if (scalar.value() > 0.0 &&
-        allocations.at(name).totals.contains(resourceName)) {
+        client.allocation.totals.contains(resourceName)) {
       const double allocation =
-        allocations.at(name).totals.at(resourceName).value();
+        client.allocation.totals.at(resourceName).value();
 
       share = std::max(share, allocation / scalar.value());
     }
   }
 
-  return share / clientWeight(name);
+  return share / clientWeight(client.name);
 }
 
 
@@ -496,7 +430,7 @@ double DRFSorter::clientWeight(const string& name) const
 }
 
 
-set<Client, DRFComparator>::iterator DRFSorter::find(const string& name)
+set<Client, DRFComparator>::iterator DRFSorter::find(const string& name) const
 {
   set<Client, DRFComparator>::iterator it;
   for (it = clients.begin(); it != clients.end(); it++) {

http://git-wip-us.apache.org/repos/asf/mesos/blob/3d8faf59/src/master/allocator/sorter/drf/sorter.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.hpp b/src/master/allocator/sorter/drf/sorter.hpp
index 665eeb0..2ef2eb8 100644
--- a/src/master/allocator/sorter/drf/sorter.hpp
+++ b/src/master/allocator/sorter/drf/sorter.hpp
@@ -40,21 +40,122 @@ namespace allocator {
 
 struct Client
 {
-  Client(const std::string& _name, double _share, uint64_t _allocations)
-    : name(_name), share(_share), active(true), allocations(_allocations) {}
+  explicit Client(const std::string& _name)
+    : name(_name), share(0), active(true) {}
 
   std::string name;
   double share;
   bool active;
 
-  // We store the number of times this client has been chosen for
-  // allocation so that we can fairly share the resources across
-  // clients that have the same share. Note that this information is
-  // not persisted across master failovers, but since the point is to
-  // equalize the 'allocations' across clients of the same 'share'
-  // having allocations restart at 0 after a master failover should be
-  // sufficient (famous last words.)
-  uint64_t allocations;
+  // Allocation for a client.
+  struct Allocation {
+    Allocation() : count(0) {}
+
+    void add(const SlaveID& slaveId, const Resources& toAdd) {
+      // Add shared resources to the allocated quantities when the same
+      // resources don't already exist in the allocation.
+      const Resources sharedToAdd = toAdd.shared()
+        .filter([this, slaveId](const Resource& resource) {
+            return !resources[slaveId].contains(resource);
+        });
+
+      const Resources quantitiesToAdd =
+        (toAdd.nonShared() + sharedToAdd).createStrippedScalarQuantity();
+
+      resources[slaveId] += toAdd;
+      scalarQuantities += quantitiesToAdd;
+
+      foreach (const Resource& resource, quantitiesToAdd) {
+        totals[resource.name()] += resource.scalar();
+      }
+
+      count++;
+    }
+
+    void subtract(const SlaveID& slaveId, const Resources& toRemove) {
+      CHECK(resources.contains(slaveId));
+      CHECK(resources.at(slaveId).contains(toRemove));
+
+      resources[slaveId] -= toRemove;
+
+      // Remove shared resources from the allocated quantities when there
+      // are no instances of same resources left in the allocation.
+      const Resources sharedToRemove = toRemove.shared()
+        .filter([this, slaveId](const Resource& resource) {
+            return !resources[slaveId].contains(resource);
+        });
+
+      const Resources quantitiesToRemove =
+        (toRemove.nonShared() + sharedToRemove).createStrippedScalarQuantity();
+
+      foreach (const Resource& resource, quantitiesToRemove) {
+        totals[resource.name()] -= resource.scalar();
+      }
+
+      CHECK(scalarQuantities.contains(quantitiesToRemove));
+      scalarQuantities -= quantitiesToRemove;
+
+      if (resources[slaveId].empty()) {
+        resources.erase(slaveId);
+      }
+    }
+
+    void update(
+        const SlaveID& slaveId,
+        const Resources& oldAllocation,
+        const Resources& newAllocation) {
+      const Resources oldAllocationQuantity =
+        oldAllocation.createStrippedScalarQuantity();
+      const Resources newAllocationQuantity =
+        newAllocation.createStrippedScalarQuantity();
+
+      CHECK(resources[slaveId].contains(oldAllocation));
+      CHECK(scalarQuantities.contains(oldAllocationQuantity));
+
+      resources[slaveId] -= oldAllocation;
+      resources[slaveId] += newAllocation;
+
+      scalarQuantities -= oldAllocationQuantity;
+      scalarQuantities += newAllocationQuantity;
+
+      foreach (const Resource& resource, oldAllocationQuantity) {
+        totals[resource.name()] -= resource.scalar();
+      }
+
+      foreach (const Resource& resource, newAllocationQuantity) {
+        totals[resource.name()] += resource.scalar();
+      }
+    }
+
+    // We store the number of times this client has been chosen for
+    // allocation so that we can fairly share the resources across
+    // clients that have the same share. Note that this information is
+    // not persisted across master failovers, but since the point is
+    // to equalize the `count` across clients of the same `share`
+    // having allocations restart at 0 after a master failover should
+    // be sufficient (famous last words.)
+    uint64_t count;
+
+    // We maintain multiple copies of each shared resource allocated
+    // to a client, where the number of copies represents the number
+    // of times this shared resource has been allocated to (and has
+    // not been recovered from) a specific client.
+    hashmap<SlaveID, Resources> resources;
+
+    // Similarly, we aggregate scalars across slaves and omit information
+    // about dynamic reservations, persistent volumes and sharedness of
+    // the corresponding resource. See notes above.
+    Resources scalarQuantities;
+
+    // We also store a map version of `scalarQuantities`, mapping
+    // the `Resource::name` to aggregated scalar. This improves the
+    // performance of calculating shares. See MESOS-4694.
+    //
+    // TODO(bmahler): Ideally we do not store `scalarQuantities`
+    // redundantly here, investigate performance improvements to
+    // `Resources` to make this unnecessary.
+    hashmap<std::string, Value::Scalar> totals;
+  } allocation;
 };
 
 
@@ -136,7 +237,7 @@ private:
   void updateShare(const std::string& name);
 
   // Returns the dominant resource share for the client.
-  double calculateShare(const std::string& name) const;
+  double calculateShare(const Client& client) const;
 
   // Resources (by name) that will be excluded from fair sharing.
   Option<std::set<std::string>> fairnessExcludeResourceNames;
@@ -147,7 +248,7 @@ private:
 
   // Returns an iterator to the specified client, if
   // it exists in this Sorter.
-  std::set<Client, DRFComparator>::iterator find(const std::string& name);
+  std::set<Client, DRFComparator>::iterator find(const std::string& name) const;
 
   // If true, sort() will recalculate all shares.
   bool dirty = false;
@@ -189,35 +290,6 @@ private:
     hashmap<std::string, Value::Scalar> totals;
   } total_;
 
-  // Allocation for a client.
-  struct Allocation {
-    // We maintain multiple copies of each shared resource allocated
-    // to a client, where the number of copies represents the number
-    // of times this shared resource has been allocated to (and has
-    // not been recovered from) a specific client.
-    hashmap<SlaveID, Resources> resources;
-
-    // Similarly, we aggregate scalars across slaves and omit information
-    // about dynamic reservations, persistent volumes and sharedness of
-    // the corresponding resource. See notes above.
-    Resources scalarQuantities;
-
-    // We also store a map version of `scalarQuantities`, mapping
-    // the `Resource::name` to aggregated scalar. This improves the
-    // performance of calculating shares. See MESOS-4694.
-    //
-    // TODO(bmahler): Ideally we do not store `scalarQuantities`
-    // redundantly here, investigate performance improvements to
-    // `Resources` to make this unnecessary.
-    hashmap<std::string, Value::Scalar> totals;
-  };
-
-  // Maps client names to the resources they have been allocated.
-  //
-  // TODO(neilc): It would be cleaner to store a client's allocation
-  // in the `Client` struct instead.
-  hashmap<std::string, Allocation> allocations;
-
   // Metrics are optionally exposed by the sorter.
   friend Metrics;
   Option<Metrics> metrics;


[02/11] mesos git commit: Changed allocator to skip allocation on weight and quota changes.

Posted by ne...@apache.org.
Changed allocator to skip allocation on weight and quota changes.

Changing weight or quota will no longer trigger a batch allocation.
Since the allocator does not rebalance currently offered resources to
reflect changes to weight or quota, doing a batch allocation is not
useful; instead, the updated quota/weight values will be reflected on
the next allocation.

Review: https://reviews.apache.org/r/57788


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d93f2246
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d93f2246
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d93f2246

Branch: refs/heads/master
Commit: d93f2246b20f8ee12194624b947b72604e3815a9
Parents: d0f0f9d
Author: Neil Conway <ne...@gmail.com>
Authored: Mon Mar 20 11:07:23 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:01:48 2017 -0400

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 33 +++++++++++++-----------
 src/tests/hierarchical_allocator_tests.cpp  | 13 +++++-----
 2 files changed, 25 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/d93f2246/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 051f749..57a5e82 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1294,7 +1294,12 @@ void HierarchicalAllocatorProcess::setQuota(
   LOG(INFO) << "Set quota " << quota.info.guarantee() << " for role '" << role
             << "'";
 
-  allocate();
+  // NOTE: Since quota changes do not result in rebalancing of
+  // offered resources, we do not trigger an allocation here; the
+  // quota change will be reflected in subsequent allocations.
+  //
+  // If we add the ability for quota changes to incur a rebalancing
+  // of offered resources, then we should trigger that here.
 }
 
 
@@ -1317,7 +1322,12 @@ void HierarchicalAllocatorProcess::removeQuota(
 
   metrics.removeQuota(role);
 
-  allocate();
+  // NOTE: Since quota changes do not result in rebalancing of
+  // offered resources, we do not trigger an allocation here; the
+  // quota change will be reflected in subsequent allocations.
+  //
+  // If we add the ability for quota changes to incur a rebalancing
+  // of offered resources, then we should trigger that here.
 }
 
 
@@ -1326,33 +1336,26 @@ void HierarchicalAllocatorProcess::updateWeights(
 {
   CHECK(initialized);
 
-  bool rebalance = false;
-
   // Update the weight for each specified role.
   foreach (const WeightInfo& weightInfo, weightInfos) {
     CHECK(weightInfo.has_role());
     weights[weightInfo.role()] = weightInfo.weight();
 
-    // The allocator only needs to rebalance if there is a framework
-    // registered with this role. The roleSorter contains only roles
-    // for registered frameworks, but quotaRoleSorter contains any role
-    // with quota set, regardless of whether any frameworks are registered
-    // with that role.
     if (quotas.contains(weightInfo.role())) {
       quotaRoleSorter->update(weightInfo.role(), weightInfo.weight());
     }
 
     if (roleSorter->contains(weightInfo.role())) {
-      rebalance = true;
       roleSorter->update(weightInfo.role(), weightInfo.weight());
     }
   }
 
-  // If at least one of the updated roles has registered
-  // frameworks, then trigger the allocation.
-  if (rebalance) {
-    allocate();
-  }
+  // NOTE: Since weight changes do not result in rebalancing of
+  // offered resources, we do not trigger an allocation here; the
+  // weight change will be reflected in subsequent allocations.
+  //
+  // If we add the ability for weight changes to incur a rebalancing
+  // of offered resources, then we should trigger that here.
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/d93f2246/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index e2cd66d..1e2eb96 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -2745,6 +2745,9 @@ TEST_F(HierarchicalAllocatorTest, QuotaAgainstStarvation)
   // Since `QUOTA_ROLE` is under quota, `agent2`'s resources will
   // be allocated to `framework1`.
 
+  // Trigger the next batch allocation.
+  Clock::advance(flags.allocation_interval);
+
   expected = Allocation(
       framework1.id(),
       {{QUOTA_ROLE, {{agent2.id(), agent2.resources()}}}});
@@ -3974,8 +3977,8 @@ TEST_F(HierarchicalAllocatorTest, UpdateWeight)
     weightInfos.push_back(createWeightInfo({"role2"}, 2.0));
     allocator->updateWeights(weightInfos);
 
-    // 'updateWeights' will trigger the allocation immediately, so it does not
-    // need to manually advance the clock here.
+    // Advance the clock and trigger a batch allocation.
+    Clock::advance(flags.allocation_interval);
 
     // role1 share = 0.33 (cpus=4, mem=2048)
     //   framework1 share = 1
@@ -4015,15 +4018,13 @@ TEST_F(HierarchicalAllocatorTest, UpdateWeight)
     weightInfos.push_back(createWeightInfo("role3", 3.0));
     allocator->updateWeights(weightInfos);
 
-    // 'updateWeights' will not trigger the allocation immediately because no
-    // framework exists in 'role3' yet.
+    // 'updateWeights' does not trigger an allocation.
 
     // Framework3 registers with 'role3'.
     FrameworkInfo framework3 = createFrameworkInfo({"role3"});
     allocator->addFramework(framework3.id(), framework3, {}, true);
 
-    // 'addFramework' will trigger the allocation immediately, so it does not
-    // need to manually advance the clock here.
+    // 'addFramework' will trigger an allocation.
 
     // role1 share = 0.166 (cpus=2, mem=1024)
     //   framework1 share = 1


[11/11] mesos git commit: Updated CHANGELOG for hierarchical roles.

Posted by ne...@apache.org.
Updated CHANGELOG for hierarchical roles.

Review: https://reviews.apache.org/r/57516


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/044b72e8
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/044b72e8
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/044b72e8

Branch: refs/heads/master
Commit: 044b72e8ee839c49b9174b020469e836c5712192
Parents: 50a71c0
Author: Neil Conway <ne...@gmail.com>
Authored: Fri Mar 10 12:37:07 2017 -0500
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:19:27 2017 -0400

----------------------------------------------------------------------
 CHANGELOG | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/044b72e8/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 08f10da..09f3baa 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,12 +10,6 @@ This release contains the following new features:
     resource allocation, or implement multi-user resource allocation in
     the framework.
 
-  * [MESOS-6627] - Support for frameworks to modify the role(s) they are
-    subscribed to. This is essential to supporting "multi-user" frameworks
-    (see MESOS-1763) in that roles are expected to come and go over time
-    (e.g. new employees join, new teams are formed, employees leave, teams
-    are disbanded, etc).
-
   * [MESOS-6365] - Authentication and authorization support for HTTP executors.
     A new `--authenticate_http_executors` agent flag enables required
     authentication on the HTTP executor API. A new `--executor_secret_key` flag
@@ -29,13 +23,28 @@ This release contains the following new features:
     container. See 'docs/authorization.md' for more information on these
     implicit authorization rules.
 
+  * [MESOS-6375] - **Experimental** Support for hierarchical roles. A
+    role can now have one or more "child" roles. This makes it easier to
+    use roles to represent the hierarchical structure commonly found in
+    organizations. Support for hierarchical roles is experimental:
+    currently, quota cannot be set on a nested (child) role, and the
+    behavior of resource reservations for role hierarchies is expected
+    to change (presently, making a reservation to a role reserves the
+    resource for that exact role; in a future Mesos release, this will
+    instead reserve the resource for the sub-tree rooted at the role).
+
+  * [MESOS-6627] - Support for frameworks to modify the role(s) they are
+    subscribed to. This is essential to supporting "multi-user" frameworks
+    (see MESOS-1763) in that roles are expected to come and go over time
+    (e.g. new employees join, new teams are formed, employees leave, teams
+    are disbanded, etc).
+
 Deprecations:
   * [MESOS-7259] - Remove deprecated ACLs `SetQuota` and `RemoveQuota`
     This change is only applicable to the local authorizer since internally
     these acls were being translated to the `UPDATE_QUOTA` action.
 
 
-
 Release Notes - Mesos - Version 1.2.1 (WIP)
 -------------------------------------------
 * This is a bug fix release.


[03/11] mesos git commit: Avoided storing weights in the allocator.

Posted by ne...@apache.org.
Avoided storing weights in the allocator.

Previously, DRFSorter only kept track of weights for clients currently
in the sorter; the allocator supplied the current weight when adding a
client to the sorter.

This commit changes the sorter to keep track of all weights, not just
those for the active clients in the sorter. The allocator can now just
pass along role weights to the role sorters, rather than needing to
track them itself.

This commit changes the sorter API.
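
The new call pattern is visible in the sorter test changes below: a weight is
now set via a separate updateWeight() call rather than passed to add(). As a
minimal standalone illustration of the bookkeeping this commit describes
(weights tracked independently of the clients currently in the sorter, with an
unset weight defaulting to 1.0), the following sketch uses a hypothetical
MiniSorter rather than the actual Mesos classes:

  #include <cassert>
  #include <map>
  #include <set>
  #include <string>

  class MiniSorter
  {
  public:
    // New-style API: adding a client no longer takes a weight.
    void add(const std::string& name) { clients.insert(name); }
    void remove(const std::string& name) { clients.erase(name); }

    // Weights are stored even for names that have not (yet) been added.
    void updateWeight(const std::string& name, double weight)
    {
      weights[name] = weight;
    }

    // Unset weights fall back to the default of 1.0.
    double clientWeight(const std::string& name) const
    {
      auto it = weights.find(name);
      return it == weights.end() ? 1.0 : it->second;
    }

  private:
    std::set<std::string> clients;
    std::map<std::string, double> weights;
  };

  int main()
  {
    MiniSorter sorter;

    // A weight can be set before (or after) the client is added.
    sorter.updateWeight("roleA", 2.0);
    sorter.add("roleA");
    sorter.add("roleB");

    assert(sorter.clientWeight("roleA") == 2.0);
    assert(sorter.clientWeight("roleB") == 1.0); // default weight

    return 0;
  }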

Review: https://reviews.apache.org/r/57527


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b7f70ca4
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b7f70ca4
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b7f70ca4

Branch: refs/heads/master
Commit: b7f70ca477f5fce84f33658b28f1cde17dbfd452
Parents: d93f224
Author: Neil Conway <ne...@gmail.com>
Authored: Fri Mar 10 14:43:24 2017 -0500
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:01:52 2017 -0400

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 25 +++-----------
 src/master/allocator/mesos/hierarchical.hpp |  7 ----
 src/master/allocator/sorter/drf/sorter.cpp  | 42 ++++++++++++++----------
 src/master/allocator/sorter/drf/sorter.hpp  | 10 ++++--
 src/master/allocator/sorter/sorter.hpp      | 14 +++++---
 src/tests/sorter_tests.cpp                  | 13 +++++---
 6 files changed, 52 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/b7f70ca4/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 57a5e82..984a0a4 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1276,7 +1276,7 @@ void HierarchicalAllocatorProcess::setQuota(
   // Persist quota in memory and add the role into the corresponding
   // allocation group.
   quotas[role] = quota;
-  quotaRoleSorter->add(role, roleWeight(role));
+  quotaRoleSorter->add(role);
 
   // Copy allocation information for the quota'ed role.
   if (roleSorter->contains(role)) {
@@ -1336,18 +1336,11 @@ void HierarchicalAllocatorProcess::updateWeights(
 {
   CHECK(initialized);
 
-  // Update the weight for each specified role.
   foreach (const WeightInfo& weightInfo, weightInfos) {
     CHECK(weightInfo.has_role());
-    weights[weightInfo.role()] = weightInfo.weight();
 
-    if (quotas.contains(weightInfo.role())) {
-      quotaRoleSorter->update(weightInfo.role(), weightInfo.weight());
-    }
-
-    if (roleSorter->contains(weightInfo.role())) {
-      roleSorter->update(weightInfo.role(), weightInfo.weight());
-    }
+    quotaRoleSorter->updateWeight(weightInfo.role(), weightInfo.weight());
+    roleSorter->updateWeight(weightInfo.role(), weightInfo.weight());
   }
 
   // NOTE: Since weight changes do not result in rebalancing of
@@ -2047,16 +2040,6 @@ void HierarchicalAllocatorProcess::expire(
 }
 
 
-double HierarchicalAllocatorProcess::roleWeight(const string& name) const
-{
-  if (weights.contains(name)) {
-    return weights.at(name);
-  } else {
-    return 1.0; // Default weight.
-  }
-}
-
-
 bool HierarchicalAllocatorProcess::isWhitelisted(
     const SlaveID& slaveId) const
 {
@@ -2235,7 +2218,7 @@ void HierarchicalAllocatorProcess::trackFrameworkUnderRole(
   if (!roles.contains(role)) {
     roles[role] = {};
     CHECK(!roleSorter->contains(role));
-    roleSorter->add(role, roleWeight(role));
+    roleSorter->add(role);
 
     CHECK(!frameworkSorters.contains(role));
     frameworkSorters.insert({role, Owned<Sorter>(frameworkSorterFactory())});

http://git-wip-us.apache.org/repos/asf/mesos/blob/b7f70ca4/src/master/allocator/mesos/hierarchical.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index f84b057..219f508 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -256,9 +256,6 @@ protected:
       const SlaveID& slaveId,
       InverseOfferFilter* inverseOfferFilter);
 
-  // Returns the weight of the specified role name.
-  double roleWeight(const std::string& name) const;
-
   // Checks whether the slave is whitelisted.
   bool isWhitelisted(const SlaveID& slaveId) const;
 
@@ -432,10 +429,6 @@ protected:
   // (e.g. some tasks and/or executors are consuming resources under the role).
   hashmap<std::string, hashset<FrameworkID>> roles;
 
-  // Configured weight for each role, if any; if a role does not
-  // appear here, it has the default weight of 1.
-  hashmap<std::string, double> weights;
-
   // Configured quota for each role, if any. Setting quota for a role
   // changes the order that the role's frameworks are offered
   // resources. Quota comes before fair share, hence setting quota moves

http://git-wip-us.apache.org/repos/asf/mesos/blob/b7f70ca4/src/master/allocator/sorter/drf/sorter.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index ed54680..d2952b8 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -67,7 +67,7 @@ void DRFSorter::initialize(
 }
 
 
-void DRFSorter::add(const string& name, double weight)
+void DRFSorter::add(const string& name)
 {
   CHECK(!contains(name));
 
@@ -75,7 +75,6 @@ void DRFSorter::add(const string& name, double weight)
   clients.insert(client);
 
   allocations[name] = Allocation();
-  weights[name] = weight;
 
   if (metrics.isSome()) {
     metrics->add(name);
@@ -83,20 +82,6 @@ void DRFSorter::add(const string& name, double weight)
 }
 
 
-void DRFSorter::update(const string& name, double weight)
-{
-  CHECK(weights.contains(name));
-  weights[name] = weight;
-
-  // If the total resources have changed, we're going to
-  // recalculate all the shares, so don't bother just
-  // updating this client.
-  if (!dirty) {
-    updateShare(name);
-  }
-}
-
-
 void DRFSorter::remove(const string& name)
 {
   CHECK(contains(name));
@@ -108,7 +93,6 @@ void DRFSorter::remove(const string& name)
   }
 
   allocations.erase(name);
-  weights.erase(name);
 
   if (metrics.isSome()) {
     metrics->remove(name);
@@ -144,6 +128,16 @@ void DRFSorter::deactivate(const string& name)
 }
 
 
+void DRFSorter::updateWeight(const string& name, double weight)
+{
+  weights[name] = weight;
+
+  // It would be possible to avoid dirtying the tree here (in some
+  // cases), but it doesn't seem worth the complexity.
+  dirty = true;
+}
+
+
 void DRFSorter::allocated(
     const string& name,
     const SlaveID& slaveId,
@@ -482,7 +476,19 @@ double DRFSorter::calculateShare(const string& name) const
     }
   }
 
-  return share / weights.at(name);
+  return share / clientWeight(name);
+}
+
+
+double DRFSorter::clientWeight(const string& name) const
+{
+  Option<double> weight = weights.get(name);
+
+  if (weight.isNone()) {
+    return 1.0;
+  }
+
+  return weight.get();
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/b7f70ca4/src/master/allocator/sorter/drf/sorter.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.hpp b/src/master/allocator/sorter/drf/sorter.hpp
index 7632922..19fa41b 100644
--- a/src/master/allocator/sorter/drf/sorter.hpp
+++ b/src/master/allocator/sorter/drf/sorter.hpp
@@ -78,9 +78,7 @@ public:
   virtual void initialize(
       const Option<std::set<std::string>>& fairnessExcludeResourceNames);
 
-  virtual void add(const std::string& name, double weight = 1);
-
-  virtual void update(const std::string& name, double weight);
+  virtual void add(const std::string& name);
 
   virtual void remove(const std::string& name);
 
@@ -88,6 +86,8 @@ public:
 
   virtual void deactivate(const std::string& name);
 
+  virtual void updateWeight(const std::string& name, double weight);
+
   virtual void allocated(
       const std::string& name,
       const SlaveID& slaveId,
@@ -140,6 +140,10 @@ private:
   // Resources (by name) that will be excluded from fair sharing.
   Option<std::set<std::string>> fairnessExcludeResourceNames;
 
+  // Returns the weight associated with the given path. If no weight
+  // has been configured, the default weight (1.0) is returned.
+  double clientWeight(const std::string& name) const;
+
   // Returns an iterator to the specified client, if
   // it exists in this Sorter.
   std::set<Client, DRFComparator>::iterator find(const std::string& name);

http://git-wip-us.apache.org/repos/asf/mesos/blob/b7f70ca4/src/master/allocator/sorter/sorter.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/sorter.hpp b/src/master/allocator/sorter/sorter.hpp
index b3029fc..4de249e 100644
--- a/src/master/allocator/sorter/sorter.hpp
+++ b/src/master/allocator/sorter/sorter.hpp
@@ -58,11 +58,7 @@ public:
 
   // Adds a client to allocate resources to. A client
   // may be a user or a framework.
-  virtual void add(const std::string& client, double weight = 1) = 0;
-
-  // Updates the weight of a client. The client must have previously
-  // be added to the sorter, but it may currently be inactive.
-  virtual void update(const std::string& client, double weight) = 0;
+  virtual void add(const std::string& client) = 0;
 
   // Removes a client.
   virtual void remove(const std::string& client) = 0;
@@ -75,6 +71,14 @@ public:
   // It is a no-op if the client is already not in the sort.
   virtual void deactivate(const std::string& client) = 0;
 
+  // Updates the weight of a client name. The sorter will store this
+  // weight regardless of whether a client with this name has been
+  // added. If a client's weight is not changed, the default weight
+  // (1.0) is used. This interface does not support unsetting
+  // previously set weights; instead, a weight should be reset to the
+  // default value.
+  virtual void updateWeight(const std::string& client, double weight) = 0;
+
   // Specify that resources have been allocated to the given client.
   virtual void allocated(
       const std::string& client,

http://git-wip-us.apache.org/repos/asf/mesos/blob/b7f70ca4/src/tests/sorter_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/sorter_tests.cpp b/src/tests/sorter_tests.cpp
index 43bd857..8e86e4c 100644
--- a/src/tests/sorter_tests.cpp
+++ b/src/tests/sorter_tests.cpp
@@ -138,7 +138,8 @@ TEST(SorterTest, WDRFSorter)
 
   sorter.allocated("a", slaveId, Resources::parse("cpus:5;mem:5").get());
 
-  sorter.add("b", 2);
+  sorter.add("b");
+  sorter.updateWeight("b", 2);
   sorter.allocated("b", slaveId, Resources::parse("cpus:6;mem:6").get());
 
   // shares: a = .05, b = .03
@@ -150,7 +151,8 @@ TEST(SorterTest, WDRFSorter)
   // shares: a = .05, b = .03, c = .04
   EXPECT_EQ(vector<string>({"b", "c", "a"}), sorter.sort());
 
-  sorter.add("d", 10);
+  sorter.add("d");
+  sorter.updateWeight("d", 10);
   sorter.allocated("d", slaveId, Resources::parse("cpus:10;mem:20").get());
 
   // shares: a = .05, b = .03, c = .04, d = .02
@@ -165,7 +167,8 @@ TEST(SorterTest, WDRFSorter)
   // shares: a = .05, c = .04, d = .045
   EXPECT_EQ(vector<string>({"c", "d", "a"}), sorter.sort());
 
-  sorter.add("e", .1);
+  sorter.add("e");
+  sorter.updateWeight("e", 0.1);
   sorter.allocated("e", slaveId, Resources::parse("cpus:1;mem:1").get());
 
   // shares: a = .05, c = .04, d = .045, e = .1
@@ -197,8 +200,8 @@ TEST(SorterTest, WDRFSorterUpdateWeight)
   // shares: a = .05, b = .06
   EXPECT_EQ(vector<string>({"a", "b"}), sorter.sort());
 
-  // Increase b's  weight to flip the sort order.
-  sorter.update("b", 2);
+  // Increase b's weight to flip the sort order.
+  sorter.updateWeight("b", 2);
 
   // shares: a = .05, b = .03
   EXPECT_EQ(vector<string>({"b", "a"}), sorter.sort());


[07/11] mesos git commit: Cleaned up coding of `DRFComparator::operator()`.

Posted by ne...@apache.org.
Cleaned up coding of `DRFComparator::operator()`.

Review: https://reviews.apache.org/r/58112


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5bf32be9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5bf32be9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5bf32be9

Branch: refs/heads/master
Commit: 5bf32be976c404aa1834a1dbd378c0ef37173856
Parents: 3d8faf5
Author: Neil Conway <ne...@gmail.com>
Authored: Thu Mar 30 19:09:21 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Wed Apr 26 14:02:15 2017 -0400

----------------------------------------------------------------------
 src/master/allocator/sorter/drf/sorter.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5bf32be9/src/master/allocator/sorter/drf/sorter.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index ee5b8c0..7e91c85 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -44,13 +44,15 @@ namespace allocator {
 
 bool DRFComparator::operator()(const Client& client1, const Client& client2)
 {
-  if (client1.share == client2.share) {
-    if (client1.allocation.count == client2.allocation.count) {
-      return client1.name < client2.name;
-    }
+  if (client1.share != client2.share) {
+    return client1.share < client2.share;
+  }
+
+  if (client1.allocation.count != client2.allocation.count) {
     return client1.allocation.count < client2.allocation.count;
   }
-  return client1.share < client2.share;
+
+  return client1.name < client2.name;
 }
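
For reference, the reordered comparator establishes the same total order as
before: lower dominant share first, then fewer allocations, then the
lexicographically smaller name. The sketch below (a stripped-down Client with
a plain allocation count, not the real sorter types) shows that ordering in
isolation:

  #include <cassert>
  #include <set>
  #include <string>

  // Simplified stand-in for the sorter's Client; the real type tracks
  // allocations as a structure, here reduced to a count.
  struct Client
  {
    std::string name;
    double share;
    int allocationCount;
  };

  struct DRFComparator
  {
    bool operator()(const Client& client1, const Client& client2) const
    {
      if (client1.share != client2.share) {
        return client1.share < client2.share;
      }

      if (client1.allocationCount != client2.allocationCount) {
        return client1.allocationCount < client2.allocationCount;
      }

      return client1.name < client2.name;
    }
  };

  int main()
  {
    std::set<Client, DRFComparator> clients;

    clients.insert({"a", 0.05, 1});
    clients.insert({"b", 0.03, 2});
    clients.insert({"c", 0.03, 1}); // ties with "b" on share, wins on count

    // Iteration order: c (share .03, 1 allocation), b (share .03, 2
    // allocations), then a (share .05).
    assert(clients.begin()->name == "c");

    return 0;
  }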