You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by jo...@apache.org on 2016/03/03 02:18:06 UTC

[2/2] mesos git commit: Improved allocator performance for labeled reservations and volumes.

Improved allocator performance for labeled reservations and volumes.

When the cluster contains many resources that have either labeled
reservations or persistent volumes, allocator performance can decrease
substantially because such metadata prevents `Resource` objects from
being merged together inside the allocator. As a result, the allocator
must manipulate `Resources` vectors that consist of many small
individual `Resource` values; since many `Resources` operations take
linear-time in the number of `Resource` values they contain, this can
cause very significant slowdowns.

As a short-term solution, this commit strips dynamic reservation and
persistent volume information from the `Resources` objects used
internally by the allocator, because they are not needed when
aggregating resource quantities together.

A long-term solution for this problem will be addressed as work on
refactoring the allocator more generally.

Review: https://reviews.apache.org/r/44251/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b4d746fc
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b4d746fc
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b4d746fc

Branch: refs/heads/master
Commit: b4d746fc0934f733b82e06277d7e76431b054b38
Parents: 204398e
Author: Neil Conway <ne...@gmail.com>
Authored: Wed Mar 2 16:54:46 2016 -0800
Committer: Joris Van Remoortere <jo...@gmail.com>
Committed: Wed Mar 2 17:17:04 2016 -0800

----------------------------------------------------------------------
 src/master/allocator/mesos/hierarchical.cpp | 44 ++++++++++------
 src/master/allocator/sorter/drf/sorter.cpp  | 64 +++++++++++++++---------
 src/master/allocator/sorter/drf/sorter.hpp  | 15 ++++--
 src/master/allocator/sorter/sorter.hpp      | 13 +++--
 4 files changed, 89 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/b4d746fc/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 24fa50f..7029107 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -1183,7 +1183,7 @@ void HierarchicalAllocatorProcess::allocate(
   // TODO(vinod): Implement a smarter sorting algorithm.
   std::random_shuffle(slaveIds.begin(), slaveIds.end());
 
-  // Returns the __amount__ of resources allocated to a quota role. Since we
+  // Returns the __quantity__ of resources allocated to a quota role. Since we
   // account for reservations and persistent volumes toward quota, we strip
   // reservation and persistent volume related information for comparability.
   // The result is used to determine whether a role's quota is satisfied, and
@@ -1194,13 +1194,16 @@ void HierarchicalAllocatorProcess::allocate(
   auto getQuotaRoleAllocatedResources = [this](const string& role) {
     CHECK(quotas.contains(role));
 
-    // Strip the reservation and persistent volume info.
+    // NOTE: `allocationScalarQuantities` omits dynamic reservation and
+    // persistent volume info, but we additionally strip `role` here.
     Resources resources;
 
-    foreach (Resource resource, quotaRoleSorter->allocationScalars(role)) {
+    foreach (Resource resource,
+             quotaRoleSorter->allocationScalarQuantities(role)) {
+      CHECK(!resource.has_reservation());
+      CHECK(!resource.has_disk());
+
       resource.set_role("*");
-      resource.clear_reservation();
-      resource.clear_disk();
       resources += resource;
     }
 
@@ -1220,7 +1223,8 @@ void HierarchicalAllocatorProcess::allocate(
         continue;
       }
 
-      // Get the total amount of resources allocated to a quota role.
+      // Get the total quantity of resources allocated to a quota role. The
+      // value omits role, reservation, and persistence info.
       Resources roleConsumedResources = getQuotaRoleAllocatedResources(role);
 
       // If quota for the role is satisfied, we do not need to do any further
@@ -1299,17 +1303,21 @@ void HierarchicalAllocatorProcess::allocate(
     }
   }
 
-  // Calculate how many scalar resources (including revocable and reserved)
-  // are available for allocation in the next round. We need this in order
-  // to ensure we do not over-allocate resources during the second stage.
+  // Calculate the total quantity of scalar resources (including revocable
+  // and reserved) that are available for allocation in the next round. We
+  // need this in order to ensure we do not over-allocate resources during
+  // the second stage.
+  //
+  // For performance reasons (MESOS-4833), this omits information about
+  // dynamic reservations or persistent volumes in the resources.
   //
   // NOTE: We use total cluster resources, and not just those based on the
   // agents participating in the current allocation (i.e. provided as an
   // argument to the `allocate()` call) so that frameworks in roles without
   // quota are not unnecessarily deprived of resources.
-  Resources remainingClusterResources = roleSorter->totalScalars();
+  Resources remainingClusterResources = roleSorter->totalScalarQuantities();
   foreachkey (const string& role, activeRoles) {
-    remainingClusterResources -= roleSorter->allocationScalars(role);
+    remainingClusterResources -= roleSorter->allocationScalarQuantities(role);
   }
 
   // Frameworks in a quota'ed role may temporarily reject resources by
@@ -1339,6 +1347,12 @@ void HierarchicalAllocatorProcess::allocate(
   //     than available, i.e. `remainingClusterResources` does not contain
   //     (`allocatedStage2` + potential offer). In this case we skip this
   //     agent and continue to the next one.
+  //
+  // NOTE: Like `remainingClusterResources`, `allocatedStage2` omits
+  // information about dynamic reservations and persistent volumes for
+  // performance reasons. This invariant is preserved because we only add
+  // resources to it that have also had this metadata stripped from them
+  // (typically by using `Resources::createStrippedScalarQuantity`).
   Resources allocatedStage2;
 
   // At this point resources for quotas are allocated or accounted for.
@@ -1402,9 +1416,11 @@ void HierarchicalAllocatorProcess::allocate(
         // stage to use more than `remainingClusterResources`, move along.
         // We do not terminate early, as offers generated further in the
         // loop may be small enough to fit within `remainingClusterResources`.
-        const Resources scalarResources = resources.scalars();
+        const Resources scalarQuantity =
+          resources.createStrippedScalarQuantity();
+
         if (!remainingClusterResources.contains(
-                allocatedStage2 + scalarResources)) {
+                allocatedStage2 + scalarQuantity)) {
           continue;
         }
 
@@ -1417,7 +1433,7 @@ void HierarchicalAllocatorProcess::allocate(
         // NOTE: We may have already allocated some resources on the current
         // agent as part of quota.
         offerable[frameworkId][slaveId] += resources;
-        allocatedStage2 += scalarResources;
+        allocatedStage2 += scalarQuantity;
         slaves[slaveId].allocated += resources;
 
         frameworkSorters[role]->add(slaveId, resources);

http://git-wip-us.apache.org/repos/asf/mesos/blob/b4d746fc/src/master/allocator/sorter/drf/sorter.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.cpp b/src/master/allocator/sorter/drf/sorter.cpp
index 9e863dd..c14f9a4 100644
--- a/src/master/allocator/sorter/drf/sorter.cpp
+++ b/src/master/allocator/sorter/drf/sorter.cpp
@@ -112,7 +112,8 @@ void DRFSorter::allocated(
   }
 
   allocations[name].resources[slaveId] += resources;
-  allocations[name].scalars += resources.scalars();
+  allocations[name].scalarQuantities +=
+    resources.createStrippedScalarQuantity();
 
   // If the total resources have changed, we're going to
   // recalculate all the shares, so don't bother just
@@ -136,23 +137,28 @@ void DRFSorter::update(
   // Otherwise, we need to ensure we re-calculate the shares, as
   // is being currently done, for safety.
 
+  const Resources oldAllocationQuantity =
+    oldAllocation.createStrippedScalarQuantity();
+  const Resources newAllocationQuantity =
+    newAllocation.createStrippedScalarQuantity();
+
   CHECK(total_.resources[slaveId].contains(oldAllocation));
-  CHECK(total_.scalars.contains(oldAllocation.scalars()));
+  CHECK(total_.scalarQuantities.contains(oldAllocationQuantity));
 
   total_.resources[slaveId] -= oldAllocation;
   total_.resources[slaveId] += newAllocation;
 
-  total_.scalars -= oldAllocation.scalars();
-  total_.scalars += newAllocation.scalars();
+  total_.scalarQuantities -= oldAllocationQuantity;
+  total_.scalarQuantities += newAllocationQuantity;
 
   CHECK(allocations[name].resources[slaveId].contains(oldAllocation));
-  CHECK(allocations[name].scalars.contains(oldAllocation.scalars()));
+  CHECK(allocations[name].scalarQuantities.contains(oldAllocationQuantity));
 
   allocations[name].resources[slaveId] -= oldAllocation;
   allocations[name].resources[slaveId] += newAllocation;
 
-  allocations[name].scalars -= oldAllocation.scalars();
-  allocations[name].scalars += newAllocation.scalars();
+  allocations[name].scalarQuantities -= oldAllocationQuantity;
+  allocations[name].scalarQuantities += newAllocationQuantity;
 
   // Just assume the total has changed, per the TODO above.
   dirty = true;
@@ -167,11 +173,11 @@ const hashmap<SlaveID, Resources>& DRFSorter::allocation(const string& name)
 }
 
 
-const Resources& DRFSorter::allocationScalars(const string& name)
+const Resources& DRFSorter::allocationScalarQuantities(const string& name)
 {
   CHECK(contains(name));
 
-  return allocations[name].scalars;
+  return allocations[name].scalarQuantities;
 }
 
 
@@ -213,9 +219,9 @@ const hashmap<SlaveID, Resources>& DRFSorter::total() const
 }
 
 
-const Resources& DRFSorter::totalScalars() const
+const Resources& DRFSorter::totalScalarQuantities() const
 {
-  return total_.scalars;
+  return total_.scalarQuantities;
 }
 
 
@@ -225,7 +231,8 @@ void DRFSorter::unallocated(
     const Resources& resources)
 {
   allocations[name].resources[slaveId] -= resources;
-  allocations[name].scalars -= resources.scalars();
+  allocations[name].scalarQuantities -=
+    resources.createStrippedScalarQuantity();
 
   if (allocations[name].resources[slaveId].empty()) {
     allocations[name].resources.erase(slaveId);
@@ -241,7 +248,7 @@ void DRFSorter::add(const SlaveID& slaveId, const Resources& resources)
 {
   if (!resources.empty()) {
     total_.resources[slaveId] += resources;
-    total_.scalars += resources.scalars();
+    total_.scalarQuantities += resources.createStrippedScalarQuantity();
 
     // We have to recalculate all shares when the total resources
     // change, but we put it off until sort is called so that if
@@ -258,7 +265,7 @@ void DRFSorter::remove(const SlaveID& slaveId, const Resources& resources)
     CHECK(total_.resources.contains(slaveId));
 
     total_.resources[slaveId] -= resources;
-    total_.scalars -= resources.scalars();
+    total_.scalarQuantities -= resources.createStrippedScalarQuantity();
 
     if (total_.resources[slaveId].empty()) {
       total_.resources.erase(slaveId);
@@ -271,10 +278,13 @@ void DRFSorter::remove(const SlaveID& slaveId, const Resources& resources)
 
 void DRFSorter::update(const SlaveID& slaveId, const Resources& resources)
 {
-  CHECK(total_.scalars.contains(total_.resources[slaveId].scalars()));
+  const Resources oldSlaveQuantity =
+    total_.resources[slaveId].createStrippedScalarQuantity();
+
+  CHECK(total_.scalarQuantities.contains(oldSlaveQuantity));
 
-  total_.scalars -= total_.resources[slaveId].scalars();
-  total_.scalars += resources.scalars();
+  total_.scalarQuantities -= oldSlaveQuantity;
+  total_.scalarQuantities += resources.createStrippedScalarQuantity();
 
   total_.resources[slaveId] = resources;
 
@@ -352,13 +362,17 @@ double DRFSorter::calculateShare(const string& name)
   // currently does not take into account resources that are not
   // scalars.
 
-  foreach (const string& scalar, total_.scalars.names()) {
+  foreach (const string& scalar, total_.scalarQuantities.names()) {
     // We collect the scalar accumulated total value from the
     // `Resources` object.
     //
-    // NOTE: Scalar resources may be spread across multiple
-    // 'Resource' objects. E.g. persistent volumes.
-    Option<Value::Scalar> __total = total_.scalars.get<Value::Scalar>(scalar);
+    // NOTE: Although in principle scalar resources may be spread
+    // across multiple `Resource` objects (e.g., persistent volumes),
+    // we currently strip persistence and reservation metadata from
+    // the resources in `scalarQuantities`.
+    Option<Value::Scalar> __total =
+      total_.scalarQuantities.get<Value::Scalar>(scalar);
+
     CHECK_SOME(__total);
     const double _total = __total.get().value();
 
@@ -368,10 +382,12 @@ double DRFSorter::calculateShare(const string& name)
       // We collect the scalar accumulated allocation value from the
       // `Resources` object.
       //
-      // NOTE: Scalar resources may be spread across multiple
-      // 'Resource' objects. E.g. persistent volumes.
+      // NOTE: Although in principle scalar resources may be spread
+      // across multiple `Resource` objects (e.g., persistent volumes),
+      // we currently strip persistence and reservation metadata from
+      // the resources in `scalarQuantities`.
       Option<Value::Scalar> _allocation =
-        allocations[name].scalars.get<Value::Scalar>(scalar);
+        allocations[name].scalarQuantities.get<Value::Scalar>(scalar);
 
       if (_allocation.isSome()) {
         allocation = _allocation.get().value();

http://git-wip-us.apache.org/repos/asf/mesos/blob/b4d746fc/src/master/allocator/sorter/drf/sorter.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/drf/sorter.hpp b/src/master/allocator/sorter/drf/sorter.hpp
index 46b2a9c..f316bb5 100644
--- a/src/master/allocator/sorter/drf/sorter.hpp
+++ b/src/master/allocator/sorter/drf/sorter.hpp
@@ -92,7 +92,7 @@ public:
   virtual const hashmap<SlaveID, Resources>& allocation(
       const std::string& name);
 
-  virtual const Resources& allocationScalars(const std::string& name);
+  virtual const Resources& allocationScalarQuantities(const std::string& name);
 
   virtual hashmap<std::string, Resources> allocation(const SlaveID& slaveId);
 
@@ -100,7 +100,7 @@ public:
 
   virtual const hashmap<SlaveID, Resources>& total() const;
 
-  virtual const Resources& totalScalars() const;
+  virtual const Resources& totalScalarQuantities() const;
 
   virtual void add(const SlaveID& slaveId, const Resources& resources);
 
@@ -142,15 +142,20 @@ private:
     // NOTE: Scalars can be safely aggregated across slaves. We keep
     // that to speed up the calculation of shares. See MESOS-2891 for
     // the reasons why we want to do that.
-    Resources scalars;
+    //
+    // NOTE: We omit information about dynamic reservations and persistent
+    // volumes here to enable resources to be aggregated across slaves
+    // more effectively. See MESOS-4833 for more information.
+    Resources scalarQuantities;
   } total_;
 
   // Allocation for a client.
   struct Allocation {
     hashmap<SlaveID, Resources> resources;
 
-    // Similarly, we aggregated scalars across slaves. See note above.
-    Resources scalars;
+    // Similarly, we aggregate scalars across slaves and omit information
+    // about dynamic reservations and persistent volumes. See notes above.
+    Resources scalarQuantities;
   };
 
   // Maps client names to the resources they have been allocated.

http://git-wip-us.apache.org/repos/asf/mesos/blob/b4d746fc/src/master/allocator/sorter/sorter.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/sorter/sorter.hpp b/src/master/allocator/sorter/sorter.hpp
index ba91a38..e2338d5 100644
--- a/src/master/allocator/sorter/sorter.hpp
+++ b/src/master/allocator/sorter/sorter.hpp
@@ -82,8 +82,11 @@ public:
   virtual const hashmap<SlaveID, Resources>& allocation(
       const std::string& client) = 0;
 
-  // Returns the sum of the scalar resources that are allocated to this client.
-  virtual const Resources& allocationScalars(const std::string& client) = 0;
+  // Returns the total scalar resource quantities that are allocated to
+  // this client. This omits metadata about dynamic reservations and
+  // persistent volumes; see `Resources::createStrippedScalarQuantity`.
+  virtual const Resources& allocationScalarQuantities(
+      const std::string& client) = 0;
 
   // Returns the clients that have allocations on this slave.
   virtual hashmap<std::string, Resources> allocation(
@@ -98,8 +101,10 @@ public:
   // Returns the total resources that are in this sorter.
   virtual const hashmap<SlaveID, Resources>& total() const = 0;
 
-  // Returns the sum of the total scalar resources that are in this sorter.
-  virtual const Resources& totalScalars() const = 0;
+  // Returns the total scalar resource quantities in this sorter. This
+  // omits metadata about dynamic reservations and persistent volumes; see
+  // `Resources::createStrippedScalarQuantity`.
+  virtual const Resources& totalScalarQuantities() const = 0;
 
   // Add resources to the total pool of resources this
   // Sorter should consider.