You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2017/12/05 21:59:05 UTC

[3/8] mesos git commit: Added `SlaveInfo` parameter to Allocator::updateSlave().

Added `SlaveInfo` parameter to Allocator::updateSlave().

This is done mainly in preparation for the upcoming agent
reconfiguration patches, where it will become possible that
`updateSlave()` is passed a different SlaveInfo than `addSlave()`,
and schedulers who rely on some fields in it for their scheduling
decisions need to be able to check and compare them.

Additionally, the HierarchicalDRFAllocator was changed to store
the full SlaveInfo for every slave instead of storing domain and
hostname separately.

Review: https://reviews.apache.org/r/64010/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9015cd31
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9015cd31
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9015cd31

Branch: refs/heads/master
Commit: 9015cd316bf6d185363cd0caf1705e2fb118ed63
Parents: 054bd5a
Author: Benno Evers <be...@mesosphere.com>
Authored: Tue Dec 5 13:55:34 2017 -0800
Committer: Vinod Kone <vi...@gmail.com>
Committed: Tue Dec 5 13:55:34 2017 -0800

----------------------------------------------------------------------
 include/mesos/allocator/allocator.hpp       |  4 ++++
 src/master/allocator/mesos/allocator.hpp    |  5 +++-
 src/master/allocator/mesos/hierarchical.cpp | 29 ++++++++++++++----------
 src/master/allocator/mesos/hierarchical.hpp |  9 +++++---
 src/master/master.cpp                       |  4 ++--
 src/tests/allocator.hpp                     |  9 ++++----
 src/tests/hierarchical_allocator_tests.cpp  | 22 ++++++++++--------
 7 files changed, 51 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/9015cd31/include/mesos/allocator/allocator.hpp
----------------------------------------------------------------------
diff --git a/include/mesos/allocator/allocator.hpp b/include/mesos/allocator/allocator.hpp
index acb9e4f..e5a5355 100644
--- a/include/mesos/allocator/allocator.hpp
+++ b/include/mesos/allocator/allocator.hpp
@@ -205,11 +205,15 @@ public:
   /**
    * Updates an agent.
    *
+   * TODO(bevers): Make `total` and `capabilities` non-optional.
+   *
+   * @param slaveInfo The current slave info of the agent.
    * @param total The new total resources on the agent.
    * @param capabilities The new capabilities of the agent.
    */
   virtual void updateSlave(
       const SlaveID& slave,
+      const SlaveInfo& slaveInfo,
       const Option<Resources>& total = None(),
       const Option<std::vector<SlaveInfo::Capability>>&
           capabilities = None()) = 0;

http://git-wip-us.apache.org/repos/asf/mesos/blob/9015cd31/src/master/allocator/mesos/allocator.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/allocator.hpp b/src/master/allocator/mesos/allocator.hpp
index 48254b6..c453c01 100644
--- a/src/master/allocator/mesos/allocator.hpp
+++ b/src/master/allocator/mesos/allocator.hpp
@@ -99,6 +99,7 @@ public:
 
   void updateSlave(
       const SlaveID& slave,
+      const SlaveInfo& slaveInfo,
       const Option<Resources>& total = None(),
       const Option<std::vector<SlaveInfo::Capability>>& capabilities = None());
 
@@ -244,6 +245,7 @@ public:
 
   virtual void updateSlave(
       const SlaveID& slave,
+      const SlaveInfo& slaveInfo,
       const Option<Resources>& total = None(),
       const Option<std::vector<SlaveInfo::Capability>>&
           capabilities = None()) = 0;
@@ -487,6 +489,7 @@ inline void MesosAllocator<AllocatorProcess>::removeSlave(
 template <typename AllocatorProcess>
 inline void MesosAllocator<AllocatorProcess>::updateSlave(
     const SlaveID& slaveId,
+    const SlaveInfo& slaveInfo,
     const Option<Resources>& total,
     const Option<std::vector<SlaveInfo::Capability>>& capabilities)
 {
@@ -494,11 +497,11 @@ inline void MesosAllocator<AllocatorProcess>::updateSlave(
       process,
       &MesosAllocatorProcess::updateSlave,
       slaveId,
+      slaveInfo,
       total,
       capabilities);
 }
 
-
 template <typename AllocatorProcess>
 void MesosAllocator<AllocatorProcess>::addResourceProvider(
     const SlaveID& slave,

http://git-wip-us.apache.org/repos/asf/mesos/blob/9015cd31/src/master/allocator/mesos/hierarchical.cpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index 4bc9fb6..b22829b 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -507,6 +507,7 @@ void HierarchicalAllocatorProcess::addSlave(
 {
   CHECK(initialized);
   CHECK(!slaves.contains(slaveId));
+  CHECK_EQ(slaveId, slaveInfo.id());
   CHECK(!paused || expectedAgentCount.isSome());
 
   slaves[slaveId] = Slave();
@@ -516,13 +517,9 @@ void HierarchicalAllocatorProcess::addSlave(
   slave.total = total;
   slave.allocated = Resources::sum(used);
   slave.activated = true;
-  slave.hostname = slaveInfo.hostname();
+  slave.info = slaveInfo;
   slave.capabilities = protobuf::slave::Capabilities(capabilities);
 
-  if (slaveInfo.has_domain()) {
-    slave.domain = slaveInfo.domain();
-  }
-
   // NOTE: We currently implement maintenance in the allocator to be able to
   // leverage state and features such as the FrameworkSorter and OfferFilter.
   if (unavailability.isSome()) {
@@ -576,7 +573,7 @@ void HierarchicalAllocatorProcess::addSlave(
     resume();
   }
 
-  LOG(INFO) << "Added agent " << slaveId << " (" << slave.hostname << ")"
+  LOG(INFO) << "Added agent " << slaveId << " (" << slave.info.hostname() << ")"
             << " with " << slave.total
             << " (allocated: " << slave.allocated << ")";
 
@@ -615,14 +612,22 @@ void HierarchicalAllocatorProcess::removeSlave(
 
 void HierarchicalAllocatorProcess::updateSlave(
     const SlaveID& slaveId,
+    const SlaveInfo& info,
     const Option<Resources>& total,
     const Option<vector<SlaveInfo::Capability>>& capabilities)
 {
   CHECK(initialized);
   CHECK(slaves.contains(slaveId));
+  CHECK_EQ(slaveId, info.id());
 
   Slave& slave = slaves.at(slaveId);
 
+  // We unconditionally overwrite the old domain and hostname: Even though
+  // the master places some restrictions on this (i.e. agents are not allowed
+  // to re-register with a different hostname) inside the allocator it
+  // doesn't matter, as the algorithm will work correctly either way.
+  slave.info = info;
+
   bool updated = false;
 
   // Update agent capabilities.
@@ -635,7 +640,7 @@ void HierarchicalAllocatorProcess::updateSlave(
     if (newCapabilities != oldCapabilities) {
       updated = true;
 
-      LOG(INFO) << "Agent " << slaveId << " (" << slave.hostname << ")"
+      LOG(INFO) << "Agent " << slaveId << " (" << slave.info.hostname() << ")"
                 << " updated with capabilities " << slave.capabilities;
     }
   }
@@ -643,7 +648,7 @@ void HierarchicalAllocatorProcess::updateSlave(
   if (total.isSome()) {
     updated = updateSlaveTotal(slaveId, total.get()) || updated;
 
-    LOG(INFO) << "Agent " << slaveId << " (" << slave.hostname << ")"
+    LOG(INFO) << "Agent " << slaveId << " (" << slave.info.hostname() << ")"
               << " updated with total resources " << total.get();
   }
 
@@ -2119,7 +2124,7 @@ bool HierarchicalAllocatorProcess::isWhitelisted(
 
   const Slave& slave = slaves.at(slaveId);
 
-  return whitelist.isNone() || whitelist->contains(slave.hostname);
+  return whitelist.isNone() || whitelist->contains(slave.info.hostname());
 }
 
 
@@ -2393,7 +2398,7 @@ bool HierarchicalAllocatorProcess::updateSlaveTotal(
 bool HierarchicalAllocatorProcess::isRemoteSlave(const Slave& slave) const
 {
   // If the slave does not have a configured domain, assume it is not remote.
-  if (slave.domain.isNone()) {
+  if (!slave.info.has_domain()) {
     return false;
   }
 
@@ -2402,7 +2407,7 @@ bool HierarchicalAllocatorProcess::isRemoteSlave(const Slave& slave) const
   // might change in the future, if more types of domains are added.
   // For forward compatibility, we treat agents with a configured
   // domain but no fault domain as having no configured domain.
-  if (!slave.domain->has_fault_domain()) {
+  if (!slave.info.domain().has_fault_domain()) {
     return false;
   }
 
@@ -2418,7 +2423,7 @@ bool HierarchicalAllocatorProcess::isRemoteSlave(const Slave& slave) const
   const DomainInfo::FaultDomain::RegionInfo& masterRegion =
     domain->fault_domain().region();
   const DomainInfo::FaultDomain::RegionInfo& slaveRegion =
-    slave.domain->fault_domain().region();
+    slave.info.domain().fault_domain().region();
 
   return masterRegion != slaveRegion;
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/9015cd31/src/master/allocator/mesos/hierarchical.hpp
----------------------------------------------------------------------
diff --git a/src/master/allocator/mesos/hierarchical.hpp b/src/master/allocator/mesos/hierarchical.hpp
index 41ffe17..14dcf3d 100644
--- a/src/master/allocator/mesos/hierarchical.hpp
+++ b/src/master/allocator/mesos/hierarchical.hpp
@@ -144,6 +144,7 @@ public:
 
   void updateSlave(
       const SlaveID& slave,
+      const SlaveInfo& slaveInfo,
       const Option<Resources>& total = None(),
       const Option<std::vector<SlaveInfo::Capability>>& capabilities = None());
 
@@ -381,12 +382,14 @@ protected:
 
     bool activated;  // Whether to offer resources.
 
-    std::string hostname;
+    // The `SlaveInfo` that was passed to the allocator when the slave was added
+    // or updated. Currently only two fields are used: `hostname` for host
+    // whitelisting and in log messages, and `domain` for region-aware
+    // scheduling.
+    SlaveInfo info;
 
     protobuf::slave::Capabilities capabilities;
 
-    Option<DomainInfo> domain;
-
     // Represents a scheduled unavailability due to maintenance for a specific
     // slave, and the responses from frameworks as to whether they will be able
     // to gracefully handle this unavailability.

http://git-wip-us.apache.org/repos/asf/mesos/blob/9015cd31/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index f77a1ed..6679e25 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -6573,7 +6573,7 @@ void Master::_reregisterSlave(
     slave->resourceVersions = protobuf::parseResourceVersions(
         {resourceVersions.begin(), resourceVersions.end()});
 
-    allocator->updateSlave(slave->id, None(), agentCapabilities);
+    allocator->updateSlave(slave->id, slave->info, None(), agentCapabilities);
 
     // Reconcile tasks between master and slave, and send the
     // `SlaveReregisteredMessage`.
@@ -7424,7 +7424,7 @@ void Master::updateSlave(const UpdateSlaveMessage& message)
   }
 
   // Now update the agent's total resources in the allocator.
-  allocator->updateSlave(slaveId, slave->totalResources);
+  allocator->updateSlave(slaveId, slave->info, slave->totalResources);
 
   // Then rescind outstanding offers affected by the update.
   // NOTE: Need a copy of offers because the offers are removed inside the loop.

http://git-wip-us.apache.org/repos/asf/mesos/blob/9015cd31/src/tests/allocator.hpp
----------------------------------------------------------------------
diff --git a/src/tests/allocator.hpp b/src/tests/allocator.hpp
index fc5d9ef..341efa6 100644
--- a/src/tests/allocator.hpp
+++ b/src/tests/allocator.hpp
@@ -99,7 +99,7 @@ ACTION_P(InvokeRemoveSlave, allocator)
 
 ACTION_P(InvokeUpdateSlave, allocator)
 {
-  allocator->real->updateSlave(arg0, arg1, arg2);
+  allocator->real->updateSlave(arg0, arg1, arg2, arg3);
 }
 
 
@@ -280,9 +280,9 @@ public:
     EXPECT_CALL(*this, removeSlave(_))
       .WillRepeatedly(DoDefault());
 
-    ON_CALL(*this, updateSlave(_, _, _))
+    ON_CALL(*this, updateSlave(_, _, _, _))
       .WillByDefault(InvokeUpdateSlave(this));
-    EXPECT_CALL(*this, updateSlave(_, _, _))
+    EXPECT_CALL(*this, updateSlave(_, _, _, _))
       .WillRepeatedly(DoDefault());
 
     ON_CALL(*this, addResourceProvider(_, _, _))
@@ -416,8 +416,9 @@ public:
   MOCK_METHOD1(removeSlave, void(
       const SlaveID&));
 
-  MOCK_METHOD3(updateSlave, void(
+  MOCK_METHOD4(updateSlave, void(
       const SlaveID&,
+      const SlaveInfo&,
       const Option<Resources>&,
       const Option<std::vector<SlaveInfo::Capability>>&));
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/9015cd31/src/tests/hierarchical_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/hierarchical_allocator_tests.cpp b/src/tests/hierarchical_allocator_tests.cpp
index 154049c..2fc5f5e 100644
--- a/src/tests/hierarchical_allocator_tests.cpp
+++ b/src/tests/hierarchical_allocator_tests.cpp
@@ -2019,7 +2019,7 @@ TEST_F(HierarchicalAllocatorTest, UpdateSlaveOversubscribedResources)
 
   // Update the slave with 10 oversubscribed cpus.
   Resources oversubscribed = createRevocableResources("cpus", "10");
-  allocator->updateSlave(slave.id(), slave.resources() + oversubscribed);
+  allocator->updateSlave(slave.id(), slave, slave.resources() + oversubscribed);
 
   // The next allocation should be for 10 oversubscribed resources.
   expected = Allocation(
@@ -2030,7 +2030,8 @@ TEST_F(HierarchicalAllocatorTest, UpdateSlaveOversubscribedResources)
 
   // Update the slave again with 12 oversubscribed cpus.
   Resources oversubscribed2 = createRevocableResources("cpus", "12");
-  allocator->updateSlave(slave.id(), slave.resources() + oversubscribed2);
+  allocator->updateSlave(
+      slave.id(), slave, slave.resources() + oversubscribed2);
 
   // The next allocation should be for 2 oversubscribed cpus.
   expected = Allocation(
@@ -2041,7 +2042,8 @@ TEST_F(HierarchicalAllocatorTest, UpdateSlaveOversubscribedResources)
 
   // Update the slave again with 5 oversubscribed cpus.
   Resources oversubscribed3 = createRevocableResources("cpus", "5");
-  allocator->updateSlave(slave.id(), slave.resources() + oversubscribed3);
+  allocator->updateSlave(
+      slave.id(), slave, slave.resources() + oversubscribed3);
 
   // Since there are no more available oversubscribed resources there
   // shouldn't be an allocation.
@@ -2090,6 +2092,7 @@ TEST_F(HierarchicalAllocatorTest, UpdateSlaveTotalResources)
 
   allocator->updateSlave(
       agent.id(),
+      agent,
       agent.resources() + addedResources);
 
   const Allocation expected2 = Allocation(
@@ -2104,7 +2107,7 @@ TEST_F(HierarchicalAllocatorTest, UpdateSlaveTotalResources)
   const Resources agentResources2 =
     Resources::parse("cpus:50;mem:50;disk:50").get();
 
-  allocator->updateSlave(agent.id(), agentResources2);
+  allocator->updateSlave(agent.id(), agent, agentResources2);
 
   // Recover all agent resources allocated to the framework in the last two
   // allocations. We will subsequently be offered the complete agent which has
@@ -2129,7 +2132,7 @@ TEST_F(HierarchicalAllocatorTest, UpdateSlaveTotalResources)
   // Set the agent's total resources to its original value. This will trigger
   // allocation of the newly added `agentResources2` resources now available on
   // the agent.
-  allocator->updateSlave(agent.id(), agent.resources());
+  allocator->updateSlave(agent.id(), agent, agent.resources());
 
   const Allocation expected4 = Allocation(
       framework.id(),
@@ -2174,7 +2177,7 @@ TEST_F(HierarchicalAllocatorTest, UpdateSlaveCapabilities)
 
   // Update the agent to be MULTI_ROLE capable.
 
-  allocator->updateSlave(agent.id(), None(), AGENT_CAPABILITIES());
+  allocator->updateSlave(agent.id(), agent, None(), AGENT_CAPABILITIES());
 
   Clock::settle();
 
@@ -2218,7 +2221,7 @@ TEST_F(HierarchicalAllocatorTest, OversubscribedNotAllocated)
 
   // Update the slave with 10 oversubscribed cpus.
   Resources oversubscribed = createRevocableResources("cpus", "10");
-  allocator->updateSlave(slave.id(), slave.resources() + oversubscribed);
+  allocator->updateSlave(slave.id(), slave, slave.resources() + oversubscribed);
 
   // No allocation should be made for oversubscribed resources because
   // the framework has not opted in for them.
@@ -2262,7 +2265,7 @@ TEST_F(HierarchicalAllocatorTest, RecoverOversubscribedResources)
 
   // Update the slave with 10 oversubscribed cpus.
   Resources oversubscribed = createRevocableResources("cpus", "10");
-  allocator->updateSlave(slave.id(), slave.resources() + oversubscribed);
+  allocator->updateSlave(slave.id(), slave, slave.resources() + oversubscribed);
 
   // The next allocation should be for 10 oversubscribed cpus.
   expected = Allocation(
@@ -5527,7 +5530,8 @@ TEST_P(HierarchicalAllocator_BENCHMARK_Test, AddAndUpdateSlave)
   watch.start(); // Reset.
 
   foreach (const SlaveInfo& slave, slaves) {
-    allocator->updateSlave(slave.id(), slave.resources() + oversubscribed);
+    allocator->updateSlave(
+        slave.id(), slave, slave.resources() + oversubscribed);
   }
 
   // Wait for all the `updateSlave` operations to be processed.