You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by gr...@apache.org on 2019/01/10 18:36:05 UTC

[mesos] 07/08: Remove outstanding operations when removing agents.

This is an automated email from the ASF dual-hosted git repository.

grag pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 277d239813807dc089b47a386124705a01867781
Author: Benno Evers <be...@mesosphere.com>
AuthorDate: Wed Jan 9 14:31:19 2019 -0800

    Remove outstanding operations when removing agents.
    
    Usually, offer operations are removed when the framework acknowledges
    a terminal operation status update.
    
    However, currently only operations on registered agents can be
    acknowledged.
    
    This commit explicitly deletes all outstanding operations from an agent
    when it is removed.
    
    Review: https://reviews.apache.org/r/69597/
---
 src/master/master.cpp | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/master/master.cpp b/src/master/master.cpp
index a3de10d..454f06a 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -10985,6 +10985,32 @@ void Master::__removeSlave(
     removeInverseOffer(inverseOffer, true); // Rescind!
   }
 
+  // Usually, operations are removed when the framework acknowledges
+  // a terminal operation status update. However, currently only operations
+  // on registered agents can be acknowledged. Since we're about to remove
+  // this agent from the list of registered agents, clean out all outstanding
+  // operations to prevent leaks.
+  //
+  // NOTE: If the agent comes back, there will be a brief window between
+  // the `ReregisterSlaveMessage` and the first `UpdateSlaveMessage`
+  // where the master will not be able to give correct answers to operation
+  // reconciliation requests. However, since the same thing happens during
+  // master failover, the scheduler must be able to handle this scenario
+  // anyway so we allow it to happen here.
+  foreachvalue (Operation* operation, utils::copy(slave->operations)) {
+    removeOperation(operation);
+  }
+
+  foreachvalue (
+      const Slave::ResourceProvider& provider,
+      slave->resourceProviders) {
+    foreachvalue (
+        Operation* operation,
+        utils::copy(provider.operations)) {
+      removeOperation(operation);
+    }
+  }
+
   // Mark the slave as being removed.
   slaves.registered.remove(slave);
   slaves.removed.put(slave->id, Nothing());