You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2014/05/02 22:41:41 UTC

git commit: Fixed slave authentication to prevent sending TASK_LOST on disconnection.

Repository: mesos
Updated Branches:
  refs/heads/master 5eaf1eb34 -> 84184f997


Fixed slave authentication to prevent sending TASK_LOST on
disconnection.

Changed the master to remove a slave's frameworks and offers only when
the slave has exited (i.e., in Master::exited()), not when it
reauthenticates. The slave is still disconnected at the allocator in
both cases.

Review: https://reviews.apache.org/r/21017


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/84184f99
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/84184f99
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/84184f99

Branch: refs/heads/master
Commit: 84184f9979604a855143979a40fb8dd6b8e73d41
Parents: 5eaf1eb
Author: Adam B <ad...@mesosphere.io>
Authored: Fri May 2 13:40:21 2014 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Fri May 2 13:40:21 2014 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 21 +++++++++++++++------
 src/master/master.hpp |  1 +
 2 files changed, 16 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/84184f99/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 8efa2c9..e7b6657 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -697,6 +697,7 @@ void Master::exited(const UPID& pid)
       } else if (!slave->disconnected) {
         // Checkpointing slaves can just be disconnected.
         disconnect(slave);
+        removeFrameworksAndOffers(slave);
       } else {
         LOG(WARNING) << "Ignoring duplicate exited() notification for "
                      << "checkpointing slave " << *slave;
@@ -1266,19 +1267,27 @@ void Master::disconnect(Slave* slave)
   // Remove the slave from authenticated. This is safe because
   // a slave will always reauthenticate before (re-)registering.
   authenticated.erase(slave->pid);
+}
+
+
+void Master::removeFrameworksAndOffers(Slave* slave)
+{
+  CHECK_NOTNULL(slave);
 
   // If a slave is checkpointing, remove all non-checkpointing
-  // frameworks from the slave.
-  // First, collect all the frameworks running on this slave.
+  // frameworks from the slave. If the slave is not checkpointing,
+  // remove all of its frameworks.
   hashset<FrameworkID> frameworkIds =
     slave->tasks.keys() | slave->executors.keys();
 
-  // Now, remove all the non-checkpointing frameworks.
   foreach (const FrameworkID& frameworkId, frameworkIds) {
     Framework* framework = getFramework(frameworkId);
-    if (framework != NULL && !framework->info.checkpoint()) {
-      LOG(INFO) << "Removing non-checkpointing framework " << frameworkId
-                << " from disconnected slave " << *slave;
+    if (framework != NULL &&
+        (!framework->info.checkpoint() || !slave->info.checkpoint())) {
+      LOG(INFO) << "Removing framework " << frameworkId
+                << " from disconnected slave " << *slave << " because "
+                << (!slave->info.checkpoint() ? "slave" : "framework")
+                << " is not checkpointing";
 
       removeFramework(slave, framework);
     }

http://git-wip-us.apache.org/repos/asf/mesos/blob/84184f99/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index a6737d5..6ae6ae4 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -274,6 +274,7 @@ protected:
   // TODO(adam-mesos): Rename deactivate to disconnect, or v.v.
   void deactivate(Framework* framework);
   void disconnect(Slave* slave);
+  void removeFrameworksAndOffers(Slave* slave);
 
   // Add a slave.
   void addSlave(Slave* slave, bool reregister = false);