You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2014/04/02 03:34:52 UTC

git commit: Moved slave disconnection into new Master::disconnect(Slave*).

Repository: mesos
Updated Branches:
  refs/heads/master 51d41c39c -> 3da7e6863


Moved slave disconnection into new Master::disconnect(Slave*).

Moved code to disconnect a checkpointing slave out of Master::exited()
and into a new disconnect(Slave), to be re-used by slave
authentication code. Also renamed deactivateSlave() to
shutdownSlave(), since it actually sends a ShutdownMessage and calls
removeSlave(). This avoids confusion with deactivateFramework, which
is more analagous to disconnect(Slave).

Review: https://reviews.apache.org/r/19006


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3da7e686
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3da7e686
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3da7e686

Branch: refs/heads/master
Commit: 3da7e686311c942a3776046dcaf4860b2e9325c3
Parents: 51d41c3
Author: Adam B <ad...@mesosphere.io>
Authored: Tue Apr 1 18:31:54 2014 -0700
Committer: Benjamin Hindman <be...@gmail.com>
Committed: Tue Apr 1 18:31:54 2014 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 89 +++++++++++++++++++++++++---------------------
 src/master/master.hpp |  5 +--
 2 files changed, 52 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/3da7e686/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 1ddb2a2..3c3c989 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -177,7 +177,7 @@ protected:
   {
     if (pinged) { // So we haven't got back a pong yet ...
       if (++timeouts >= MAX_SLAVE_PING_TIMEOUTS) {
-        deactivate();
+        shutdown();
         return;
       }
     }
@@ -187,9 +187,9 @@ protected:
     delay(SLAVE_PING_TIMEOUT, self(), &SlaveObserver::timeout);
   }
 
-  void deactivate()
+  void shutdown()
   {
-    dispatch(master, &Master::deactivateSlave, slaveId);
+    dispatch(master, &Master::shutdownSlave, slaveId);
   }
 
 private:
@@ -682,44 +682,16 @@ void Master::exited(const UPID& pid)
       LOG(INFO) << "Slave " << slave->id << " (" << slave->info.hostname()
                 << ") disconnected";
 
-      // Remove the slave, if it is not checkpointing.
       if (!slave->info.checkpoint()) {
+        // Remove the slave, if it is not checkpointing.
         LOG(INFO) << "Removing disconnected slave " << slave->id
                   << "(" << slave->info.hostname() << ") "
                   << "because it is not checkpointing!";
         removeSlave(slave);
         return;
       } else if (!slave->disconnected) {
-        // Mark the slave as disconnected and remove it from the allocator.
-        slave->disconnected = true;
-
-        allocator->slaveDisconnected(slave->id);
-
-        // If a slave is checkpointing, remove all non-checkpointing
-        // frameworks from the slave.
-        // First, collect all the frameworks running on this slave.
-        hashset<FrameworkID> frameworkIds =
-          slave->tasks.keys() | slave->executors.keys();
-
-        // Now, remove all the non-checkpointing frameworks.
-        foreach (const FrameworkID& frameworkId, frameworkIds) {
-          Framework* framework = getFramework(frameworkId);
-          if (framework != NULL && !framework->info.checkpoint()) {
-            LOG(INFO) << "Removing non-checkpointing framework " << frameworkId
-                      << " from disconnected slave " << slave->id
-                      << "(" << slave->info.hostname() << ")";
-
-            removeFramework(slave, framework);
-          }
-        }
-
-        foreach (Offer* offer, utils::copy(slave->offers)) {
-          allocator->resourcesRecovered(
-              offer->framework_id(), slave->id, offer->resources());
-
-          // Remove and rescind offers.
-          removeOffer(offer, true); // Rescind!
-        }
+        // Checkpointing slaves can just be disconnected.
+        disconnect(slave);
       } else {
         LOG(WARNING) << "Ignoring duplicate exited() notification for "
                      << "checkpointing slave " << slave->id
@@ -1226,6 +1198,44 @@ void Master::deactivate(Framework* framework)
 }
 
 
+void Master::disconnect(Slave* slave)
+{
+  CHECK_NOTNULL(slave);
+
+  LOG(INFO) << "Disconnecting slave " << slave->id;
+
+  // Mark the slave as disconnected and remove it from the allocator.
+  slave->disconnected = true;
+  allocator->slaveDisconnected(slave->id);
+
+  // If a slave is checkpointing, remove all non-checkpointing
+  // frameworks from the slave.
+  // First, collect all the frameworks running on this slave.
+  hashset<FrameworkID> frameworkIds =
+    slave->tasks.keys() | slave->executors.keys();
+
+  // Now, remove all the non-checkpointing frameworks.
+  foreach (const FrameworkID& frameworkId, frameworkIds) {
+    Framework* framework = getFramework(frameworkId);
+    if (framework != NULL && !framework->info.checkpoint()) {
+      LOG(INFO) << "Removing non-checkpointing framework " << frameworkId
+                << " from disconnected slave " << slave->id
+                << "(" << slave->info.hostname() << ")";
+
+      removeFramework(slave, framework);
+    }
+  }
+
+  foreach (Offer* offer, utils::copy(slave->offers)) {
+    allocator->resourcesRecovered(
+        offer->framework_id(), slave->id, offer->resources());
+
+    // Remove and rescind offers.
+    removeOffer(offer, true); // Rescind!
+  }
+}
+
+
 void Master::resourceRequest(
     const UPID& from,
     const FrameworkID& frameworkId,
@@ -2394,20 +2404,19 @@ void Master::exitedExecutor(
 }
 
 
-void Master::deactivateSlave(const SlaveID& slaveId)
+void Master::shutdownSlave(const SlaveID& slaveId)
 {
   if (!slaves.activated.contains(slaveId)) {
-    // Possible when the SlaveObserver dispatched to deactivate a slave,
+    // Possible when the SlaveObserver dispatched to shutdown a slave,
     // but exited() was already called for this slave.
-    LOG(WARNING) << "Unable to deactivate unknown slave " << slaveId;
+    LOG(WARNING) << "Unable to shutdown unknown slave " << slaveId;
     return;
   }
 
   Slave* slave = slaves.activated[slaveId];
   CHECK_NOTNULL(slave);
 
-  LOG(WARNING) << "Removing slave " << slave->id << " at " << slave->pid
-               << " because it has been deactivated";
+  LOG(WARNING) << "Shutting down slave " << slaveId << " at " << slave->pid;
 
   send(slave->pid, ShutdownMessage());
   removeSlave(slave);
@@ -2887,7 +2896,7 @@ void Master::addFramework(Framework* framework)
   // Enforced by Master::registerFramework.
   CHECK(roles.contains(framework->info.role()))
     << "Unknown role " << framework->info.role()
-    << " of framework " << framework->id ;
+    << " of framework " << framework->id;
 
   roles[framework->info.role()]->addFramework(framework);
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/3da7e686/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 7019fed..fef59c9 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -159,7 +159,7 @@ public:
       const FrameworkID& frameworkId,
       const ExecutorID& executorId,
       int32_t status);
-  void deactivateSlave(
+  void shutdownSlave(
       const SlaveID& slaveId);
 
   // TODO(bmahler): It would be preferred to use a unique libprocess
@@ -265,6 +265,7 @@ protected:
   void removeFramework(Slave* slave, Framework* framework);
 
   void deactivate(Framework* framework);
+  void disconnect(Slave* slave);
 
   // Add a slave.
   void addSlave(Slave* slave, bool reregister = false);
@@ -456,7 +457,7 @@ private:
 };
 
 
-// A connected slave.
+// A connected (or disconnected, checkpointing) slave.
 struct Slave
 {
   Slave(const SlaveInfo& _info,