You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2014/04/02 03:34:52 UTC
git commit: Moved slave disconnection into new
Master::disconnect(Slave*).
Repository: mesos
Updated Branches:
refs/heads/master 51d41c39c -> 3da7e6863
Moved slave disconnection into new Master::disconnect(Slave*).
Moved code to disconnect a checkpointing slave out of Master::exited()
and into a new disconnect(Slave), to be re-used by slave
authentication code. Also renamed deactivateSlave() to
shutdownSlave(), since it actually sends a ShutdownMessage and calls
removeSlave(). This avoids confusion with deactivateFramework, which
is more analagous to disconnect(Slave).
Review: https://reviews.apache.org/r/19006
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3da7e686
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3da7e686
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3da7e686
Branch: refs/heads/master
Commit: 3da7e686311c942a3776046dcaf4860b2e9325c3
Parents: 51d41c3
Author: Adam B <ad...@mesosphere.io>
Authored: Tue Apr 1 18:31:54 2014 -0700
Committer: Benjamin Hindman <be...@gmail.com>
Committed: Tue Apr 1 18:31:54 2014 -0700
----------------------------------------------------------------------
src/master/master.cpp | 89 +++++++++++++++++++++++++---------------------
src/master/master.hpp | 5 +--
2 files changed, 52 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/3da7e686/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 1ddb2a2..3c3c989 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -177,7 +177,7 @@ protected:
{
if (pinged) { // So we haven't got back a pong yet ...
if (++timeouts >= MAX_SLAVE_PING_TIMEOUTS) {
- deactivate();
+ shutdown();
return;
}
}
@@ -187,9 +187,9 @@ protected:
delay(SLAVE_PING_TIMEOUT, self(), &SlaveObserver::timeout);
}
- void deactivate()
+ void shutdown()
{
- dispatch(master, &Master::deactivateSlave, slaveId);
+ dispatch(master, &Master::shutdownSlave, slaveId);
}
private:
@@ -682,44 +682,16 @@ void Master::exited(const UPID& pid)
LOG(INFO) << "Slave " << slave->id << " (" << slave->info.hostname()
<< ") disconnected";
- // Remove the slave, if it is not checkpointing.
if (!slave->info.checkpoint()) {
+ // Remove the slave, if it is not checkpointing.
LOG(INFO) << "Removing disconnected slave " << slave->id
<< "(" << slave->info.hostname() << ") "
<< "because it is not checkpointing!";
removeSlave(slave);
return;
} else if (!slave->disconnected) {
- // Mark the slave as disconnected and remove it from the allocator.
- slave->disconnected = true;
-
- allocator->slaveDisconnected(slave->id);
-
- // If a slave is checkpointing, remove all non-checkpointing
- // frameworks from the slave.
- // First, collect all the frameworks running on this slave.
- hashset<FrameworkID> frameworkIds =
- slave->tasks.keys() | slave->executors.keys();
-
- // Now, remove all the non-checkpointing frameworks.
- foreach (const FrameworkID& frameworkId, frameworkIds) {
- Framework* framework = getFramework(frameworkId);
- if (framework != NULL && !framework->info.checkpoint()) {
- LOG(INFO) << "Removing non-checkpointing framework " << frameworkId
- << " from disconnected slave " << slave->id
- << "(" << slave->info.hostname() << ")";
-
- removeFramework(slave, framework);
- }
- }
-
- foreach (Offer* offer, utils::copy(slave->offers)) {
- allocator->resourcesRecovered(
- offer->framework_id(), slave->id, offer->resources());
-
- // Remove and rescind offers.
- removeOffer(offer, true); // Rescind!
- }
+ // Checkpointing slaves can just be disconnected.
+ disconnect(slave);
} else {
LOG(WARNING) << "Ignoring duplicate exited() notification for "
<< "checkpointing slave " << slave->id
@@ -1226,6 +1198,44 @@ void Master::deactivate(Framework* framework)
}
+void Master::disconnect(Slave* slave)
+{
+ CHECK_NOTNULL(slave);
+
+ LOG(INFO) << "Disconnecting slave " << slave->id;
+
+ // Mark the slave as disconnected and remove it from the allocator.
+ slave->disconnected = true;
+ allocator->slaveDisconnected(slave->id);
+
+ // If a slave is checkpointing, remove all non-checkpointing
+ // frameworks from the slave.
+ // First, collect all the frameworks running on this slave.
+ hashset<FrameworkID> frameworkIds =
+ slave->tasks.keys() | slave->executors.keys();
+
+ // Now, remove all the non-checkpointing frameworks.
+ foreach (const FrameworkID& frameworkId, frameworkIds) {
+ Framework* framework = getFramework(frameworkId);
+ if (framework != NULL && !framework->info.checkpoint()) {
+ LOG(INFO) << "Removing non-checkpointing framework " << frameworkId
+ << " from disconnected slave " << slave->id
+ << "(" << slave->info.hostname() << ")";
+
+ removeFramework(slave, framework);
+ }
+ }
+
+ foreach (Offer* offer, utils::copy(slave->offers)) {
+ allocator->resourcesRecovered(
+ offer->framework_id(), slave->id, offer->resources());
+
+ // Remove and rescind offers.
+ removeOffer(offer, true); // Rescind!
+ }
+}
+
+
void Master::resourceRequest(
const UPID& from,
const FrameworkID& frameworkId,
@@ -2394,20 +2404,19 @@ void Master::exitedExecutor(
}
-void Master::deactivateSlave(const SlaveID& slaveId)
+void Master::shutdownSlave(const SlaveID& slaveId)
{
if (!slaves.activated.contains(slaveId)) {
- // Possible when the SlaveObserver dispatched to deactivate a slave,
+ // Possible when the SlaveObserver dispatched to shutdown a slave,
// but exited() was already called for this slave.
- LOG(WARNING) << "Unable to deactivate unknown slave " << slaveId;
+ LOG(WARNING) << "Unable to shutdown unknown slave " << slaveId;
return;
}
Slave* slave = slaves.activated[slaveId];
CHECK_NOTNULL(slave);
- LOG(WARNING) << "Removing slave " << slave->id << " at " << slave->pid
- << " because it has been deactivated";
+ LOG(WARNING) << "Shutting down slave " << slaveId << " at " << slave->pid;
send(slave->pid, ShutdownMessage());
removeSlave(slave);
@@ -2887,7 +2896,7 @@ void Master::addFramework(Framework* framework)
// Enforced by Master::registerFramework.
CHECK(roles.contains(framework->info.role()))
<< "Unknown role " << framework->info.role()
- << " of framework " << framework->id ;
+ << " of framework " << framework->id;
roles[framework->info.role()]->addFramework(framework);
http://git-wip-us.apache.org/repos/asf/mesos/blob/3da7e686/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 7019fed..fef59c9 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -159,7 +159,7 @@ public:
const FrameworkID& frameworkId,
const ExecutorID& executorId,
int32_t status);
- void deactivateSlave(
+ void shutdownSlave(
const SlaveID& slaveId);
// TODO(bmahler): It would be preferred to use a unique libprocess
@@ -265,6 +265,7 @@ protected:
void removeFramework(Slave* slave, Framework* framework);
void deactivate(Framework* framework);
+ void disconnect(Slave* slave);
// Add a slave.
void addSlave(Slave* slave, bool reregister = false);
@@ -456,7 +457,7 @@ private:
};
-// A connected slave.
+// A connected (or disconnected, checkpointing) slave.
struct Slave
{
Slave(const SlaveInfo& _info,