You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2014/05/02 22:41:41 UTC
git commit: Fixed slave authentication to prevent sending TASK_LOST
on disconnection.
Repository: mesos
Updated Branches:
refs/heads/master 5eaf1eb34 -> 84184f997
Fixed slave authentication to prevent sending TASK_LOST on
disconnection.
Changed the master to remove a slave's frameworks and offers only on
an exited() notification, not on reauthentication. The slave is still
disconnected at the allocator in both cases.
Review: https://reviews.apache.org/r/21017
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/84184f99
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/84184f99
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/84184f99
Branch: refs/heads/master
Commit: 84184f9979604a855143979a40fb8dd6b8e73d41
Parents: 5eaf1eb
Author: Adam B <ad...@mesosphere.io>
Authored: Fri May 2 13:40:21 2014 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Fri May 2 13:40:21 2014 -0700
----------------------------------------------------------------------
src/master/master.cpp | 21 +++++++++++++++------
src/master/master.hpp | 1 +
2 files changed, 16 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/84184f99/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 8efa2c9..e7b6657 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -697,6 +697,7 @@ void Master::exited(const UPID& pid)
} else if (!slave->disconnected) {
// Checkpointing slaves can just be disconnected.
disconnect(slave);
+ removeFrameworksAndOffers(slave);
} else {
LOG(WARNING) << "Ignoring duplicate exited() notification for "
<< "checkpointing slave " << *slave;
@@ -1266,19 +1267,27 @@ void Master::disconnect(Slave* slave)
// Remove the slave from authenticated. This is safe because
// a slave will always reauthenticate before (re-)registering.
authenticated.erase(slave->pid);
+}
+
+
+void Master::removeFrameworksAndOffers(Slave* slave)
+{
+ CHECK_NOTNULL(slave);
// If a slave is checkpointing, remove all non-checkpointing
- // frameworks from the slave.
- // First, collect all the frameworks running on this slave.
+ // frameworks from the slave. If the slave is not checkpointing,
+ // remove all of its frameworks.
hashset<FrameworkID> frameworkIds =
slave->tasks.keys() | slave->executors.keys();
- // Now, remove all the non-checkpointing frameworks.
foreach (const FrameworkID& frameworkId, frameworkIds) {
Framework* framework = getFramework(frameworkId);
- if (framework != NULL && !framework->info.checkpoint()) {
- LOG(INFO) << "Removing non-checkpointing framework " << frameworkId
- << " from disconnected slave " << *slave;
+ if (framework != NULL &&
+ (!framework->info.checkpoint() || !slave->info.checkpoint())) {
+ LOG(INFO) << "Removing framework " << frameworkId
+ << " from disconnected slave " << *slave << " because "
+ << (!slave->info.checkpoint() ? "slave" : "framework")
+ << " is not checkpointing";
removeFramework(slave, framework);
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/84184f99/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index a6737d5..6ae6ae4 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -274,6 +274,7 @@ protected:
// TODO(adam-mesos): Rename deactivate to disconnect, or v.v.
void deactivate(Framework* framework);
void disconnect(Slave* slave);
+ void removeFrameworksAndOffers(Slave* slave);
// Add a slave.
void addSlave(Slave* slave, bool reregister = false);