You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 08:43:31 UTC
svn commit: r1131974 - /incubator/mesos/trunk/src/master.cpp
Author: benh
Date: Sun Jun 5 06:43:31 2011
New Revision: 1131974
URL: http://svn.apache.org/viewvc?rev=1131974&view=rev
Log:
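Fixed a bug Andy saw while killing slaves on the cluster: Master::removeSlave no longer aborts on CHECK(framework != NULL) when a lost task's framework is not currently registered (e.g., after a master failover); it now skips the TASK_LOST status update for that task and still removes the task.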
Modified:
incubator/mesos/trunk/src/master.cpp
Modified: incubator/mesos/trunk/src/master.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master.cpp?rev=1131974&r1=1131973&r2=1131974&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master.cpp (original)
+++ incubator/mesos/trunk/src/master.cpp Sun Jun 5 06:43:31 2011
@@ -1017,9 +1017,17 @@ void Master::removeSlave(Slave *slave)
unordered_map<pair<FrameworkID, TaskID>, Task *> tasksCopy = slave->tasks;
foreachpair (_, Task *task, tasksCopy) {
Framework *framework = lookupFramework(task->frameworkId);
- CHECK(framework != NULL);
- send(framework->pid, pack<M2F_STATUS_UPDATE>(task->id, TASK_LOST,
- task->message));
+ // A framework might not actually exist because the master failed
+ // over and the framework hasn't reconnected. This can be a tricky
+ // situation for frameworks that want to have high-availability,
+ // because if they eventually do connect they won't ever get a
+ // status update about this task. Perhaps in the future what we
+ // want to do is create a local Framework object to represent that
+ // framework until it fails over. See the TODO above in
+ // S2M_REREGISTER_SLAVE.
+ if (framework != NULL)
+ send(framework->pid, pack<M2F_STATUS_UPDATE>(task->id, TASK_LOST,
+ task->message));
removeTask(task, TRR_SLAVE_LOST);
}