You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2011/06/05 08:43:31 UTC

svn commit: r1131974 - /incubator/mesos/trunk/src/master.cpp

Author: benh
Date: Sun Jun  5 06:43:31 2011
New Revision: 1131974

URL: http://svn.apache.org/viewvc?rev=1131974&view=rev
Log:
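Fixed a bug Andy saw while killing slaves on the cluster: Master::removeSlave no longer hits CHECK(framework != NULL) when a task's framework has not reconnected after a master failover; the TASK_LOST status update is skipped for that framework and the task is still removed.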

Modified:
    incubator/mesos/trunk/src/master.cpp

Modified: incubator/mesos/trunk/src/master.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master.cpp?rev=1131974&r1=1131973&r2=1131974&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master.cpp (original)
+++ incubator/mesos/trunk/src/master.cpp Sun Jun  5 06:43:31 2011
@@ -1017,9 +1017,17 @@ void Master::removeSlave(Slave *slave)
   unordered_map<pair<FrameworkID, TaskID>, Task *> tasksCopy = slave->tasks;
   foreachpair (_, Task *task, tasksCopy) {
     Framework *framework = lookupFramework(task->frameworkId);
-    CHECK(framework != NULL);
-    send(framework->pid, pack<M2F_STATUS_UPDATE>(task->id, TASK_LOST,
-                                                 task->message));
+    // A framework might not actually exist because the master failed
+    // over and the framework hasn't reconnected. This can be a tricky
+    // situation for frameworks that want to have high-availability,
+    // because if they eventually do connect they won't ever get a
+    // status update about this task.  Perhaps in the future what we
+    // want to do is create a local Framework object to represent that
+    // framework until it fails over. See the TODO above in
+    // S2M_REREGISTER_SLAVE.
+    if (framework != NULL)
+      send(framework->pid, pack<M2F_STATUS_UPDATE>(task->id, TASK_LOST,
+						   task->message));
     removeTask(task, TRR_SLAVE_LOST);
   }