You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/05/17 21:34:31 UTC

git commit: Fixed master to correctly terminate the non-checkpointing framework's tasks when a slave disconnects.

Updated Branches:
  refs/heads/master a615c5824 -> 0d80df85d


Fixed master to correctly terminate the non-checkpointing framework's
tasks when a slave disconnects.

From: Brenden Matthews <br...@airbnb.com>
Review: https://reviews.apache.org/r/11128


Project: http://git-wip-us.apache.org/repos/asf/incubator-mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-mesos/commit/0d80df85
Tree: http://git-wip-us.apache.org/repos/asf/incubator-mesos/tree/0d80df85
Diff: http://git-wip-us.apache.org/repos/asf/incubator-mesos/diff/0d80df85

Branch: refs/heads/master
Commit: 0d80df85d0b67f73e0b249bc028819dc919e000f
Parents: a615c58
Author: Vinod Kone <vi...@twitter.com>
Authored: Fri May 17 12:31:51 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Fri May 17 12:31:51 2013 -0700

----------------------------------------------------------------------
 src/master/master.cpp |   47 +++++++++++++++++++++++--------------------
 1 files changed, 25 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-mesos/blob/0d80df85/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 3207157..c44f2b7 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1752,7 +1752,7 @@ void Master::removeFramework(Framework* framework)
 
   // The completedFramework buffer now owns the framework pointer.
   completedFrameworks.push_back(std::tr1::shared_ptr<Framework>(framework));
-  
+
   // Remove it.
   frameworks.erase(framework->id);
   allocator->frameworkRemoved(framework->id);
@@ -1765,29 +1765,32 @@ void Master::removeFramework(Slave* slave, Framework* framework)
   CHECK_NOTNULL(framework);
 
   // Remove pointers to framework's tasks in slaves, and send status updates.
-  foreachvalue (Task* task, utils::copy(framework->tasks)) {
-    // A framework might not actually exist because the master failed
-    // over and the framework hasn't reconnected yet. For more info
-    // please see the comments in 'removeFramework(Framework*)'.
-    StatusUpdateMessage message;
-    StatusUpdate* update = message.mutable_update();
-    update->mutable_framework_id()->MergeFrom(task->framework_id());
-
-    if (task->has_executor_id()) {
-      update->mutable_executor_id()->MergeFrom(task->executor_id());
-    }
+  foreachvalue (Task* task, utils::copy(slave->tasks)) {
+    // Remove tasks that belong to this framework.
+    if (task->framework_id() == framework->id) {
+      // A framework might not actually exist because the master failed
+      // over and the framework hasn't reconnected yet. For more info
+      // please see the comments in 'removeFramework(Framework*)'.
+      StatusUpdateMessage message;
+      StatusUpdate* update = message.mutable_update();
+      update->mutable_framework_id()->MergeFrom(task->framework_id());
 
-    update->mutable_slave_id()->MergeFrom(task->slave_id());
-    TaskStatus* status = update->mutable_status();
-    status->mutable_task_id()->MergeFrom(task->task_id());
-    status->set_state(TASK_LOST);
-    status->set_message("Slave " + slave->info.hostname() + " disconnected");
-    update->set_timestamp(Clock::now());
-    update->set_uuid(UUID::random().toBytes());
-    send(framework->pid, message);
+      if (task->has_executor_id()) {
+        update->mutable_executor_id()->MergeFrom(task->executor_id());
+      }
 
-    // Remove the task from slave and framework.
-    removeTask(task);
+      update->mutable_slave_id()->MergeFrom(task->slave_id());
+      TaskStatus* status = update->mutable_status();
+      status->mutable_task_id()->MergeFrom(task->task_id());
+      status->set_state(TASK_LOST);
+      status->set_message("Slave " + slave->info.hostname() + " disconnected");
+      update->set_timestamp(Clock::now());
+      update->set_uuid(UUID::random().toBytes());
+      send(framework->pid, message);
+
+      // Remove the task from slave and framework.
+      removeTask(task);
+    }
   }
 
   // Remove and rescind offers from this slave given to this framework.