You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2014/04/17 04:28:19 UTC
[6/6] git commit: Added task reconciliation for unknown slaves and
tasks.
Added task reconciliation for unknown slaves and tasks.
Review: https://reviews.apache.org/r/20104
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/24a22d20
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/24a22d20
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/24a22d20
Branch: refs/heads/master
Commit: 24a22d20cd375c51c75ea8c96c55f83af530326b
Parents: 8e02ce2
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Fri Mar 28 18:49:43 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed Apr 16 19:17:04 2014 -0700
----------------------------------------------------------------------
src/master/master.cpp | 75 ++++++++++++++++++++++++++++++----------------
1 file changed, 50 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/24a22d20/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index b37c6f2..66e4940 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -2494,40 +2494,65 @@ void Master::reconcileTasks(
LOG(INFO) << "Performing best-effort task state reconciliation for framework "
<< frameworkId;
- // Verify expected task states and send status updates whenever expectations
- // are not met. When:
- // 1) Slave is unknown.*
- // 2) Task is unknown.*
- // 3) Task state has changed.
+ // Reconciliation occurs for the following cases:
+ // (1) If the slave is unknown, we send TASK_LOST.
+ // (2) If the task is missing on the slave, we send TASK_LOST.
+ // (3) If the task state differs, we send the latest state.
//
- // *) TODO(nnielsen): Missing slaves and tasks are currently treated silently
- // i.e. nothing is sent. To give accurate responses in
- // these cases during master fail-over, we need to leverage
- // the registrar.
+ // (1) is applicable only when operating with a strict registry.
foreach (const TaskStatus& status, statuses) {
if (!status.has_slave_id()) {
- LOG(WARNING) << "Status from task " << status.task_id()
- << " does not include slave id";
+ LOG(WARNING) << "Status for task " << status.task_id()
+ << " from framework " << frameworkId
+ << " does not include slave id; cannot reconcile";
continue;
}
- Slave* slave = getSlave(status.slave_id());
- if (slave != NULL) {
- Task* task = slave->getTask(frameworkId, status.task_id());
- if (task != NULL && task->state() != status.state()) {
- const StatusUpdate& update = protobuf::createStatusUpdate(
+ Option<StatusUpdate> update;
+
+ // Check for a removed slave (case 1).
+ if (flags.registry_strict &&
+ !slaves.recovered.contains(status.slave_id()) &&
+ !slaves.reregistering.contains(status.slave_id()) &&
+ !slaves.activated.contains(status.slave_id()) &&
+ !slaves.removing.contains(status.slave_id())) {
+ // Slave is removed!
+ update = protobuf::createStatusUpdate(
frameworkId,
- task->slave_id(),
- task->task_id(),
- task->state(),
- "Task state changed");
+ status.slave_id(),
+ status.task_id(),
+ TASK_LOST,
+ "Reconciliation: Slave is removed");
+ }
- statusUpdate(update, UPID());
+ // Check for a known slave / task (cases (2) and (3)).
+ if (slaves.activated.contains(status.slave_id())) {
+ Slave* slave = CHECK_NOTNULL(slaves.activated[status.slave_id()]);
+ Task* task = slave->getTask(frameworkId, status.task_id());
+
+ if (task == NULL) {
+ // Case (2).
+ // TODO(bmahler): Leverage completed tasks if we track these
+ // in the future.
+ update = protobuf::createStatusUpdate(
+ frameworkId,
+ status.slave_id(),
+ status.task_id(),
+ TASK_LOST,
+ "Reconciliation: Task is unknown to the slave");
+ } else if (task->state() != status.state()) {
+ // Case (3).
+ update = protobuf::createStatusUpdate(
+ frameworkId,
+ task->slave_id(),
+ task->task_id(),
+ task->state(),
+ "Reconciliation: Task state changed");
}
- } else {
- // TODO(bmahler): We can answer here if and only if the slave is
- // *completely* unknown to us. That is, it's not being tracked
- // in any of our 'slaves' data.
+ }
+
+ if (update.isSome()) {
+ statusUpdate(update.get(), UPID());
}
}
}