You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2015/06/29 19:20:26 UTC
[2/3] mesos git commit: Moved StatusUpdate.uuid from required to
optional.
Moved StatusUpdate.uuid from required to optional.
Review: https://reviews.apache.org/r/35911
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/fda49c04
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/fda49c04
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/fda49c04
Branch: refs/heads/master
Commit: fda49c04c88ce3c2c28866553669e64c8b65b956
Parents: a964bec
Author: Benjamin Mahler <be...@gmail.com>
Authored: Thu Jun 25 22:44:53 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Mon Jun 29 10:01:27 2015 -0700
----------------------------------------------------------------------
include/mesos/mesos.proto | 3 +-
src/common/type_utils.cpp | 17 ++++-----
src/master/master.cpp | 10 +++--
src/messages/messages.proto | 6 ++-
src/sched/sched.cpp | 63 ++++++++++++++++----------------
src/scheduler/scheduler.cpp | 18 ++++++---
src/slave/slave.cpp | 15 ++++++++
src/slave/status_update_manager.cpp | 4 ++
8 files changed, 86 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 81008ed..0ebe5d3 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -920,7 +920,8 @@ message TaskStatus {
// driver implicitly acknowledge (default).
//
// TODO(bmahler): This is currently overwritten in the scheduler
- // driver, even if executors set this.
+ // driver and executor driver, but executors will need to set this
+ // to a valid RFC-4122 UUID if using the HTTP API.
optional bytes uuid = 11;
// Describes whether the task has been determined to be healthy
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/common/type_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/type_utils.cpp b/src/common/type_utils.cpp
index f88dff7..f7b21c6 100644
--- a/src/common/type_utils.cpp
+++ b/src/common/type_utils.cpp
@@ -388,19 +388,18 @@ std::ostream& operator << (
std::ostream& stream,
const StatusUpdate& update)
{
- stream
- << update.status().state()
- << " (UUID: " << UUID::fromBytes(update.uuid())
- << ") for task " << update.status().task_id();
+ stream << update.status().state()
+ << (update.has_uuid()
+ ? " (UUID: " + stringify(UUID::fromBytes(update.uuid()))
+ : "")
+ << ") for task " << update.status().task_id();
if (update.status().has_healthy()) {
- stream
- << " in health state "
- << (update.status().healthy() ? "healthy" : "unhealthy");
+ stream << " in health state "
+ << (update.status().healthy() ? "healthy" : "unhealthy");
}
- return stream
- << " of framework " << update.framework_id();
+ return stream << " of framework " << update.framework_id();
}
} // namespace internal {
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index cbc6618..b8ed699 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -4990,9 +4990,13 @@ void Master::updateTask(Task* task, const StatusUpdate& update)
task->set_state(status.state());
}
- // Set the status update state and uuid for the task.
- task->set_status_update_state(status.state());
- task->set_status_update_uuid(update.uuid());
+ // Set the status update state and uuid for the task. Note that
+ // master-generated updates are terminal and do not have a uuid
+ // (in which case the master also calls removeTask()).
+ if (update.has_uuid()) {
+ task->set_status_update_state(status.state());
+ task->set_status_update_uuid(update.uuid());
+ }
// TODO(brenden) Consider wiping the `message` field?
if (task->statuses_size() > 0 &&
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/messages/messages.proto
----------------------------------------------------------------------
diff --git a/src/messages/messages.proto b/src/messages/messages.proto
index a1e71d8..165a16d 100644
--- a/src/messages/messages.proto
+++ b/src/messages/messages.proto
@@ -71,7 +71,11 @@ message StatusUpdate {
optional SlaveID slave_id = 3;
required TaskStatus status = 4;
required double timestamp = 5;
- required bytes uuid = 6;
+
+ // This is being deprecated in favor of TaskStatus.uuid. In 0.23.0,
+ // we set the TaskStatus 'uuid' in the executor driver for all
+ // retryable status updates.
+ optional bytes uuid = 6;
// This corresponds to the latest state of the task according to the
// slave. Note that this state might be different than the state in
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/sched/sched.cpp
----------------------------------------------------------------------
diff --git a/src/sched/sched.cpp b/src/sched/sched.cpp
index a4e35aa..d37b256 100644
--- a/src/sched/sched.cpp
+++ b/src/sched/sched.cpp
@@ -71,6 +71,8 @@
#include "authentication/cram_md5/authenticatee.hpp"
+#include "common/protobuf_utils.hpp"
+
#include "local/flags.hpp"
#include "local/local.hpp"
@@ -690,16 +692,19 @@ protected:
TaskStatus status = update.status();
- // If the update is driver-generated or master-generated, it
- // does not require acknowledgement and so we unset the 'uuid'
- // field of TaskStatus. Otherwise, we overwrite the field to
- // ensure that a 0.22.0 scheduler driver supports explicit
- // acknowledgements, even if running against a 0.21.0 cluster.
+ // If the update does not have a 'uuid', it does not need
+ // acknowledging. However, prior to 0.23.0, the update uuid
+ // was required and always set. In 0.24.0, we can rely on the
+ // update uuid check here, until then we must still check for
+ // this being sent from the driver (from == UPID()) or from
+ // the master (pid == UPID()).
//
- // TODO(bmahler): Update master and slave to ensure that 'uuid' is
- // set accurately by the time it reaches the scheduler driver.
- // This will be required for pure bindings.
- if (from == UPID() || pid == UPID()) {
+ // TODO(bmahler): For the HTTP API, we will have to update the
+ // master and slave to ensure the 'uuid' in TaskStatus is set
+ // correctly.
+ if (!update.has_uuid()) {
+ status.clear_uuid();
+ } else if (from == UPID() || pid == UPID()) {
status.clear_uuid();
} else {
status.set_uuid(update.uuid());
@@ -724,8 +729,8 @@ protected:
return;
}
- // Don't acknowledge updates created by the driver or master.
- if (from != UPID() && pid != UPID()) {
+ // See above for when we don't need to acknowledge.
+ if (update.has_uuid() && from != UPID() && pid != UPID()) {
// We drop updates while we're disconnected.
CHECK(connected);
CHECK_SOME(master);
@@ -915,16 +920,14 @@ protected:
// master failover etc). The correct way for schedulers to deal
// with this situation is to use 'reconcileTasks()'.
foreach (const TaskInfo& task, tasks) {
- StatusUpdate update;
- update.mutable_framework_id()->MergeFrom(framework.id());
- TaskStatus* status = update.mutable_status();
- status->mutable_task_id()->MergeFrom(task.task_id());
- status->set_state(TASK_LOST);
- status->set_source(TaskStatus::SOURCE_MASTER);
- status->set_reason(TaskStatus::REASON_MASTER_DISCONNECTED);
- status->set_message("Master Disconnected");
- update.set_timestamp(Clock::now().secs());
- update.set_uuid(UUID::random().toBytes());
+ StatusUpdate update = protobuf::createStatusUpdate(
+ framework.id(),
+ None(),
+ task.task_id(),
+ TASK_LOST,
+ TaskStatus::SOURCE_MASTER,
+ "Master disconnected",
+ TaskStatus::REASON_MASTER_DISCONNECTED);
statusUpdate(UPID(), update, UPID());
}
@@ -996,16 +999,14 @@ protected:
}
foreach (const TaskInfo& task, operation.launch().task_infos()) {
- StatusUpdate update;
- update.mutable_framework_id()->MergeFrom(framework.id());
- TaskStatus* status = update.mutable_status();
- status->mutable_task_id()->MergeFrom(task.task_id());
- status->set_state(TASK_LOST);
- status->set_source(TaskStatus::SOURCE_MASTER);
- status->set_reason(TaskStatus::REASON_MASTER_DISCONNECTED);
- status->set_message("Master Disconnected");
- update.set_timestamp(Clock::now().secs());
- update.set_uuid(UUID::random().toBytes());
+ StatusUpdate update = protobuf::createStatusUpdate(
+ framework.id(),
+ None(),
+ task.task_id(),
+ TASK_LOST,
+ TaskStatus::SOURCE_MASTER,
+ "Master disconnected",
+ TaskStatus::REASON_MASTER_DISCONNECTED);
statusUpdate(UPID(), update, UPID());
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/scheduler/scheduler.cpp
----------------------------------------------------------------------
diff --git a/src/scheduler/scheduler.cpp b/src/scheduler/scheduler.cpp
index 1efc6fb..f360e4d 100644
--- a/src/scheduler/scheduler.cpp
+++ b/src/scheduler/scheduler.cpp
@@ -624,11 +624,19 @@ protected:
update->mutable_status()->set_timestamp(message.update().timestamp());
- // If the update is generated by the master it doesn't need to be
- // acknowledged; so we unset the UUID inside TaskStatus.
- // TODO(vinod): Update master and slave to ensure that 'uuid' is
- // set accurately by the time it reaches the scheduler.
- if (UPID(message.pid()) == UPID()) {
+ // If the update does not have a 'uuid', it does not need
+ // acknowledging. However, prior to 0.23.0, the update uuid
+ // was required and always set. In 0.24.0, we can rely on the
+ // update uuid check here, until then we must still check for
+ // this being sent from the driver (from == UPID()) or from
+ // the master (pid == UPID()).
+ //
+ // TODO(bmahler): For the HTTP API, we will have to update the
+ // master and slave to ensure the 'uuid' in TaskStatus is set
+ // correctly.
+ if (!message.update().has_uuid()) {
+ update->mutable_status()->clear_uuid();
+ } else if (UPID(message.pid()) == UPID()) {
update->mutable_status()->clear_uuid();
} else {
update->mutable_status()->set_uuid(message.update().uuid());
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 9b72fad..1105a66 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -2663,6 +2663,15 @@ void Slave::statusUpdate(StatusUpdate update, const UPID& pid)
state == RUNNING || state == TERMINATING)
<< state;
+ // TODO(bmahler): With the HTTP API, we must validate the UUID
+ // inside the TaskStatus. For now, we only care about the UUID
+ // inside the StatusUpdate, as the scheduler driver overwrites it.
+ if (!update.has_uuid()) {
+ LOG(WARNING) << "Ignoring status update " << update << " without 'uuid'";
+ metrics.invalid_status_updates++;
+ return;
+ }
+
// Set the source before forwarding the status update.
update.mutable_status()->set_source(
pid == UPID() ? TaskStatus::SOURCE_SLAVE : TaskStatus::SOURCE_EXECUTOR);
@@ -2880,6 +2889,9 @@ void Slave::forward(StatusUpdate update)
}
if (task != NULL) {
+ CHECK(update.has_uuid())
+ << "Expecting updates without 'uuid' to have been rejected";
+
// We set the status update state of the task here because in
// steady state master updates the status update state of the
// task when it receives this update. If the master fails over,
@@ -5120,6 +5132,9 @@ void Executor::recoverTask(const TaskState& state)
launchedTasks.contains(state.id)) {
terminateTask(state.id, update.status());
+ CHECK(update.has_uuid())
+ << "Expecting updates without 'uuid' to have been rejected";
+
// If the terminal update has been acknowledged, remove it.
if (state.acks.contains(UUID::fromBytes(update.uuid()))) {
completeTask(state.id);
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/slave/status_update_manager.cpp
----------------------------------------------------------------------
diff --git a/src/slave/status_update_manager.cpp b/src/slave/status_update_manager.cpp
index 0ad2450..1978ac8 100644
--- a/src/slave/status_update_manager.cpp
+++ b/src/slave/status_update_manager.cpp
@@ -711,6 +711,10 @@ Try<bool> StatusUpdateStream::update(const StatusUpdate& update)
return Error(error.get());
}
+ if (!update.has_uuid()) {
+ return Error("Status update is missing 'uuid'");
+ }
+
// Check that this status update has not already been acknowledged.
// This could happen in the rare case when the slave received the ACK
// from the framework, died, but slave's ACK to the executor never made it!