You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2015/06/29 19:20:25 UTC
[1/3] mesos git commit: Updated createdStatusUpdate to take an
optional UUID.
Repository: mesos
Updated Branches:
refs/heads/master 4b591a0d0 -> 0d5dc05b5
Updated createdStatusUpdate to take an optional UUID.
Review: https://reviews.apache.org/r/35912
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0d5dc05b
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0d5dc05b
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0d5dc05b
Branch: refs/heads/master
Commit: 0d5dc05b5d3a4bf33c7f38a07ae68ef078600552
Parents: fda49c0
Author: Benjamin Mahler <be...@gmail.com>
Authored: Thu Jun 25 22:59:29 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Mon Jun 29 10:01:27 2015 -0700
----------------------------------------------------------------------
src/common/protobuf_utils.cpp | 7 ++++++-
src/common/protobuf_utils.hpp | 7 +++++++
src/master/master.cpp | 15 +++++++++++++++
src/sched/sched.cpp | 2 ++
src/slave/slave.cpp | 14 +++++++++++++-
src/tests/fault_tolerance_tests.cpp | 5 ++++-
src/tests/partition_tests.cpp | 4 +++-
7 files changed, 50 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/common/protobuf_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp
index 9ccd70d..f0642ba 100644
--- a/src/common/protobuf_utils.cpp
+++ b/src/common/protobuf_utils.cpp
@@ -49,6 +49,7 @@ StatusUpdate createStatusUpdate(
const TaskID& taskId,
const TaskState& state,
const TaskStatus::Source& source,
+ const Option<UUID>& uuid,
const string& message = "",
const Option<TaskStatus::Reason>& reason = None(),
const Option<ExecutorID>& executorId = None(),
@@ -57,7 +58,6 @@ StatusUpdate createStatusUpdate(
StatusUpdate update;
update.set_timestamp(process::Clock::now().secs());
- update.set_uuid(UUID::random().toBytes());
update.mutable_framework_id()->MergeFrom(frameworkId);
if (slaveId.isSome()) {
@@ -80,6 +80,11 @@ StatusUpdate createStatusUpdate(
status->set_message(message);
status->set_timestamp(update.timestamp());
+ if (uuid.isSome()) {
+ update.set_uuid(uuid.get().toBytes());
+ status->set_uuid(uuid.get().toBytes());
+ }
+
if (reason.isSome()) {
status->set_reason(reason.get());
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/common/protobuf_utils.hpp
----------------------------------------------------------------------
diff --git a/src/common/protobuf_utils.hpp b/src/common/protobuf_utils.hpp
index 9ecd234..afe5a85 100644
--- a/src/common/protobuf_utils.hpp
+++ b/src/common/protobuf_utils.hpp
@@ -23,6 +23,7 @@
#include <stout/ip.hpp>
#include <stout/option.hpp>
+#include <stout/uuid.hpp>
#include "messages/messages.hpp"
@@ -38,12 +39,18 @@ namespace protobuf {
bool isTerminalState(const TaskState& state);
+// See TaskStatus for more information about these fields. Note
+// that the 'uuid' must be provided for updates that need
+// acknowledgement. Currently, all slave and executor generated
+// updates require acknowledgement, whereas master generated
+// and scheduler driver generated updates do not.
StatusUpdate createStatusUpdate(
const FrameworkID& frameworkId,
const Option<SlaveID>& slaveId,
const TaskID& taskId,
const TaskState& state,
const TaskStatus::Source& source,
+ const Option<UUID>& uuid,
const std::string& message = "",
const Option<TaskStatus::Reason>& reason = None(),
const Option<ExecutorID>& executorId = None(),
http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index b8ed699..34ce744 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -2403,6 +2403,7 @@ void Master::accept(
task.task_id(),
TASK_LOST,
TaskStatus::SOURCE_MASTER,
+ None(),
"Task launched with invalid offers: " + error.get().message,
TaskStatus::REASON_INVALID_OFFERS);
@@ -2514,6 +2515,7 @@ void Master::_accept(
task.task_id(),
TASK_LOST,
TaskStatus::SOURCE_MASTER,
+ None(),
slave == NULL ? "Slave removed" : "Slave disconnected",
reason);
@@ -2688,6 +2690,7 @@ void Master::_accept(
task.task_id(),
TASK_ERROR,
TaskStatus::SOURCE_MASTER,
+ None(),
authorization.isFailed() ?
"Authorization failure: " + authorization.failure() :
"Not authorized to launch as user '" + user + "'",
@@ -2730,6 +2733,7 @@ void Master::_accept(
task_.task_id(),
TASK_ERROR,
TaskStatus::SOURCE_MASTER,
+ None(),
validationError.get().message,
TaskStatus::REASON_TASK_INVALID);
@@ -2870,6 +2874,7 @@ void Master::kill(Framework* framework, const scheduler::Call::Kill& kill)
taskId,
TASK_KILLED,
TaskStatus::SOURCE_MASTER,
+ None(),
"Killed pending task");
forward(update, UPID(), framework);
@@ -3835,6 +3840,7 @@ void Master::_reconcileTasks(
task.task_id(),
TASK_STAGING,
TaskStatus::SOURCE_MASTER,
+ None(),
"Reconciliation: Latest task state",
TaskStatus::REASON_RECONCILIATION);
@@ -3865,6 +3871,7 @@ void Master::_reconcileTasks(
task->task_id(),
state,
TaskStatus::SOURCE_MASTER,
+ None(),
"Reconciliation: Latest task state",
TaskStatus::REASON_RECONCILIATION,
executorId,
@@ -3920,6 +3927,7 @@ void Master::_reconcileTasks(
task_.task_id(),
TASK_STAGING,
TaskStatus::SOURCE_MASTER,
+ None(),
"Reconciliation: Latest task state",
TaskStatus::REASON_RECONCILIATION);
} else if (task != NULL) {
@@ -3938,6 +3946,7 @@ void Master::_reconcileTasks(
task->task_id(),
state,
TaskStatus::SOURCE_MASTER,
+ None(),
"Reconciliation: Latest task state",
TaskStatus::REASON_RECONCILIATION,
executorId,
@@ -3950,6 +3959,7 @@ void Master::_reconcileTasks(
status.task_id(),
TASK_LOST,
TaskStatus::SOURCE_MASTER,
+ None(),
"Reconciliation: Task is unknown to the slave",
TaskStatus::REASON_RECONCILIATION);
} else if (slaves.transitioning(slaveId)) {
@@ -3965,6 +3975,7 @@ void Master::_reconcileTasks(
status.task_id(),
TASK_LOST,
TaskStatus::SOURCE_MASTER,
+ None(),
"Reconciliation: Task is unknown",
TaskStatus::REASON_RECONCILIATION);
}
@@ -4339,6 +4350,7 @@ void Master::reconcile(
task->task_id(),
TASK_LOST,
TaskStatus::SOURCE_MASTER,
+ None(),
"Task is unknown to the slave",
TaskStatus::REASON_TASK_UNKNOWN);
@@ -4600,6 +4612,7 @@ void Master::removeFramework(Framework* framework)
task->task_id(),
TASK_KILLED,
TaskStatus::SOURCE_MASTER,
+ None(),
"Framework " + framework->id().value() + " removed",
TaskStatus::REASON_FRAMEWORK_REMOVED,
(task->has_executor_id()
@@ -4695,6 +4708,7 @@ void Master::removeFramework(Slave* slave, Framework* framework)
task->task_id(),
TASK_LOST,
TaskStatus::SOURCE_MASTER,
+ None(),
"Slave " + slave->info.hostname() + " disconnected",
TaskStatus::REASON_SLAVE_DISCONNECTED,
(task->has_executor_id()
@@ -4834,6 +4848,7 @@ void Master::removeSlave(
task->task_id(),
TASK_LOST,
TaskStatus::SOURCE_MASTER,
+ None(),
"Slave " + slave->info.hostname() + " removed: " + message,
TaskStatus::REASON_SLAVE_REMOVED,
(task->has_executor_id() ?
http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/sched/sched.cpp
----------------------------------------------------------------------
diff --git a/src/sched/sched.cpp b/src/sched/sched.cpp
index d37b256..7563abb 100644
--- a/src/sched/sched.cpp
+++ b/src/sched/sched.cpp
@@ -926,6 +926,7 @@ protected:
task.task_id(),
TASK_LOST,
TaskStatus::SOURCE_MASTER,
+ None(),
"Master disconnected",
TaskStatus::REASON_MASTER_DISCONNECTED);
@@ -1005,6 +1006,7 @@ protected:
task.task_id(),
TASK_LOST,
TaskStatus::SOURCE_MASTER,
+ None(),
"Master disconnected",
TaskStatus::REASON_MASTER_DISCONNECTED);
http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 1105a66..008170f 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -61,6 +61,7 @@
#include <stout/stringify.hpp>
#include <stout/strings.hpp>
#include <stout/try.hpp>
+#include <stout/uuid.hpp>
#include <stout/utils.hpp>
#ifdef __linux__
@@ -1041,6 +1042,7 @@ void Slave::reregistered(
taskId,
TASK_LOST,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
"Reconciliation: task unknown to the slave",
TaskStatus::REASON_RECONCILIATION);
@@ -1413,6 +1415,7 @@ void Slave::_runTask(
task.task_id(),
TASK_LOST,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
"Could not launch the task because we failed to unschedule directories"
" scheduled for gc",
TaskStatus::REASON_GC_ERROR);
@@ -1454,7 +1457,8 @@ void Slave::_runTask(
task.task_id(),
TASK_LOST,
TaskStatus::SOURCE_SLAVE,
- "The checkpointed resources being used by the task are unknown to "
+ UUID::random(),
+ "The checkpointed resources being used by the task are unknown to "
"the slave",
TaskStatus::REASON_RESOURCES_UNKNOWN);
@@ -1486,6 +1490,7 @@ void Slave::_runTask(
task.task_id(),
TASK_LOST,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
"The checkpointed resources being used by the executor are unknown "
"to the slave",
TaskStatus::REASON_RESOURCES_UNKNOWN,
@@ -1551,6 +1556,7 @@ void Slave::_runTask(
task.task_id(),
TASK_LOST,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
"Executor terminating/terminated",
TaskStatus::REASON_EXECUTOR_TERMINATED);
@@ -1797,6 +1803,7 @@ void Slave::killTask(
taskId,
TASK_KILLED,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
"Task killed before it was launched");
statusUpdate(update, UPID());
@@ -1824,6 +1831,7 @@ void Slave::killTask(
taskId,
TASK_LOST,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
"Cannot find executor",
TaskStatus::REASON_EXECUTOR_TERMINATED);
@@ -1840,6 +1848,7 @@ void Slave::killTask(
taskId,
TASK_KILLED,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
"Unregistered executor",
TaskStatus::REASON_EXECUTOR_UNREGISTERED,
executor->id);
@@ -1887,6 +1896,7 @@ void Slave::killTask(
taskId,
TASK_KILLED,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
"Task killed when it was queued",
None(),
executor->id);
@@ -2566,6 +2576,7 @@ void Slave::reregisterExecutor(
task->task_id(),
TASK_LOST,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
"Task launched during slave restart",
TaskStatus::REASON_SLAVE_RESTARTED,
executorId);
@@ -4531,6 +4542,7 @@ void Slave::sendExecutorTerminatedStatusUpdate(
taskId,
taskState,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
termination.isReady() ? termination.get().message()
: "Abnormal executor termination",
reason,
http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index fb28e2a..1070ccf 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -38,6 +38,7 @@
#include <stout/json.hpp>
#include <stout/stringify.hpp>
+#include <stout/uuid.hpp>
#include "common/protobuf_utils.hpp"
@@ -1133,6 +1134,7 @@ TEST_F(FaultToleranceTest, ForwardStatusUpdateUnknownExecutor)
taskId,
TASK_RUNNING,
TaskStatus::SOURCE_SLAVE,
+ UUID::random(),
"Dummy update");
process::dispatch(slave.get(), &Slave::statusUpdate, statusUpdate2, UPID());
@@ -1694,7 +1696,8 @@ TEST_F(FaultToleranceTest, SplitBrainMasters)
runningStatus.get().slave_id(),
runningStatus.get().task_id(),
TASK_LOST,
- TaskStatus::SOURCE_SLAVE));
+ TaskStatus::SOURCE_SLAVE,
+ UUID::random()));
// Spoof a message from a random master; this should be dropped by
// the scheduler driver. Since this is delivered locally, it is
http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/tests/partition_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/partition_tests.cpp b/src/tests/partition_tests.cpp
index c6ec14d..a1fb6f7 100644
--- a/src/tests/partition_tests.cpp
+++ b/src/tests/partition_tests.cpp
@@ -26,6 +26,7 @@
#include <process/pid.hpp>
#include <stout/try.hpp>
+#include <stout/uuid.hpp>
#include "common/protobuf_utils.hpp"
@@ -389,7 +390,8 @@ TEST_F(PartitionTest, PartitionedSlaveStatusUpdates)
slaveId,
taskId,
TASK_RUNNING,
- TaskStatus::SOURCE_SLAVE);
+ TaskStatus::SOURCE_SLAVE,
+ UUID::random());
StatusUpdateMessage message;
message.mutable_update()->CopyFrom(update);
[2/3] mesos git commit: Moved StatusUpdate.uuid from required to
optional.
Posted by bm...@apache.org.
Moved StatusUpdate.uuid from required to optional.
Review: https://reviews.apache.org/r/35911
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/fda49c04
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/fda49c04
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/fda49c04
Branch: refs/heads/master
Commit: fda49c04c88ce3c2c28866553669e64c8b65b956
Parents: a964bec
Author: Benjamin Mahler <be...@gmail.com>
Authored: Thu Jun 25 22:44:53 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Mon Jun 29 10:01:27 2015 -0700
----------------------------------------------------------------------
include/mesos/mesos.proto | 3 +-
src/common/type_utils.cpp | 17 ++++-----
src/master/master.cpp | 10 +++--
src/messages/messages.proto | 6 ++-
src/sched/sched.cpp | 63 ++++++++++++++++----------------
src/scheduler/scheduler.cpp | 18 ++++++---
src/slave/slave.cpp | 15 ++++++++
src/slave/status_update_manager.cpp | 4 ++
8 files changed, 86 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 81008ed..0ebe5d3 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -920,7 +920,8 @@ message TaskStatus {
// driver implicitly acknowledge (default).
//
// TODO(bmahler): This is currently overwritten in the scheduler
- // driver, even if executors set this.
+ // driver and executor driver, but executors will need to set this
+ // to a valid RFC-4122 UUID if using the HTTP API.
optional bytes uuid = 11;
// Describes whether the task has been determined to be healthy
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/common/type_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/type_utils.cpp b/src/common/type_utils.cpp
index f88dff7..f7b21c6 100644
--- a/src/common/type_utils.cpp
+++ b/src/common/type_utils.cpp
@@ -388,19 +388,18 @@ std::ostream& operator << (
std::ostream& stream,
const StatusUpdate& update)
{
- stream
- << update.status().state()
- << " (UUID: " << UUID::fromBytes(update.uuid())
- << ") for task " << update.status().task_id();
+ stream << update.status().state()
+ << (update.has_uuid()
+ ? " (UUID: " + stringify(UUID::fromBytes(update.uuid()))
+ : "")
+ << ") for task " << update.status().task_id();
if (update.status().has_healthy()) {
- stream
- << " in health state "
- << (update.status().healthy() ? "healthy" : "unhealthy");
+ stream << " in health state "
+ << (update.status().healthy() ? "healthy" : "unhealthy");
}
- return stream
- << " of framework " << update.framework_id();
+ return stream << " of framework " << update.framework_id();
}
} // namespace internal {
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index cbc6618..b8ed699 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -4990,9 +4990,13 @@ void Master::updateTask(Task* task, const StatusUpdate& update)
task->set_state(status.state());
}
- // Set the status update state and uuid for the task.
- task->set_status_update_state(status.state());
- task->set_status_update_uuid(update.uuid());
+ // Set the status update state and uuid for the task. Note that
+ // master-generated updates are terminal and do not have a uuid
+ // (in which case the master also calls removeTask()).
+ if (update.has_uuid()) {
+ task->set_status_update_state(status.state());
+ task->set_status_update_uuid(update.uuid());
+ }
// TODO(brenden) Consider wiping the `message` field?
if (task->statuses_size() > 0 &&
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/messages/messages.proto
----------------------------------------------------------------------
diff --git a/src/messages/messages.proto b/src/messages/messages.proto
index a1e71d8..165a16d 100644
--- a/src/messages/messages.proto
+++ b/src/messages/messages.proto
@@ -71,7 +71,11 @@ message StatusUpdate {
optional SlaveID slave_id = 3;
required TaskStatus status = 4;
required double timestamp = 5;
- required bytes uuid = 6;
+
+ // This is being deprecated in favor of TaskStatus.uuid. In 0.23.0,
+ // we set the TaskStatus 'uuid' in the executor driver for all
+ // retryable status updates.
+ optional bytes uuid = 6;
// This corresponds to the latest state of the task according to the
// slave. Note that this state might be different than the state in
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/sched/sched.cpp
----------------------------------------------------------------------
diff --git a/src/sched/sched.cpp b/src/sched/sched.cpp
index a4e35aa..d37b256 100644
--- a/src/sched/sched.cpp
+++ b/src/sched/sched.cpp
@@ -71,6 +71,8 @@
#include "authentication/cram_md5/authenticatee.hpp"
+#include "common/protobuf_utils.hpp"
+
#include "local/flags.hpp"
#include "local/local.hpp"
@@ -690,16 +692,19 @@ protected:
TaskStatus status = update.status();
- // If the update is driver-generated or master-generated, it
- // does not require acknowledgement and so we unset the 'uuid'
- // field of TaskStatus. Otherwise, we overwrite the field to
- // ensure that a 0.22.0 scheduler driver supports explicit
- // acknowledgements, even if running against a 0.21.0 cluster.
+ // If the update does not have a 'uuid', it does not need
+ // acknowledging. However, prior to 0.23.0, the update uuid
+ // was required and always set. In 0.24.0, we can rely on the
+ // update uuid check here, until then we must still check for
+ // this being sent from the driver (from == UPID()) or from
+ // the master (pid == UPID()).
//
- // TODO(bmahler): Update master and slave to ensure that 'uuid' is
- // set accurately by the time it reaches the scheduler driver.
- // This will be required for pure bindings.
- if (from == UPID() || pid == UPID()) {
+ // TODO(bmahler): For the HTTP API, we will have to update the
+ // master and slave to ensure the 'uuid' in TaskStatus is set
+ // correctly.
+ if (!update.has_uuid()) {
+ status.clear_uuid();
+ } else if (from == UPID() || pid == UPID()) {
status.clear_uuid();
} else {
status.set_uuid(update.uuid());
@@ -724,8 +729,8 @@ protected:
return;
}
- // Don't acknowledge updates created by the driver or master.
- if (from != UPID() && pid != UPID()) {
+ // See above for when we don't need to acknowledge.
+ if (update.has_uuid() && from != UPID() && pid != UPID()) {
// We drop updates while we're disconnected.
CHECK(connected);
CHECK_SOME(master);
@@ -915,16 +920,14 @@ protected:
// master failover etc). The correct way for schedulers to deal
// with this situation is to use 'reconcileTasks()'.
foreach (const TaskInfo& task, tasks) {
- StatusUpdate update;
- update.mutable_framework_id()->MergeFrom(framework.id());
- TaskStatus* status = update.mutable_status();
- status->mutable_task_id()->MergeFrom(task.task_id());
- status->set_state(TASK_LOST);
- status->set_source(TaskStatus::SOURCE_MASTER);
- status->set_reason(TaskStatus::REASON_MASTER_DISCONNECTED);
- status->set_message("Master Disconnected");
- update.set_timestamp(Clock::now().secs());
- update.set_uuid(UUID::random().toBytes());
+ StatusUpdate update = protobuf::createStatusUpdate(
+ framework.id(),
+ None(),
+ task.task_id(),
+ TASK_LOST,
+ TaskStatus::SOURCE_MASTER,
+ "Master disconnected",
+ TaskStatus::REASON_MASTER_DISCONNECTED);
statusUpdate(UPID(), update, UPID());
}
@@ -996,16 +999,14 @@ protected:
}
foreach (const TaskInfo& task, operation.launch().task_infos()) {
- StatusUpdate update;
- update.mutable_framework_id()->MergeFrom(framework.id());
- TaskStatus* status = update.mutable_status();
- status->mutable_task_id()->MergeFrom(task.task_id());
- status->set_state(TASK_LOST);
- status->set_source(TaskStatus::SOURCE_MASTER);
- status->set_reason(TaskStatus::REASON_MASTER_DISCONNECTED);
- status->set_message("Master Disconnected");
- update.set_timestamp(Clock::now().secs());
- update.set_uuid(UUID::random().toBytes());
+ StatusUpdate update = protobuf::createStatusUpdate(
+ framework.id(),
+ None(),
+ task.task_id(),
+ TASK_LOST,
+ TaskStatus::SOURCE_MASTER,
+ "Master disconnected",
+ TaskStatus::REASON_MASTER_DISCONNECTED);
statusUpdate(UPID(), update, UPID());
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/scheduler/scheduler.cpp
----------------------------------------------------------------------
diff --git a/src/scheduler/scheduler.cpp b/src/scheduler/scheduler.cpp
index 1efc6fb..f360e4d 100644
--- a/src/scheduler/scheduler.cpp
+++ b/src/scheduler/scheduler.cpp
@@ -624,11 +624,19 @@ protected:
update->mutable_status()->set_timestamp(message.update().timestamp());
- // If the update is generated by the master it doesn't need to be
- // acknowledged; so we unset the UUID inside TaskStatus.
- // TODO(vinod): Update master and slave to ensure that 'uuid' is
- // set accurately by the time it reaches the scheduler.
- if (UPID(message.pid()) == UPID()) {
+ // If the update does not have a 'uuid', it does not need
+ // acknowledging. However, prior to 0.23.0, the update uuid
+ // was required and always set. In 0.24.0, we can rely on the
+ // update uuid check here, until then we must still check for
+ // this being sent from the driver (from == UPID()) or from
+ // the master (pid == UPID()).
+ //
+ // TODO(bmahler): For the HTTP API, we will have to update the
+ // master and slave to ensure the 'uuid' in TaskStatus is set
+ // correctly.
+ if (!message.update().has_uuid()) {
+ update->mutable_status()->clear_uuid();
+ } else if (UPID(message.pid()) == UPID()) {
update->mutable_status()->clear_uuid();
} else {
update->mutable_status()->set_uuid(message.update().uuid());
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 9b72fad..1105a66 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -2663,6 +2663,15 @@ void Slave::statusUpdate(StatusUpdate update, const UPID& pid)
state == RUNNING || state == TERMINATING)
<< state;
+ // TODO(bmahler): With the HTTP API, we must validate the UUID
+ // inside the TaskStatus. For now, we only care about the UUID
+ // inside the StatusUpdate, as the scheduler driver overwrites it.
+ if (!update.has_uuid()) {
+ LOG(WARNING) << "Ignoring status update " << update << " without 'uuid'";
+ metrics.invalid_status_updates++;
+ return;
+ }
+
// Set the source before forwarding the status update.
update.mutable_status()->set_source(
pid == UPID() ? TaskStatus::SOURCE_SLAVE : TaskStatus::SOURCE_EXECUTOR);
@@ -2880,6 +2889,9 @@ void Slave::forward(StatusUpdate update)
}
if (task != NULL) {
+ CHECK(update.has_uuid())
+ << "Expecting updates without 'uuid' to have been rejected";
+
// We set the status update state of the task here because in
// steady state master updates the status update state of the
// task when it receives this update. If the master fails over,
@@ -5120,6 +5132,9 @@ void Executor::recoverTask(const TaskState& state)
launchedTasks.contains(state.id)) {
terminateTask(state.id, update.status());
+ CHECK(update.has_uuid())
+ << "Expecting updates without 'uuid' to have been rejected";
+
// If the terminal update has been acknowledged, remove it.
if (state.acks.contains(UUID::fromBytes(update.uuid()))) {
completeTask(state.id);
http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/slave/status_update_manager.cpp
----------------------------------------------------------------------
diff --git a/src/slave/status_update_manager.cpp b/src/slave/status_update_manager.cpp
index 0ad2450..1978ac8 100644
--- a/src/slave/status_update_manager.cpp
+++ b/src/slave/status_update_manager.cpp
@@ -711,6 +711,10 @@ Try<bool> StatusUpdateStream::update(const StatusUpdate& update)
return Error(error.get());
}
+ if (!update.has_uuid()) {
+ return Error("Status update is missing 'uuid'");
+ }
+
// Check that this status update has not already been acknowledged.
// This could happen in the rare case when the slave received the ACK
// from the framework, died, but slave's ACK to the executor never made it!
[3/3] mesos git commit: Updated the executor driver to set
TaskStatus.uuid.
Posted by bm...@apache.org.
Updated the executor driver to set TaskStatus.uuid.
Review: https://reviews.apache.org/r/35910
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a964bec3
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a964bec3
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a964bec3
Branch: refs/heads/master
Commit: a964bec3bd85f109d479c69f6d7e21da881c3be5
Parents: 4b591a0
Author: Benjamin Mahler <be...@gmail.com>
Authored: Thu Jun 25 22:58:55 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Mon Jun 29 10:01:27 2015 -0700
----------------------------------------------------------------------
src/exec/exec.cpp | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/a964bec3/src/exec/exec.cpp
----------------------------------------------------------------------
diff --git a/src/exec/exec.cpp b/src/exec/exec.cpp
index 930dda9..a1ae074 100644
--- a/src/exec/exec.cpp
+++ b/src/exec/exec.cpp
@@ -499,17 +499,23 @@ protected:
update->mutable_status()->MergeFrom(status);
update->set_timestamp(Clock::now().secs());
update->mutable_status()->set_timestamp(update->timestamp());
- update->set_uuid(UUID::random().toBytes());
message.set_pid(self());
- // Incoming status update might come from an executor which has not set
- // slave id in TaskStatus. Set/overwrite slave id.
+ // We overwrite the UUID for this status update, however with
+ // the HTTP API, the executor will have to generate a UUID
+ // (which needs to be validated to be RFC-4122 compliant).
+ UUID uuid = UUID::random();
+ update->set_uuid(uuid.toBytes());
+ update->mutable_status()->set_uuid(uuid.toBytes());
+
+ // We overwrite the SlaveID for this status update, however with
+ // the HTTP API, this can be overwritten by the slave instead.
update->mutable_status()->mutable_slave_id()->CopyFrom(slaveId);
VLOG(1) << "Executor sending status update " << *update;
// Capture the status update.
- updates[UUID::fromBytes(update->uuid())] = *update;
+ updates[uuid] = *update;
send(slave, message);
}