You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2015/06/29 19:20:26 UTC

[2/3] mesos git commit: Moved StatusUpdate.uuid from required to optional.

Moved StatusUpdate.uuid from required to optional.

Review: https://reviews.apache.org/r/35911


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/fda49c04
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/fda49c04
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/fda49c04

Branch: refs/heads/master
Commit: fda49c04c88ce3c2c28866553669e64c8b65b956
Parents: a964bec
Author: Benjamin Mahler <be...@gmail.com>
Authored: Thu Jun 25 22:44:53 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Mon Jun 29 10:01:27 2015 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto           |  3 +-
 src/common/type_utils.cpp           | 17 ++++-----
 src/master/master.cpp               | 10 +++--
 src/messages/messages.proto         |  6 ++-
 src/sched/sched.cpp                 | 63 ++++++++++++++++----------------
 src/scheduler/scheduler.cpp         | 18 ++++++---
 src/slave/slave.cpp                 | 15 ++++++++
 src/slave/status_update_manager.cpp |  4 ++
 8 files changed, 86 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 81008ed..0ebe5d3 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -920,7 +920,8 @@ message TaskStatus {
   // driver implicitly acknowledge (default).
   //
   // TODO(bmahler): This is currently overwritten in the scheduler
-  // driver, even if executors set this.
+  // driver and executor driver, but executors will need to set this
+  // to a valid RFC-4122 UUID if using the HTTP API.
   optional bytes uuid = 11;
 
   // Describes whether the task has been determined to be healthy

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/common/type_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/type_utils.cpp b/src/common/type_utils.cpp
index f88dff7..f7b21c6 100644
--- a/src/common/type_utils.cpp
+++ b/src/common/type_utils.cpp
@@ -388,19 +388,18 @@ std::ostream& operator << (
     std::ostream& stream,
     const StatusUpdate& update)
 {
-  stream
-    << update.status().state()
-    << " (UUID: " << UUID::fromBytes(update.uuid())
-    << ") for task " << update.status().task_id();
+  stream << update.status().state()
+         << (update.has_uuid()
+             ? " (UUID: " + stringify(UUID::fromBytes(update.uuid()))
+             : "")
+         << ") for task " << update.status().task_id();
 
   if (update.status().has_healthy()) {
-    stream
-      << " in health state "
-      << (update.status().healthy() ? "healthy" : "unhealthy");
+    stream << " in health state "
+           << (update.status().healthy() ? "healthy" : "unhealthy");
   }
 
-  return stream
-    << " of framework " << update.framework_id();
+  return stream << " of framework " << update.framework_id();
 }
 
 } // namespace internal {

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index cbc6618..b8ed699 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -4990,9 +4990,13 @@ void Master::updateTask(Task* task, const StatusUpdate& update)
     task->set_state(status.state());
   }
 
-  // Set the status update state and uuid for the task.
-  task->set_status_update_state(status.state());
-  task->set_status_update_uuid(update.uuid());
+  // Set the status update state and uuid for the task. Note that
+  // master-generated updates are terminal and do not have a uuid
+  // (in which case the master also calls removeTask()).
+  if (update.has_uuid()) {
+    task->set_status_update_state(status.state());
+    task->set_status_update_uuid(update.uuid());
+  }
 
   // TODO(brenden) Consider wiping the `message` field?
   if (task->statuses_size() > 0 &&

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/messages/messages.proto
----------------------------------------------------------------------
diff --git a/src/messages/messages.proto b/src/messages/messages.proto
index a1e71d8..165a16d 100644
--- a/src/messages/messages.proto
+++ b/src/messages/messages.proto
@@ -71,7 +71,11 @@ message StatusUpdate {
   optional SlaveID slave_id = 3;
   required TaskStatus status = 4;
   required double timestamp = 5;
-  required bytes uuid = 6;
+
+  // This is being deprecated in favor of TaskStatus.uuid. In 0.23.0,
+  // we set the TaskStatus 'uuid' in the executor driver for all
+  // retryable status updates.
+  optional bytes uuid = 6;
 
   // This corresponds to the latest state of the task according to the
   // slave. Note that this state might be different than the state in

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/sched/sched.cpp
----------------------------------------------------------------------
diff --git a/src/sched/sched.cpp b/src/sched/sched.cpp
index a4e35aa..d37b256 100644
--- a/src/sched/sched.cpp
+++ b/src/sched/sched.cpp
@@ -71,6 +71,8 @@
 
 #include "authentication/cram_md5/authenticatee.hpp"
 
+#include "common/protobuf_utils.hpp"
+
 #include "local/flags.hpp"
 #include "local/local.hpp"
 
@@ -690,16 +692,19 @@ protected:
 
     TaskStatus status = update.status();
 
-    // If the update is driver-generated or master-generated, it
-    // does not require acknowledgement and so we unset the 'uuid'
-    // field of TaskStatus. Otherwise, we overwrite the field to
-    // ensure that a 0.22.0 scheduler driver supports explicit
-    // acknowledgements, even if running against a 0.21.0 cluster.
+    // If the update does not have a 'uuid', it does not need
+    // acknowledging. However, prior to 0.23.0, the update uuid
+    // was required and always set. In 0.24.0, we can rely on the
+    // update uuid check here, until then we must still check for
+    // this being sent from the driver (from == UPID()) or from
+    // the master (pid == UPID()).
     //
-    // TODO(bmahler): Update master and slave to ensure that 'uuid' is
-    // set accurately by the time it reaches the scheduler driver.
-    // This will be required for pure bindings.
-    if (from == UPID() || pid == UPID()) {
+    // TODO(bmahler): For the HTTP API, we will have to update the
+    // master and slave to ensure the 'uuid' in TaskStatus is set
+    // correctly.
+    if (!update.has_uuid()) {
+      status.clear_uuid();
+    } else if (from == UPID() || pid == UPID()) {
       status.clear_uuid();
     } else {
       status.set_uuid(update.uuid());
@@ -724,8 +729,8 @@ protected:
         return;
       }
 
-      // Don't acknowledge updates created by the driver or master.
-      if (from != UPID() && pid != UPID()) {
+      // See above for when we don't need to acknowledge.
+      if (update.has_uuid() && from != UPID() && pid != UPID()) {
         // We drop updates while we're disconnected.
         CHECK(connected);
         CHECK_SOME(master);
@@ -915,16 +920,14 @@ protected:
       // master failover etc). The correct way for schedulers to deal
       // with this situation is to use 'reconcileTasks()'.
       foreach (const TaskInfo& task, tasks) {
-        StatusUpdate update;
-        update.mutable_framework_id()->MergeFrom(framework.id());
-        TaskStatus* status = update.mutable_status();
-        status->mutable_task_id()->MergeFrom(task.task_id());
-        status->set_state(TASK_LOST);
-        status->set_source(TaskStatus::SOURCE_MASTER);
-        status->set_reason(TaskStatus::REASON_MASTER_DISCONNECTED);
-        status->set_message("Master Disconnected");
-        update.set_timestamp(Clock::now().secs());
-        update.set_uuid(UUID::random().toBytes());
+        StatusUpdate update = protobuf::createStatusUpdate(
+            framework.id(),
+            None(),
+            task.task_id(),
+            TASK_LOST,
+            TaskStatus::SOURCE_MASTER,
+            "Master disconnected",
+            TaskStatus::REASON_MASTER_DISCONNECTED);
 
         statusUpdate(UPID(), update, UPID());
       }
@@ -996,16 +999,14 @@ protected:
         }
 
         foreach (const TaskInfo& task, operation.launch().task_infos()) {
-          StatusUpdate update;
-          update.mutable_framework_id()->MergeFrom(framework.id());
-          TaskStatus* status = update.mutable_status();
-          status->mutable_task_id()->MergeFrom(task.task_id());
-          status->set_state(TASK_LOST);
-          status->set_source(TaskStatus::SOURCE_MASTER);
-          status->set_reason(TaskStatus::REASON_MASTER_DISCONNECTED);
-          status->set_message("Master Disconnected");
-          update.set_timestamp(Clock::now().secs());
-          update.set_uuid(UUID::random().toBytes());
+          StatusUpdate update = protobuf::createStatusUpdate(
+              framework.id(),
+              None(),
+              task.task_id(),
+              TASK_LOST,
+              TaskStatus::SOURCE_MASTER,
+              "Master disconnected",
+              TaskStatus::REASON_MASTER_DISCONNECTED);
 
           statusUpdate(UPID(), update, UPID());
         }

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/scheduler/scheduler.cpp
----------------------------------------------------------------------
diff --git a/src/scheduler/scheduler.cpp b/src/scheduler/scheduler.cpp
index 1efc6fb..f360e4d 100644
--- a/src/scheduler/scheduler.cpp
+++ b/src/scheduler/scheduler.cpp
@@ -624,11 +624,19 @@ protected:
 
     update->mutable_status()->set_timestamp(message.update().timestamp());
 
-    // If the update is generated by the master it doesn't need to be
-    // acknowledged; so we unset the UUID inside TaskStatus.
-    // TODO(vinod): Update master and slave to ensure that 'uuid' is
-    // set accurately by the time it reaches the scheduler.
-    if (UPID(message.pid()) == UPID()) {
+    // If the update does not have a 'uuid', it does not need
+    // acknowledging. However, prior to 0.23.0, the update uuid
+    // was required and always set. In 0.24.0, we can rely on the
+    // update uuid check here, until then we must still check for
+    // this being sent from the driver (from == UPID()) or from
+    // the master (pid == UPID()).
+    //
+    // TODO(bmahler): For the HTTP API, we will have to update the
+    // master and slave to ensure the 'uuid' in TaskStatus is set
+    // correctly.
+    if (!message.update().has_uuid()) {
+      update->mutable_status()->clear_uuid();
+    } else if (UPID(message.pid()) == UPID()) {
       update->mutable_status()->clear_uuid();
     } else {
       update->mutable_status()->set_uuid(message.update().uuid());

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 9b72fad..1105a66 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -2663,6 +2663,15 @@ void Slave::statusUpdate(StatusUpdate update, const UPID& pid)
         state == RUNNING || state == TERMINATING)
     << state;
 
+  // TODO(bmahler): With the HTTP API, we must validate the UUID
+  // inside the TaskStatus. For now, we only care about the UUID
+  // inside the StatusUpdate, as the scheduler driver overwrites it.
+  if (!update.has_uuid()) {
+    LOG(WARNING) << "Ignoring status update " << update << " without 'uuid'";
+    metrics.invalid_status_updates++;
+    return;
+  }
+
   // Set the source before forwarding the status update.
   update.mutable_status()->set_source(
       pid == UPID() ? TaskStatus::SOURCE_SLAVE : TaskStatus::SOURCE_EXECUTOR);
@@ -2880,6 +2889,9 @@ void Slave::forward(StatusUpdate update)
       }
 
       if (task != NULL) {
+        CHECK(update.has_uuid())
+          << "Expecting updates without 'uuid' to have been rejected";
+
         // We set the status update state of the task here because in
         // steady state master updates the status update state of the
         // task when it receives this update. If the master fails over,
@@ -5120,6 +5132,9 @@ void Executor::recoverTask(const TaskState& state)
         launchedTasks.contains(state.id)) {
       terminateTask(state.id, update.status());
 
+      CHECK(update.has_uuid())
+        << "Expecting updates without 'uuid' to have been rejected";
+
       // If the terminal update has been acknowledged, remove it.
       if (state.acks.contains(UUID::fromBytes(update.uuid()))) {
         completeTask(state.id);

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/slave/status_update_manager.cpp
----------------------------------------------------------------------
diff --git a/src/slave/status_update_manager.cpp b/src/slave/status_update_manager.cpp
index 0ad2450..1978ac8 100644
--- a/src/slave/status_update_manager.cpp
+++ b/src/slave/status_update_manager.cpp
@@ -711,6 +711,10 @@ Try<bool> StatusUpdateStream::update(const StatusUpdate& update)
     return Error(error.get());
   }
 
+  if (!update.has_uuid()) {
+    return Error("Status update is missing 'uuid'");
+  }
+
   // Check that this status update has not already been acknowledged.
   // This could happen in the rare case when the slave received the ACK
   // from the framework, died, but slave's ACK to the executor never made it!