You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2015/06/29 19:20:25 UTC

[1/3] mesos git commit: Updated createdStatusUpdate to take an optional UUID.

Repository: mesos
Updated Branches:
  refs/heads/master 4b591a0d0 -> 0d5dc05b5


Updated createdStatusUpdate to take an optional UUID.

Review: https://reviews.apache.org/r/35912


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0d5dc05b
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0d5dc05b
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0d5dc05b

Branch: refs/heads/master
Commit: 0d5dc05b5d3a4bf33c7f38a07ae68ef078600552
Parents: fda49c0
Author: Benjamin Mahler <be...@gmail.com>
Authored: Thu Jun 25 22:59:29 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Mon Jun 29 10:01:27 2015 -0700

----------------------------------------------------------------------
 src/common/protobuf_utils.cpp       |  7 ++++++-
 src/common/protobuf_utils.hpp       |  7 +++++++
 src/master/master.cpp               | 15 +++++++++++++++
 src/sched/sched.cpp                 |  2 ++
 src/slave/slave.cpp                 | 14 +++++++++++++-
 src/tests/fault_tolerance_tests.cpp |  5 ++++-
 src/tests/partition_tests.cpp       |  4 +++-
 7 files changed, 50 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/common/protobuf_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/protobuf_utils.cpp b/src/common/protobuf_utils.cpp
index 9ccd70d..f0642ba 100644
--- a/src/common/protobuf_utils.cpp
+++ b/src/common/protobuf_utils.cpp
@@ -49,6 +49,7 @@ StatusUpdate createStatusUpdate(
     const TaskID& taskId,
     const TaskState& state,
     const TaskStatus::Source& source,
+    const Option<UUID>& uuid,
     const string& message = "",
     const Option<TaskStatus::Reason>& reason = None(),
     const Option<ExecutorID>& executorId = None(),
@@ -57,7 +58,6 @@ StatusUpdate createStatusUpdate(
   StatusUpdate update;
 
   update.set_timestamp(process::Clock::now().secs());
-  update.set_uuid(UUID::random().toBytes());
   update.mutable_framework_id()->MergeFrom(frameworkId);
 
   if (slaveId.isSome()) {
@@ -80,6 +80,11 @@ StatusUpdate createStatusUpdate(
   status->set_message(message);
   status->set_timestamp(update.timestamp());
 
+  if (uuid.isSome()) {
+    update.set_uuid(uuid.get().toBytes());
+    status->set_uuid(uuid.get().toBytes());
+  }
+
   if (reason.isSome()) {
     status->set_reason(reason.get());
   }

http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/common/protobuf_utils.hpp
----------------------------------------------------------------------
diff --git a/src/common/protobuf_utils.hpp b/src/common/protobuf_utils.hpp
index 9ecd234..afe5a85 100644
--- a/src/common/protobuf_utils.hpp
+++ b/src/common/protobuf_utils.hpp
@@ -23,6 +23,7 @@
 
 #include <stout/ip.hpp>
 #include <stout/option.hpp>
+#include <stout/uuid.hpp>
 
 #include "messages/messages.hpp"
 
@@ -38,12 +39,18 @@ namespace protobuf {
 bool isTerminalState(const TaskState& state);
 
 
+// See TaskStatus for more information about these fields. Note
+// that the 'uuid' must be provided for updates that need
+// acknowledgement. Currently, all slave and executor generated
+// updates require acknowledgement, whereas master generated
+// and scheduler driver generated updates do not.
 StatusUpdate createStatusUpdate(
     const FrameworkID& frameworkId,
     const Option<SlaveID>& slaveId,
     const TaskID& taskId,
     const TaskState& state,
     const TaskStatus::Source& source,
+    const Option<UUID>& uuid,
     const std::string& message = "",
     const Option<TaskStatus::Reason>& reason = None(),
     const Option<ExecutorID>& executorId = None(),

http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index b8ed699..34ce744 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -2403,6 +2403,7 @@ void Master::accept(
             task.task_id(),
             TASK_LOST,
             TaskStatus::SOURCE_MASTER,
+            None(),
             "Task launched with invalid offers: " + error.get().message,
             TaskStatus::REASON_INVALID_OFFERS);
 
@@ -2514,6 +2515,7 @@ void Master::_accept(
             task.task_id(),
             TASK_LOST,
             TaskStatus::SOURCE_MASTER,
+            None(),
             slave == NULL ? "Slave removed" : "Slave disconnected",
             reason);
 
@@ -2688,6 +2690,7 @@ void Master::_accept(
                 task.task_id(),
                 TASK_ERROR,
                 TaskStatus::SOURCE_MASTER,
+                None(),
                 authorization.isFailed() ?
                     "Authorization failure: " + authorization.failure() :
                     "Not authorized to launch as user '" + user + "'",
@@ -2730,6 +2733,7 @@ void Master::_accept(
                 task_.task_id(),
                 TASK_ERROR,
                 TaskStatus::SOURCE_MASTER,
+                None(),
                 validationError.get().message,
                 TaskStatus::REASON_TASK_INVALID);
 
@@ -2870,6 +2874,7 @@ void Master::kill(Framework* framework, const scheduler::Call::Kill& kill)
         taskId,
         TASK_KILLED,
         TaskStatus::SOURCE_MASTER,
+        None(),
         "Killed pending task");
 
     forward(update, UPID(), framework);
@@ -3835,6 +3840,7 @@ void Master::_reconcileTasks(
           task.task_id(),
           TASK_STAGING,
           TaskStatus::SOURCE_MASTER,
+          None(),
           "Reconciliation: Latest task state",
           TaskStatus::REASON_RECONCILIATION);
 
@@ -3865,6 +3871,7 @@ void Master::_reconcileTasks(
           task->task_id(),
           state,
           TaskStatus::SOURCE_MASTER,
+          None(),
           "Reconciliation: Latest task state",
           TaskStatus::REASON_RECONCILIATION,
           executorId,
@@ -3920,6 +3927,7 @@ void Master::_reconcileTasks(
           task_.task_id(),
           TASK_STAGING,
           TaskStatus::SOURCE_MASTER,
+          None(),
           "Reconciliation: Latest task state",
           TaskStatus::REASON_RECONCILIATION);
     } else if (task != NULL) {
@@ -3938,6 +3946,7 @@ void Master::_reconcileTasks(
           task->task_id(),
           state,
           TaskStatus::SOURCE_MASTER,
+          None(),
           "Reconciliation: Latest task state",
           TaskStatus::REASON_RECONCILIATION,
           executorId,
@@ -3950,6 +3959,7 @@ void Master::_reconcileTasks(
           status.task_id(),
           TASK_LOST,
           TaskStatus::SOURCE_MASTER,
+          None(),
           "Reconciliation: Task is unknown to the slave",
           TaskStatus::REASON_RECONCILIATION);
     } else if (slaves.transitioning(slaveId)) {
@@ -3965,6 +3975,7 @@ void Master::_reconcileTasks(
           status.task_id(),
           TASK_LOST,
           TaskStatus::SOURCE_MASTER,
+          None(),
           "Reconciliation: Task is unknown",
           TaskStatus::REASON_RECONCILIATION);
     }
@@ -4339,6 +4350,7 @@ void Master::reconcile(
               task->task_id(),
               TASK_LOST,
               TaskStatus::SOURCE_MASTER,
+              None(),
               "Task is unknown to the slave",
               TaskStatus::REASON_TASK_UNKNOWN);
 
@@ -4600,6 +4612,7 @@ void Master::removeFramework(Framework* framework)
         task->task_id(),
         TASK_KILLED,
         TaskStatus::SOURCE_MASTER,
+        None(),
         "Framework " + framework->id().value() + " removed",
         TaskStatus::REASON_FRAMEWORK_REMOVED,
         (task->has_executor_id()
@@ -4695,6 +4708,7 @@ void Master::removeFramework(Slave* slave, Framework* framework)
         task->task_id(),
         TASK_LOST,
         TaskStatus::SOURCE_MASTER,
+        None(),
         "Slave " + slave->info.hostname() + " disconnected",
         TaskStatus::REASON_SLAVE_DISCONNECTED,
         (task->has_executor_id()
@@ -4834,6 +4848,7 @@ void Master::removeSlave(
           task->task_id(),
           TASK_LOST,
           TaskStatus::SOURCE_MASTER,
+          None(),
           "Slave " + slave->info.hostname() + " removed: " + message,
           TaskStatus::REASON_SLAVE_REMOVED,
           (task->has_executor_id() ?

http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/sched/sched.cpp
----------------------------------------------------------------------
diff --git a/src/sched/sched.cpp b/src/sched/sched.cpp
index d37b256..7563abb 100644
--- a/src/sched/sched.cpp
+++ b/src/sched/sched.cpp
@@ -926,6 +926,7 @@ protected:
             task.task_id(),
             TASK_LOST,
             TaskStatus::SOURCE_MASTER,
+            None(),
             "Master disconnected",
             TaskStatus::REASON_MASTER_DISCONNECTED);
 
@@ -1005,6 +1006,7 @@ protected:
               task.task_id(),
               TASK_LOST,
               TaskStatus::SOURCE_MASTER,
+              None(),
               "Master disconnected",
               TaskStatus::REASON_MASTER_DISCONNECTED);
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 1105a66..008170f 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -61,6 +61,7 @@
 #include <stout/stringify.hpp>
 #include <stout/strings.hpp>
 #include <stout/try.hpp>
+#include <stout/uuid.hpp>
 #include <stout/utils.hpp>
 
 #ifdef __linux__
@@ -1041,6 +1042,7 @@ void Slave::reregistered(
             taskId,
             TASK_LOST,
             TaskStatus::SOURCE_SLAVE,
+            UUID::random(),
             "Reconciliation: task unknown to the slave",
             TaskStatus::REASON_RECONCILIATION);
 
@@ -1413,6 +1415,7 @@ void Slave::_runTask(
         task.task_id(),
         TASK_LOST,
         TaskStatus::SOURCE_SLAVE,
+        UUID::random(),
         "Could not launch the task because we failed to unschedule directories"
         " scheduled for gc",
         TaskStatus::REASON_GC_ERROR);
@@ -1454,7 +1457,8 @@ void Slave::_runTask(
           task.task_id(),
           TASK_LOST,
           TaskStatus::SOURCE_SLAVE,
-          "The checkpointed resources being used by the task are unknown to "
+          UUID::random(),
+         "The checkpointed resources being used by the task are unknown to "
           "the slave",
           TaskStatus::REASON_RESOURCES_UNKNOWN);
 
@@ -1486,6 +1490,7 @@ void Slave::_runTask(
             task.task_id(),
             TASK_LOST,
             TaskStatus::SOURCE_SLAVE,
+            UUID::random(),
             "The checkpointed resources being used by the executor are unknown "
             "to the slave",
             TaskStatus::REASON_RESOURCES_UNKNOWN,
@@ -1551,6 +1556,7 @@ void Slave::_runTask(
           task.task_id(),
           TASK_LOST,
           TaskStatus::SOURCE_SLAVE,
+          UUID::random(),
           "Executor terminating/terminated",
           TaskStatus::REASON_EXECUTOR_TERMINATED);
 
@@ -1797,6 +1803,7 @@ void Slave::killTask(
           taskId,
           TASK_KILLED,
           TaskStatus::SOURCE_SLAVE,
+          UUID::random(),
           "Task killed before it was launched");
       statusUpdate(update, UPID());
 
@@ -1824,6 +1831,7 @@ void Slave::killTask(
         taskId,
         TASK_LOST,
         TaskStatus::SOURCE_SLAVE,
+        UUID::random(),
         "Cannot find executor",
         TaskStatus::REASON_EXECUTOR_TERMINATED);
 
@@ -1840,6 +1848,7 @@ void Slave::killTask(
           taskId,
           TASK_KILLED,
           TaskStatus::SOURCE_SLAVE,
+          UUID::random(),
           "Unregistered executor",
           TaskStatus::REASON_EXECUTOR_UNREGISTERED,
           executor->id);
@@ -1887,6 +1896,7 @@ void Slave::killTask(
             taskId,
             TASK_KILLED,
             TaskStatus::SOURCE_SLAVE,
+            UUID::random(),
             "Task killed when it was queued",
             None(),
             executor->id);
@@ -2566,6 +2576,7 @@ void Slave::reregisterExecutor(
               task->task_id(),
               TASK_LOST,
               TaskStatus::SOURCE_SLAVE,
+              UUID::random(),
               "Task launched during slave restart",
               TaskStatus::REASON_SLAVE_RESTARTED,
               executorId);
@@ -4531,6 +4542,7 @@ void Slave::sendExecutorTerminatedStatusUpdate(
       taskId,
       taskState,
       TaskStatus::SOURCE_SLAVE,
+      UUID::random(),
       termination.isReady() ? termination.get().message()
                             : "Abnormal executor termination",
       reason,

http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index fb28e2a..1070ccf 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -38,6 +38,7 @@
 
 #include <stout/json.hpp>
 #include <stout/stringify.hpp>
+#include <stout/uuid.hpp>
 
 #include "common/protobuf_utils.hpp"
 
@@ -1133,6 +1134,7 @@ TEST_F(FaultToleranceTest, ForwardStatusUpdateUnknownExecutor)
       taskId,
       TASK_RUNNING,
       TaskStatus::SOURCE_SLAVE,
+      UUID::random(),
       "Dummy update");
 
   process::dispatch(slave.get(), &Slave::statusUpdate, statusUpdate2, UPID());
@@ -1694,7 +1696,8 @@ TEST_F(FaultToleranceTest, SplitBrainMasters)
       runningStatus.get().slave_id(),
       runningStatus.get().task_id(),
       TASK_LOST,
-      TaskStatus::SOURCE_SLAVE));
+      TaskStatus::SOURCE_SLAVE,
+      UUID::random()));
 
   // Spoof a message from a random master; this should be dropped by
   // the scheduler driver. Since this is delivered locally, it is

http://git-wip-us.apache.org/repos/asf/mesos/blob/0d5dc05b/src/tests/partition_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/partition_tests.cpp b/src/tests/partition_tests.cpp
index c6ec14d..a1fb6f7 100644
--- a/src/tests/partition_tests.cpp
+++ b/src/tests/partition_tests.cpp
@@ -26,6 +26,7 @@
 #include <process/pid.hpp>
 
 #include <stout/try.hpp>
+#include <stout/uuid.hpp>
 
 #include "common/protobuf_utils.hpp"
 
@@ -389,7 +390,8 @@ TEST_F(PartitionTest, PartitionedSlaveStatusUpdates)
       slaveId,
       taskId,
       TASK_RUNNING,
-      TaskStatus::SOURCE_SLAVE);
+      TaskStatus::SOURCE_SLAVE,
+      UUID::random());
 
   StatusUpdateMessage message;
   message.mutable_update()->CopyFrom(update);


[2/3] mesos git commit: Moved StatusUpdate.uuid from required to optional.

Posted by bm...@apache.org.
Moved StatusUpdate.uuid from required to optional.

Review: https://reviews.apache.org/r/35911


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/fda49c04
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/fda49c04
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/fda49c04

Branch: refs/heads/master
Commit: fda49c04c88ce3c2c28866553669e64c8b65b956
Parents: a964bec
Author: Benjamin Mahler <be...@gmail.com>
Authored: Thu Jun 25 22:44:53 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Mon Jun 29 10:01:27 2015 -0700

----------------------------------------------------------------------
 include/mesos/mesos.proto           |  3 +-
 src/common/type_utils.cpp           | 17 ++++-----
 src/master/master.cpp               | 10 +++--
 src/messages/messages.proto         |  6 ++-
 src/sched/sched.cpp                 | 63 ++++++++++++++++----------------
 src/scheduler/scheduler.cpp         | 18 ++++++---
 src/slave/slave.cpp                 | 15 ++++++++
 src/slave/status_update_manager.cpp |  4 ++
 8 files changed, 86 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 81008ed..0ebe5d3 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -920,7 +920,8 @@ message TaskStatus {
   // driver implicitly acknowledge (default).
   //
   // TODO(bmahler): This is currently overwritten in the scheduler
-  // driver, even if executors set this.
+  // driver and executor driver, but executors will need to set this
+  // to a valid RFC-4122 UUID if using the HTTP API.
   optional bytes uuid = 11;
 
   // Describes whether the task has been determined to be healthy

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/common/type_utils.cpp
----------------------------------------------------------------------
diff --git a/src/common/type_utils.cpp b/src/common/type_utils.cpp
index f88dff7..f7b21c6 100644
--- a/src/common/type_utils.cpp
+++ b/src/common/type_utils.cpp
@@ -388,19 +388,18 @@ std::ostream& operator << (
     std::ostream& stream,
     const StatusUpdate& update)
 {
-  stream
-    << update.status().state()
-    << " (UUID: " << UUID::fromBytes(update.uuid())
-    << ") for task " << update.status().task_id();
+  stream << update.status().state()
+         << (update.has_uuid()
+             ? " (UUID: " + stringify(UUID::fromBytes(update.uuid()))
+             : "")
+         << ") for task " << update.status().task_id();
 
   if (update.status().has_healthy()) {
-    stream
-      << " in health state "
-      << (update.status().healthy() ? "healthy" : "unhealthy");
+    stream << " in health state "
+           << (update.status().healthy() ? "healthy" : "unhealthy");
   }
 
-  return stream
-    << " of framework " << update.framework_id();
+  return stream << " of framework " << update.framework_id();
 }
 
 } // namespace internal {

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index cbc6618..b8ed699 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -4990,9 +4990,13 @@ void Master::updateTask(Task* task, const StatusUpdate& update)
     task->set_state(status.state());
   }
 
-  // Set the status update state and uuid for the task.
-  task->set_status_update_state(status.state());
-  task->set_status_update_uuid(update.uuid());
+  // Set the status update state and uuid for the task. Note that
+  // master-generated updates are terminal and do not have a uuid
+  // (in which case the master also calls removeTask()).
+  if (update.has_uuid()) {
+    task->set_status_update_state(status.state());
+    task->set_status_update_uuid(update.uuid());
+  }
 
   // TODO(brenden) Consider wiping the `message` field?
   if (task->statuses_size() > 0 &&

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/messages/messages.proto
----------------------------------------------------------------------
diff --git a/src/messages/messages.proto b/src/messages/messages.proto
index a1e71d8..165a16d 100644
--- a/src/messages/messages.proto
+++ b/src/messages/messages.proto
@@ -71,7 +71,11 @@ message StatusUpdate {
   optional SlaveID slave_id = 3;
   required TaskStatus status = 4;
   required double timestamp = 5;
-  required bytes uuid = 6;
+
+  // This is being deprecated in favor of TaskStatus.uuid. In 0.23.0,
+  // we set the TaskStatus 'uuid' in the executor driver for all
+  // retryable status updates.
+  optional bytes uuid = 6;
 
   // This corresponds to the latest state of the task according to the
   // slave. Note that this state might be different than the state in

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/sched/sched.cpp
----------------------------------------------------------------------
diff --git a/src/sched/sched.cpp b/src/sched/sched.cpp
index a4e35aa..d37b256 100644
--- a/src/sched/sched.cpp
+++ b/src/sched/sched.cpp
@@ -71,6 +71,8 @@
 
 #include "authentication/cram_md5/authenticatee.hpp"
 
+#include "common/protobuf_utils.hpp"
+
 #include "local/flags.hpp"
 #include "local/local.hpp"
 
@@ -690,16 +692,19 @@ protected:
 
     TaskStatus status = update.status();
 
-    // If the update is driver-generated or master-generated, it
-    // does not require acknowledgement and so we unset the 'uuid'
-    // field of TaskStatus. Otherwise, we overwrite the field to
-    // ensure that a 0.22.0 scheduler driver supports explicit
-    // acknowledgements, even if running against a 0.21.0 cluster.
+    // If the update does not have a 'uuid', it does not need
+    // acknowledging. However, prior to 0.23.0, the update uuid
+    // was required and always set. In 0.24.0, we can rely on the
+    // update uuid check here, until then we must still check for
+    // this being sent from the driver (from == UPID()) or from
+    // the master (pid == UPID()).
     //
-    // TODO(bmahler): Update master and slave to ensure that 'uuid' is
-    // set accurately by the time it reaches the scheduler driver.
-    // This will be required for pure bindings.
-    if (from == UPID() || pid == UPID()) {
+    // TODO(bmahler): For the HTTP API, we will have to update the
+    // master and slave to ensure the 'uuid' in TaskStatus is set
+    // correctly.
+    if (!update.has_uuid()) {
+      status.clear_uuid();
+    } else if (from == UPID() || pid == UPID()) {
       status.clear_uuid();
     } else {
       status.set_uuid(update.uuid());
@@ -724,8 +729,8 @@ protected:
         return;
       }
 
-      // Don't acknowledge updates created by the driver or master.
-      if (from != UPID() && pid != UPID()) {
+      // See above for when we don't need to acknowledge.
+      if (update.has_uuid() && from != UPID() && pid != UPID()) {
         // We drop updates while we're disconnected.
         CHECK(connected);
         CHECK_SOME(master);
@@ -915,16 +920,14 @@ protected:
       // master failover etc). The correct way for schedulers to deal
       // with this situation is to use 'reconcileTasks()'.
       foreach (const TaskInfo& task, tasks) {
-        StatusUpdate update;
-        update.mutable_framework_id()->MergeFrom(framework.id());
-        TaskStatus* status = update.mutable_status();
-        status->mutable_task_id()->MergeFrom(task.task_id());
-        status->set_state(TASK_LOST);
-        status->set_source(TaskStatus::SOURCE_MASTER);
-        status->set_reason(TaskStatus::REASON_MASTER_DISCONNECTED);
-        status->set_message("Master Disconnected");
-        update.set_timestamp(Clock::now().secs());
-        update.set_uuid(UUID::random().toBytes());
+        StatusUpdate update = protobuf::createStatusUpdate(
+            framework.id(),
+            None(),
+            task.task_id(),
+            TASK_LOST,
+            TaskStatus::SOURCE_MASTER,
+            "Master disconnected",
+            TaskStatus::REASON_MASTER_DISCONNECTED);
 
         statusUpdate(UPID(), update, UPID());
       }
@@ -996,16 +999,14 @@ protected:
         }
 
         foreach (const TaskInfo& task, operation.launch().task_infos()) {
-          StatusUpdate update;
-          update.mutable_framework_id()->MergeFrom(framework.id());
-          TaskStatus* status = update.mutable_status();
-          status->mutable_task_id()->MergeFrom(task.task_id());
-          status->set_state(TASK_LOST);
-          status->set_source(TaskStatus::SOURCE_MASTER);
-          status->set_reason(TaskStatus::REASON_MASTER_DISCONNECTED);
-          status->set_message("Master Disconnected");
-          update.set_timestamp(Clock::now().secs());
-          update.set_uuid(UUID::random().toBytes());
+          StatusUpdate update = protobuf::createStatusUpdate(
+              framework.id(),
+              None(),
+              task.task_id(),
+              TASK_LOST,
+              TaskStatus::SOURCE_MASTER,
+              "Master disconnected",
+              TaskStatus::REASON_MASTER_DISCONNECTED);
 
           statusUpdate(UPID(), update, UPID());
         }

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/scheduler/scheduler.cpp
----------------------------------------------------------------------
diff --git a/src/scheduler/scheduler.cpp b/src/scheduler/scheduler.cpp
index 1efc6fb..f360e4d 100644
--- a/src/scheduler/scheduler.cpp
+++ b/src/scheduler/scheduler.cpp
@@ -624,11 +624,19 @@ protected:
 
     update->mutable_status()->set_timestamp(message.update().timestamp());
 
-    // If the update is generated by the master it doesn't need to be
-    // acknowledged; so we unset the UUID inside TaskStatus.
-    // TODO(vinod): Update master and slave to ensure that 'uuid' is
-    // set accurately by the time it reaches the scheduler.
-    if (UPID(message.pid()) == UPID()) {
+    // If the update does not have a 'uuid', it does not need
+    // acknowledging. However, prior to 0.23.0, the update uuid
+    // was required and always set. In 0.24.0, we can rely on the
+    // update uuid check here, until then we must still check for
+    // this being sent from the driver (from == UPID()) or from
+    // the master (pid == UPID()).
+    //
+    // TODO(bmahler): For the HTTP API, we will have to update the
+    // master and slave to ensure the 'uuid' in TaskStatus is set
+    // correctly.
+    if (!message.update().has_uuid()) {
+      update->mutable_status()->clear_uuid();
+    } else if (UPID(message.pid()) == UPID()) {
       update->mutable_status()->clear_uuid();
     } else {
       update->mutable_status()->set_uuid(message.update().uuid());

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 9b72fad..1105a66 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -2663,6 +2663,15 @@ void Slave::statusUpdate(StatusUpdate update, const UPID& pid)
         state == RUNNING || state == TERMINATING)
     << state;
 
+  // TODO(bmahler): With the HTTP API, we must validate the UUID
+  // inside the TaskStatus. For now, we only care about the UUID
+  // inside the StatusUpdate, as the scheduler driver overwrites it.
+  if (!update.has_uuid()) {
+    LOG(WARNING) << "Ignoring status update " << update << " without 'uuid'";
+    metrics.invalid_status_updates++;
+    return;
+  }
+
   // Set the source before forwarding the status update.
   update.mutable_status()->set_source(
       pid == UPID() ? TaskStatus::SOURCE_SLAVE : TaskStatus::SOURCE_EXECUTOR);
@@ -2880,6 +2889,9 @@ void Slave::forward(StatusUpdate update)
       }
 
       if (task != NULL) {
+        CHECK(update.has_uuid())
+          << "Expecting updates without 'uuid' to have been rejected";
+
         // We set the status update state of the task here because in
         // steady state master updates the status update state of the
         // task when it receives this update. If the master fails over,
@@ -5120,6 +5132,9 @@ void Executor::recoverTask(const TaskState& state)
         launchedTasks.contains(state.id)) {
       terminateTask(state.id, update.status());
 
+      CHECK(update.has_uuid())
+        << "Expecting updates without 'uuid' to have been rejected";
+
       // If the terminal update has been acknowledged, remove it.
       if (state.acks.contains(UUID::fromBytes(update.uuid()))) {
         completeTask(state.id);

http://git-wip-us.apache.org/repos/asf/mesos/blob/fda49c04/src/slave/status_update_manager.cpp
----------------------------------------------------------------------
diff --git a/src/slave/status_update_manager.cpp b/src/slave/status_update_manager.cpp
index 0ad2450..1978ac8 100644
--- a/src/slave/status_update_manager.cpp
+++ b/src/slave/status_update_manager.cpp
@@ -711,6 +711,10 @@ Try<bool> StatusUpdateStream::update(const StatusUpdate& update)
     return Error(error.get());
   }
 
+  if (!update.has_uuid()) {
+    return Error("Status update is missing 'uuid'");
+  }
+
   // Check that this status update has not already been acknowledged.
   // This could happen in the rare case when the slave received the ACK
   // from the framework, died, but slave's ACK to the executor never made it!


[3/3] mesos git commit: Updated the executor driver to set TaskStatus.uuid.

Posted by bm...@apache.org.
Updated the executor driver to set TaskStatus.uuid.

Review: https://reviews.apache.org/r/35910


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a964bec3
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a964bec3
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a964bec3

Branch: refs/heads/master
Commit: a964bec3bd85f109d479c69f6d7e21da881c3be5
Parents: 4b591a0
Author: Benjamin Mahler <be...@gmail.com>
Authored: Thu Jun 25 22:58:55 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Mon Jun 29 10:01:27 2015 -0700

----------------------------------------------------------------------
 src/exec/exec.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/a964bec3/src/exec/exec.cpp
----------------------------------------------------------------------
diff --git a/src/exec/exec.cpp b/src/exec/exec.cpp
index 930dda9..a1ae074 100644
--- a/src/exec/exec.cpp
+++ b/src/exec/exec.cpp
@@ -499,17 +499,23 @@ protected:
     update->mutable_status()->MergeFrom(status);
     update->set_timestamp(Clock::now().secs());
     update->mutable_status()->set_timestamp(update->timestamp());
-    update->set_uuid(UUID::random().toBytes());
     message.set_pid(self());
 
-    // Incoming status update might come from an executor which has not set
-    // slave id in TaskStatus. Set/overwrite slave id.
+    // We overwrite the UUID for this status update, however with
+    // the HTTP API, the executor will have to generate a UUID
+    // (which needs to be validated to be RFC-4122 compliant).
+    UUID uuid = UUID::random();
+    update->set_uuid(uuid.toBytes());
+    update->mutable_status()->set_uuid(uuid.toBytes());
+
+    // We overwrite the SlaveID for this status update, however with
+    // the HTTP API, this can be overwritten by the slave instead.
     update->mutable_status()->mutable_slave_id()->CopyFrom(slaveId);
 
     VLOG(1) << "Executor sending status update " << *update;
 
     // Capture the status update.
-    updates[UUID::fromBytes(update->uuid())] = *update;
+    updates[uuid] = *update;
 
     send(slave, message);
   }