You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2012/11/08 06:51:30 UTC

svn commit: r1406931 - in /incubator/mesos/trunk/src: common/ examples/ slave/ tests/

Author: benh
Date: Thu Nov  8 05:51:29 2012
New Revision: 1406931

URL: http://svn.apache.org/viewvc?rev=1406931&view=rev
Log:
Updated Slave::executorExited to Slave::executorTerminated and
distinguished between when the isolation module destroys an executor
versus when the executor exits or is killed by a signal (from the
slave or otherwise). Also, send TASK_FAILED for tasks "lost" when an
executor is destroyed by the isolation module.

From: Vinod Kone <vi...@gmail.com>
Review: https://reviews.apache.org/r/7887

Modified:
    incubator/mesos/trunk/src/common/protobuf_utils.hpp
    incubator/mesos/trunk/src/examples/balloon_framework.cpp
    incubator/mesos/trunk/src/slave/cgroups_isolation_module.cpp
    incubator/mesos/trunk/src/slave/cgroups_isolation_module.hpp
    incubator/mesos/trunk/src/slave/lxc_isolation_module.cpp
    incubator/mesos/trunk/src/slave/process_based_isolation_module.cpp
    incubator/mesos/trunk/src/slave/slave.cpp
    incubator/mesos/trunk/src/slave/slave.hpp
    incubator/mesos/trunk/src/tests/fault_tolerance_tests.cpp
    incubator/mesos/trunk/src/tests/gc_tests.cpp
    incubator/mesos/trunk/src/tests/master_tests.cpp

Modified: incubator/mesos/trunk/src/common/protobuf_utils.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/common/protobuf_utils.hpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/common/protobuf_utils.hpp (original)
+++ incubator/mesos/trunk/src/common/protobuf_utils.hpp Thu Nov  8 05:51:29 2012
@@ -46,6 +46,7 @@ inline StatusUpdate createStatusUpdate(
     const SlaveID& slaveId,
     const TaskID& taskId,
     const TaskState& state,
+    const std::string& message,
     const ExecutorID& executorId = ExecutorID())
 {
   StatusUpdate update;
@@ -60,6 +61,8 @@ inline StatusUpdate createStatusUpdate(
   TaskStatus* status = update.mutable_status();
   status->mutable_task_id()->MergeFrom(taskId);
   status->set_state(state);
+  status->set_message(message);
+
   update.set_timestamp(::process::Clock::now());
   update.set_uuid(UUID::random().toBytes());
 

Modified: incubator/mesos/trunk/src/examples/balloon_framework.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/examples/balloon_framework.cpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/examples/balloon_framework.cpp (original)
+++ incubator/mesos/trunk/src/examples/balloon_framework.cpp Thu Nov  8 05:51:29 2012
@@ -32,10 +32,12 @@
 #include <stout/os.hpp>
 #include <stout/stringify.hpp>
 
+#include "common/protobuf_utils.hpp"
+
 #include "examples/utils.hpp"
 
 using namespace mesos;
-
+using namespace mesos::internal;
 
 // The amount of memory in MB the executor itself takes.
 const static size_t EXECUTOR_MEMORY_MB = 64;
@@ -116,13 +118,18 @@ public:
   virtual void statusUpdate(SchedulerDriver* driver, const TaskStatus& status)
   {
     std::cout << "Task in state " << status.state() << std::endl;
+    if (status.has_message()) {
+      std::cout << "Reason: " << status.message() << std::endl;
+    }
 
-    if (status.state() == TASK_FINISHED) {
-      driver->stop();
-    } else if (status.state() == TASK_FAILED ||
-               status.state() == TASK_KILLED ||
-               status.state() == TASK_LOST) {
-      driver->abort();
+    if (protobuf::isTerminalState(status.state())) {
+      // NOTE: We expect TASK_FAILED here. The abort here ensures that the
+      // shell script invoking this test considers the test result a 'PASS'.
+      if (status.state() == TASK_FAILED) {
+        driver->abort();
+      } else {
+        driver->stop();
+      }
     }
   }
 

Modified: incubator/mesos/trunk/src/slave/cgroups_isolation_module.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/cgroups_isolation_module.cpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/cgroups_isolation_module.cpp (original)
+++ incubator/mesos/trunk/src/slave/cgroups_isolation_module.cpp Thu Nov  8 05:51:29 2012
@@ -50,6 +50,7 @@ using process::Future;
 
 using std::set;
 using std::string;
+using std::ostringstream;
 using std::vector;
 
 namespace mesos {
@@ -332,10 +333,12 @@ void CgroupsIsolationModule::launchExecu
               << " of framework " << frameworkId;
 
     dispatch(slave,
-             &Slave::executorExited,
+             &Slave::executorTerminated,
              frameworkId,
              executorId,
-             -1); // TODO(benh): Determine "correct" status.
+             -1,  // TODO(benh): Determine "correct" status.
+             false,
+             "Error launching executor");
 
     return;
   }
@@ -462,14 +465,19 @@ void CgroupsIsolationModule::processExit
     FrameworkID frameworkId = info->frameworkId;
     ExecutorID executorId = info->executorId;
 
-    LOG(INFO) << "Telling slave of lost executor " << executorId
+    LOG(INFO) << "Telling slave of terminated executor " << executorId
               << " of framework " << frameworkId;
 
+    // TODO(vinod): Consider sending this message when the cgroup is
+    // completely destroyed (i.e., inside destroyWaited()).
+    // The tricky bit is to get the exit 'status' of the executor process.
     dispatch(slave,
-             &Slave::executorExited,
-             frameworkId,
-             executorId,
-             status);
+             &Slave::executorTerminated,
+             info->frameworkId,
+             info->executorId,
+             status,
+             info->destroyed,
+             info->reason);
 
     if (!info->killed) {
       killExecutor(frameworkId, executorId);
@@ -635,46 +643,52 @@ void CgroupsIsolationModule::oom(
     // It is likely that processExited is executed before this function (e.g.
     // The kill and OOM events happen at the same time, and the process exit
     // event arrives first.) Therefore, we should not report a fatal error here.
-    LOG(INFO) << "OOM detected for an exited executor";
+    LOG(INFO) << "OOM detected for an already terminated executor";
     return;
   }
 
-  // To safely ignore the OOM event from the previous launch of the same
-  // executor (with the same frameworkId and executorId).
+  // We can also ignore an OOM event that we are late to process for a
+  // previous instance of an executor.
   if (tag != info->tag) {
-    LOG(INFO) << "OOM detected for the previous launch of the same executor";
+    LOG(INFO) << "OOM detected for a previous executor instance";
     return;
   }
 
   // If killed is set, the OOM notifier will be discarded in oomWaited.
   // Therefore, we should not be able to reach this point.
-  CHECK(!info->killed) << "OOM detected for a killed executor";
+  CHECK(!info->killed) << "OOM detected for an already killed executor";
 
   LOG(INFO) << "OOM detected for executor " << executorId
             << " of framework " << frameworkId
             << " with tag " << tag;
 
-  // Output 'memory.limit_in_bytes' of the cgroup to help with debugging.
+  // Construct a "reason" string to describe why the isolation module
+  // destroyed the executor's cgroup (in order to assist in debugging).
+  ostringstream reason;
+
   Try<string> read =
     cgroups::readControl(hierarchy, info->name(), "memory.limit_in_bytes");
   if (read.isSome()) {
-    LOG(INFO) << "MEMORY LIMIT: " << strings::trim(read.get()) << " bytes";
+    reason << "MEMORY LIMIT: " << strings::trim(read.get()) << " bytes\n";
   }
 
   // Output 'memory.usage_in_bytes'.
   read = cgroups::readControl(hierarchy, info->name(), "memory.usage_in_bytes");
   if (read.isSome()) {
-    LOG(INFO) << "MEMORY USAGE: " << strings::trim(read.get()) << " bytes";
+    reason << "MEMORY USAGE: " << strings::trim(read.get()) << " bytes\n";
   }
 
   // Output 'memory.stat' of the cgroup to help with debugging.
   read = cgroups::readControl(hierarchy, info->name(), "memory.stat");
   if (read.isSome()) {
-    LOG(INFO) << "MEMORY STATISTICS: \n" << read.get();
+    reason << "MEMORY STATISTICS: \n" << read.get() << "\n";
   }
 
-  // TODO(jieyu): Have a mechanism to use a different policy (e.g. freeze the
-  // executor) when OOM happens.
+  LOG(INFO) << strings::trim(reason.str()); // Trim the extra '\n' at the end.
+
+  info->destroyed = true;
+  info->reason = reason.str();
+
   killExecutor(frameworkId, executorId);
 }
 
@@ -702,6 +716,8 @@ CgroupsIsolationModule::CgroupInfo* Cgro
   info->tag = UUID::random().toString();
   info->pid = -1;
   info->killed = false;
+  info->destroyed = false;
+  info->reason = "";
   infos[frameworkId][executorId] = info;
   return info;
 }

Modified: incubator/mesos/trunk/src/slave/cgroups_isolation_module.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/cgroups_isolation_module.hpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/cgroups_isolation_module.hpp (original)
+++ incubator/mesos/trunk/src/slave/cgroups_isolation_module.hpp Thu Nov  8 05:51:29 2012
@@ -101,8 +101,15 @@ private:
     // PID of the forked process of the executor.
     pid_t pid;
 
-    // Whether the executor has been killed.
-    bool killed;
+    bool killed; // True if "killing" has been initiated via 'killExecutor'.
+
+    // Indicates if this executor has been destroyed by the isolation
+    // module. NOTE: An executor may have terminated due to reasons
+    // other than destruction by the isolation module (e.g. killed by
+    // slave, exited, etc.).
+    bool destroyed;
+
+    std::string reason; // The reason behind the destruction.
 
     // Used to cancel the OOM listening.
     process::Future<uint64_t> oomNotifier;

Modified: incubator/mesos/trunk/src/slave/lxc_isolation_module.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/lxc_isolation_module.cpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/lxc_isolation_module.cpp (original)
+++ incubator/mesos/trunk/src/slave/lxc_isolation_module.cpp Thu Nov  8 05:51:29 2012
@@ -320,11 +320,16 @@ void LxcIsolationModule::processExited(p
     foreachvalue (ContainerInfo* info, infos[frameworkId]) {
       if (info->pid == pid) {
         LOG(INFO) << "Telling slave of lost executor "
-		  << info->executorId
+                  << info->executorId
                   << " of framework " << info->frameworkId;
 
-        dispatch(slave, &Slave::executorExited,
-                 info->frameworkId, info->executorId, status);
+        dispatch(slave,
+                 &Slave::executorTerminated,
+                 info->frameworkId,
+                 info->executorId,
+                 status,
+                 false,
+                 "Executor exited");
 
         // Try and cleanup after the executor.
         killExecutor(info->frameworkId, info->executorId);

Modified: incubator/mesos/trunk/src/slave/process_based_isolation_module.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/process_based_isolation_module.cpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/process_based_isolation_module.cpp (original)
+++ incubator/mesos/trunk/src/slave/process_based_isolation_module.cpp Thu Nov  8 05:51:29 2012
@@ -278,8 +278,13 @@ void ProcessBasedIsolationModule::proces
         LOG(INFO) << "Telling slave of lost executor " << executorId
                   << " of framework " << frameworkId;
 
-        dispatch(slave, &Slave::executorExited,
-                 frameworkId, executorId, status);
+        dispatch(slave,
+                 &Slave::executorTerminated,
+                 frameworkId,
+                 executorId,
+                 status,
+                 false,
+                 "Executor exited");
 
         // Try and cleanup after the executor.
         killExecutor(frameworkId, executorId);

Modified: incubator/mesos/trunk/src/slave/slave.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/slave.cpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/slave.cpp (original)
+++ incubator/mesos/trunk/src/slave/slave.cpp Thu Nov  8 05:51:29 2012
@@ -466,10 +466,11 @@ void Slave::doReliableRegistration()
 }
 
 
-void Slave::runTask(const FrameworkInfo& frameworkInfo,
-                    const FrameworkID& frameworkId,
-                    const string& pid,
-                    const TaskInfo& task)
+void Slave::runTask(
+    const FrameworkInfo& frameworkInfo,
+    const FrameworkID& frameworkId,
+    const string& pid,
+    const TaskInfo& task)
 {
   LOG(INFO) << "Got assigned task " << task.task_id()
             << " for framework " << frameworkId;
@@ -559,8 +560,7 @@ void Slave::runTask(const FrameworkInfo&
 }
 
 
-void Slave::killTask(const FrameworkID& frameworkId,
-                     const TaskID& taskId)
+void Slave::killTask(const FrameworkID& frameworkId, const TaskID& taskId)
 {
   LOG(INFO) << "Asked to kill task " << taskId
             << " of framework " << frameworkId;
@@ -663,10 +663,11 @@ void Slave::shutdownFramework(const Fram
 }
 
 
-void Slave::schedulerMessage(const SlaveID& slaveId,
-                             const FrameworkID& frameworkId,
-                             const ExecutorID& executorId,
-                             const string& data)
+void Slave::schedulerMessage(
+    const SlaveID& slaveId,
+    const FrameworkID& frameworkId,
+    const ExecutorID& executorId,
+    const string& data)
 {
   Framework* framework = getFramework(frameworkId);
   if (framework == NULL) {
@@ -703,8 +704,7 @@ void Slave::schedulerMessage(const Slave
 }
 
 
-void Slave::updateFramework(const FrameworkID& frameworkId,
-                            const string& pid)
+void Slave::updateFramework(const FrameworkID& frameworkId, const string& pid)
 {
   Framework* framework = getFramework(frameworkId);
   if (framework != NULL) {
@@ -715,10 +715,11 @@ void Slave::updateFramework(const Framew
 }
 
 
-void Slave::statusUpdateAcknowledgement(const SlaveID& slaveId,
-                                        const FrameworkID& frameworkId,
-                                        const TaskID& taskId,
-                                        const string& uuid)
+void Slave::statusUpdateAcknowledgement(
+    const SlaveID& slaveId,
+    const FrameworkID& frameworkId,
+    const TaskID& taskId,
+    const string& uuid)
 {
   Framework* framework = getFramework(frameworkId);
   if (framework != NULL) {
@@ -745,8 +746,9 @@ void Slave::statusUpdateAcknowledgement(
 }
 
 
-void Slave::registerExecutor(const FrameworkID& frameworkId,
-                             const ExecutorID& executorId)
+void Slave::registerExecutor(
+    const FrameworkID& frameworkId,
+    const ExecutorID& executorId)
 {
   LOG(INFO) << "Got registration for executor '" << executorId
             << "' of framework " << frameworkId;
@@ -878,10 +880,11 @@ void Slave::statusUpdate(const StatusUpd
 }
 
 
-void Slave::executorMessage(const SlaveID& slaveId,
-                            const FrameworkID& frameworkId,
-                            const ExecutorID& executorId,
-                            const string& data)
+void Slave::executorMessage(
+    const SlaveID& slaveId,
+    const FrameworkID& frameworkId,
+    const ExecutorID& executorId,
+    const string& data)
 {
   Framework* framework = getFramework(frameworkId);
   if (framework == NULL) {
@@ -964,69 +967,37 @@ Framework* Slave::getFramework(const Fra
 
 // N.B. When the slave is running in "local" mode then the pid is
 // uninteresting (and possibly could cause bugs).
-void Slave::executorStarted(const FrameworkID& frameworkId,
-                            const ExecutorID& executorId,
-                            pid_t pid)
-{
-
-}
-
-
-StatusUpdate Slave::createStatusUpdate(const TaskID& taskId,
-                                       const ExecutorID& executorId,
-                                       const FrameworkID& frameworkId,
-                                       TaskState taskState,
-                                       const string& reason)
+void Slave::executorStarted(
+    const FrameworkID& frameworkId,
+    const ExecutorID& executorId,
+    pid_t pid)
 {
-  TaskStatus status;
-  status.mutable_task_id()->MergeFrom(taskId);
-  status.set_state(taskState);
-  status.set_message(reason);
-
-  StatusUpdate update;
-  update.mutable_framework_id()->MergeFrom(frameworkId);
-  update.mutable_slave_id()->MergeFrom(id);
-  update.mutable_executor_id()->MergeFrom(executorId);
-  update.mutable_status()->MergeFrom(status);
-  update.set_timestamp(Clock::now());
-  update.set_uuid(UUID::random().toBytes());
 
-  return update;
 }
 
 
-// Called when an executor is exited.
-// Transitions a live task to TASK_LOST/TASK_FAILED and sends status update.
-void Slave::transitionLiveTask(const TaskID& taskId,
-                               const ExecutorID& executorId,
-                               const FrameworkID& frameworkId,
-                               bool isCommandExecutor,
-                               int status)
+void Slave::sendStatusUpdate(
+    const FrameworkID& frameworkId,
+    const ExecutorID& executorId,
+    const TaskID& taskId,
+    TaskState taskState,
+    const string& message)
 {
-  StatusUpdate update;
-
-  if (isCommandExecutor) {
-    update = createStatusUpdate(taskId,
-                                executorId,
-                                frameworkId,
-                                TASK_FAILED,
-                                "Executor running the task's command failed");
-  } else {
-    update = createStatusUpdate(taskId,
-                                executorId,
-                                frameworkId,
-                                TASK_LOST,
-                                "Executor exited");
-  }
+  const StatusUpdate& update = protobuf::createStatusUpdate(
+      frameworkId, id, taskId, taskState, message, executorId);
 
+  // Handle the status update as though it came from the executor.
   statusUpdate(update);
 }
 
 
-// Called by the isolation module when an executor process exits.
-void Slave::executorExited(const FrameworkID& frameworkId,
-                           const ExecutorID& executorId,
-                           int status)
+// Called by the isolation module when an executor process terminates.
+void Slave::executorTerminated(
+    const FrameworkID& frameworkId,
+    const ExecutorID& executorId,
+    int status,
+    bool destroyed,
+    const string& message)
 {
   LOG(INFO) << "Executor '" << executorId
             << "' of framework " << frameworkId
@@ -1056,30 +1027,38 @@ void Slave::executorExited(const Framewo
   bool isCommandExecutor = false;
 
   // Transition all live tasks to TASK_LOST/TASK_FAILED.
+  // If the isolation module destroyed the executor (e.g., due to an OOM
+  // event) or if this is a command executor, we send TASK_FAILED status
+  // updates instead of TASK_LOST.
+
+  // Transition all live launched tasks.
   foreachvalue (Task* task, utils::copy(executor->launchedTasks)) {
     if (!protobuf::isTerminalState(task->state())) {
       isCommandExecutor = !task->has_executor_id();
 
-      transitionLiveTask(task->task_id(),
-                         executor->id,
-                         framework->id,
-                         isCommandExecutor,
-                         status);
+      if (destroyed || isCommandExecutor) {
+        sendStatusUpdate(
+            frameworkId, executorId, task->task_id(), TASK_FAILED, message);
+      } else {
+        sendStatusUpdate(
+            frameworkId, executorId, task->task_id(), TASK_LOST, message);
+      }
     }
   }
 
-  // Transition all queued tasks to TASK_LOST/TASK_FAILED.
+  // Transition all queued tasks.
   foreachvalue (const TaskInfo& task, utils::copy(executor->queuedTasks)) {
     isCommandExecutor = task.has_command();
 
-    transitionLiveTask(task.task_id(),
-                       executor->id,
-                       framework->id,
-                       isCommandExecutor,
-                       status);
+    if (destroyed || isCommandExecutor) {
+      sendStatusUpdate(
+          frameworkId, executorId, task.task_id(), TASK_FAILED, message);
+    } else {
+      sendStatusUpdate(
+          frameworkId, executorId, task.task_id(), TASK_LOST, message);
+    }
   }
 
-
   if (!isCommandExecutor) {
     ExitedExecutorMessage message;
     message.mutable_slave_id()->MergeFrom(id);
@@ -1116,9 +1095,10 @@ void Slave::shutdownExecutor(Framework* 
 }
 
 
-void Slave::shutdownExecutorTimeout(const FrameworkID& frameworkId,
-                                    const ExecutorID& executorId,
-                                    const UUID& uuid)
+void Slave::shutdownExecutorTimeout(
+    const FrameworkID& frameworkId,
+    const ExecutorID& executorId,
+    const UUID& uuid)
 {
   Framework* framework = getFramework(frameworkId);
   if (framework == NULL) {
@@ -1133,7 +1113,8 @@ void Slave::shutdownExecutorTimeout(cons
 
     dispatch(isolationModule,
              &IsolationModule::killExecutor,
-             framework->id, executor->id);
+             framework->id,
+             executor->id);
 
     // Schedule the executor directory to get garbage collected.
     gc.schedule(flags.gc_delay, executor->directory);

Modified: incubator/mesos/trunk/src/slave/slave.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/slave.hpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/slave.hpp (original)
+++ incubator/mesos/trunk/src/slave/slave.hpp Thu Nov  8 05:51:29 2012
@@ -82,53 +82,65 @@ public:
   void registered(const SlaveID& slaveId);
   void reregistered(const SlaveID& slaveId);
   void doReliableRegistration();
-  void runTask(const FrameworkInfo& frameworkInfo,
-               const FrameworkID& frameworkId,
-               const std::string& pid,
-               const TaskInfo& task);
-  void killTask(const FrameworkID& frameworkId,
-                const TaskID& taskId);
+
+  void runTask(
+      const FrameworkInfo& frameworkInfo,
+      const FrameworkID& frameworkId,
+      const std::string& pid,
+      const TaskInfo& task);
+
+  void killTask(const FrameworkID& frameworkId, const TaskID& taskId);
+
   void shutdownFramework(const FrameworkID& frameworkId);
-  void schedulerMessage(const SlaveID& slaveId,
-			const FrameworkID& frameworkId,
-			const ExecutorID& executorId,
-			const std::string& data);
-  void updateFramework(const FrameworkID& frameworkId,
-                       const std::string& pid);
-  void statusUpdateAcknowledgement(const SlaveID& slaveId,
-                                   const FrameworkID& frameworkId,
-                                   const TaskID& taskId,
-                                   const std::string& uuid);
-  void registerExecutor(const FrameworkID& frameworkId,
-                        const ExecutorID& executorId);
+
+  void schedulerMessage(
+      const SlaveID& slaveId,
+      const FrameworkID& frameworkId,
+      const ExecutorID& executorId,
+      const std::string& data);
+
+  void updateFramework(const FrameworkID& frameworkId, const std::string& pid);
+
+  void statusUpdateAcknowledgement(
+      const SlaveID& slaveId,
+      const FrameworkID& frameworkId,
+      const TaskID& taskId,
+      const std::string& uuid);
+
+  void registerExecutor(
+      const FrameworkID& frameworkId,
+      const ExecutorID& executorId);
+
   void statusUpdate(const StatusUpdate& update);
-  void executorMessage(const SlaveID& slaveId,
-                       const FrameworkID& frameworkId,
-                       const ExecutorID& executorId,
-                       const std::string& data);
+
+  void executorMessage(
+      const SlaveID& slaveId,
+      const FrameworkID& frameworkId,
+      const ExecutorID& executorId,
+      const std::string& data);
+
   void ping(const UPID& from, const std::string& body);
 
   void statusUpdateTimeout(const FrameworkID& frameworkId, const UUID& uuid);
 
-  StatusUpdate createStatusUpdate(const TaskID& taskId,
-                                  const ExecutorID& executorId,
-                                  const FrameworkID& frameworkId,
-                                  TaskState taskState,
-                                  const std::string& message);
-
-  void executorStarted(const FrameworkID& frameworkId,
-                       const ExecutorID& executorId,
-                       pid_t pid);
-
-  void executorExited(const FrameworkID& frameworkId,
-                      const ExecutorID& executorId,
-                      int status);
-
-  void transitionLiveTask(const TaskID& taskId,
-                          const ExecutorID& executorId,
-                          const FrameworkID& frameworkId,
-                          bool command_executor,
-                          int status);
+  void sendStatusUpdate(
+      const FrameworkID& frameworkId,
+      const ExecutorID& executorId,
+      const TaskID& taskId,
+      TaskState taskState,
+      const std::string& message);
+
+  void executorStarted(
+      const FrameworkID& frameworkId,
+      const ExecutorID& executorId,
+      pid_t pid);
+
+  void executorTerminated(
+      const FrameworkID& frameworkId,
+      const ExecutorID& executorId,
+      int status,
+      bool destroyed,
+      const std::string& message);
 
   // NOTE: Pulled this to public to make it visible for testing.
   // Garbage collects the directories based on the current disk usage.

Modified: incubator/mesos/trunk/src/tests/fault_tolerance_tests.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/tests/fault_tolerance_tests.cpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/tests/fault_tolerance_tests.cpp (original)
+++ incubator/mesos/trunk/src/tests/fault_tolerance_tests.cpp Thu Nov  8 05:51:29 2012
@@ -798,8 +798,16 @@ TEST(FaultToleranceTest, SchedulerExit)
   WAIT_UNTIL(shutdownCall);
 
   // Simulate a executorExited message from isolation module to the slave.
-  process::dispatch(slave, &Slave::executorExited,
-                    frameworkId, DEFAULT_EXECUTOR_ID, 0);
+  // We need to explicitly send this message because we don't spawn
+  // a real executor process in this test.
+  process::dispatch(
+      slave,
+      &Slave::executorTerminated,
+      frameworkId,
+      DEFAULT_EXECUTOR_ID,
+      0,
+      false,
+      "Killed executor");
 
   WAIT_UNTIL(statusUpdateMsg);
 

Modified: incubator/mesos/trunk/src/tests/gc_tests.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/tests/gc_tests.cpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/tests/gc_tests.cpp (original)
+++ incubator/mesos/trunk/src/tests/gc_tests.cpp Thu Nov  8 05:51:29 2012
@@ -304,8 +304,16 @@ TEST_F(GarbageCollectorTest, ExitedExecu
   // Kill the executor and inform the slave.
   isolationModule->killExecutor(frameworkId, DEFAULT_EXECUTOR_ID);
 
-  process::dispatch(slave, &Slave::executorExited, frameworkId,
-                    DEFAULT_EXECUTOR_ID, 0);
+  // We need to explicitly send this message because we don't spawn
+  // a real executor process in this test.
+  process::dispatch(
+      slave,
+      &Slave::executorTerminated,
+      frameworkId,
+      DEFAULT_EXECUTOR_ID,
+      0,
+      false,
+      "Killed executor");
 
   // In order to make sure the slave has scheduled the executor
   // directory to get garbage collected we need to wait until the
@@ -382,8 +390,16 @@ TEST_F(GarbageCollectorTest, DiskUsage)
   // Kill the executor and inform the slave.
   isolationModule->killExecutor(frameworkId, DEFAULT_EXECUTOR_ID);
 
-  process::dispatch(slave, &Slave::executorExited, frameworkId,
-                    DEFAULT_EXECUTOR_ID, 0);
+  // We need to explicitly send this message because we don't spawn
+  // a real executor process in this test.
+  process::dispatch(
+      slave,
+      &Slave::executorTerminated,
+      frameworkId,
+      DEFAULT_EXECUTOR_ID,
+      0,
+      false,
+      "Killed executor");
 
   // In order to make sure the slave has scheduled the executor
   // directory to get garbage collected we need to wait until the

Modified: incubator/mesos/trunk/src/tests/master_tests.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/tests/master_tests.cpp?rev=1406931&r1=1406930&r2=1406931&view=diff
==============================================================================
--- incubator/mesos/trunk/src/tests/master_tests.cpp (original)
+++ incubator/mesos/trunk/src/tests/master_tests.cpp Thu Nov  8 05:51:29 2012
@@ -469,11 +469,16 @@ TEST(MasterTest, RecoverResources)
   driver.declineOffer(offer.id());
 
   // Now simulate an executorExited call to the slave.
-  process::dispatch(slave,
-                    &Slave::executorExited,
-                    offer.framework_id(),
-                    executorInfo.executor_id(),
-                    0);
+  // We need to explicitly send this message because we don't spawn
+  // a real executor process in this test.
+  process::dispatch(
+      slave,
+      &Slave::executorTerminated,
+      offer.framework_id(),
+      executorInfo.executor_id(),
+      0,
+      false,
+      "Killed executor");
 
   // Scheduler should get an offer for the complete slave resources.
   WAIT_UNTIL(resourceOffersCall3);