You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/08/13 21:49:50 UTC

[06/23] git commit: Fixed slave to not send tasks and executor info of a terminated executor, when re-registering with master.

Fixed slave to not send tasks and executor info of a terminated executor,
when re-registering with master.

Review: https://reviews.apache.org/r/11543


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5912aa2a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5912aa2a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5912aa2a

Branch: refs/heads/vinod/0.13.0
Commit: 5912aa2a127b2af1135396d332388915d7670494
Parents: e029cf5
Author: Vinod Kone <vi...@twitter.com>
Authored: Wed May 29 18:14:01 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Fri Jun 7 15:19:28 2013 -0700

----------------------------------------------------------------------
 src/slave/slave.cpp                 |  6 +++
 src/tests/fault_tolerance_tests.cpp | 78 ++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5912aa2a/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index e905ab3..8ce1646 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -679,6 +679,12 @@ void Slave::doReliableRegistration()
 
     foreachvalue (Framework* framework, frameworks){
       foreachvalue (Executor* executor, framework->executors) {
+        // Ignore terminated executors because they do not consume
+        // any resources.
+        if (executor->state == Executor::TERMINATED) {
+          continue;
+        }
+
         // TODO(benh): Kill this once framework_id is required
         // on ExecutorInfo.
         ExecutorInfo* executorInfo = message.add_executor_infos();

http://git-wip-us.apache.org/repos/asf/mesos/blob/5912aa2a/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index c8fad7c..ef570b7 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -1297,6 +1297,84 @@ TEST_F(FaultToleranceTest, SlaveReregisterOnZKExpiration)
 }
 
 
+// This test verifies that a re-registering slave does not inform
+// the master about a terminated executor (and its tasks), when the
+// executor has pending updates. We check this by ensuring that the
+// master sends a TASK_LOST update for the task belonging to the
+// terminated executor.
+TEST_F(FaultToleranceTest, SlaveReregisterTerminatedExecutor)
+{
+  Try<PID<Master> > master = StartMaster();
+  ASSERT_SOME(master);
+
+  MockExecutor exec(DEFAULT_EXECUTOR_ID);
+  TestingIsolator isolator(&exec);
+
+  Try<PID<Slave> > slave = StartSlave(&isolator);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(&sched, DEFAULT_FRAMEWORK_INFO, master.get());
+
+  Future<FrameworkID> frameworkId;
+  EXPECT_CALL(sched, registered(&driver, _, _))
+    .WillOnce(FutureArg<1>(&frameworkId));
+
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(LaunchTasks(1, 1, 512))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  EXPECT_CALL(exec, registered(_, _, _, _));
+
+  EXPECT_CALL(exec, launchTask(_, _))
+    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));
+
+  Future<TaskStatus> status;
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&status));
+
+  driver.start();
+
+  AWAIT_READY(status);
+  EXPECT_EQ(TASK_RUNNING, status.get().state());
+
+  // Drop the TASK_LOST status update(s) sent to the master.
+  // This ensures that the TASK_LOST received by the scheduler
+  // is generated by the master.
+  DROP_PROTOBUFS(StatusUpdateMessage(), _, master.get());
+
+  Future<ExitedExecutorMessage> executorExitedMessage =
+    FUTURE_PROTOBUF(ExitedExecutorMessage(), _, _);
+
+  // Now kill the executor.
+  dispatch(isolator,
+           &Isolator::killExecutor,
+           frameworkId.get(),
+           DEFAULT_EXECUTOR_ID);
+
+  AWAIT_READY(executorExitedMessage);
+
+  // Simulate a spurious newMasterDetected event (e.g., due to ZooKeeper
+  // expiration) at the slave to force re-registration.
+  Future<TaskStatus> status2;
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&status2));
+
+  NewMasterDetectedMessage message;
+  message.set_pid(master.get());
+
+  process::post(slave.get(), message);
+
+  AWAIT_READY(status2);
+  EXPECT_EQ(TASK_LOST, status2.get().state());
+
+  driver.stop();
+  driver.join();
+
+  Shutdown();
+}
+
+
 // This test verifies that the master sends TASK_LOST updates
 // for tasks in the master absent from the re-registered slave.
 // We do this by dropping RunTaskMessage from master to the slave.