You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2014/06/17 23:38:25 UTC

git commit: Send status update acknowledgements through the master.

Repository: mesos
Updated Branches:
  refs/heads/master a99de1a98 -> 90415006d


Send status update acknowledgements through the master.

Review: https://reviews.apache.org/r/22592


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/90415006
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/90415006
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/90415006

Branch: refs/heads/master
Commit: 90415006db79f127aa26a41f4bbc6d327f92c3dc
Parents: a99de1a
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Thu Jun 12 16:34:33 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Tue Jun 17 14:38:20 2014 -0700

----------------------------------------------------------------------
 src/sched/sched.cpp                 | 13 +++--
 src/tests/fault_tolerance_tests.cpp | 16 +++---
 src/tests/master_tests.cpp          | 96 --------------------------------
 src/tests/slave_tests.cpp           | 28 +++-------
 4 files changed, 23 insertions(+), 130 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/90415006/src/sched/sched.cpp
----------------------------------------------------------------------
diff --git a/src/sched/sched.cpp b/src/sched/sched.cpp
index 6e14f1c..aa19735 100644
--- a/src/sched/sched.cpp
+++ b/src/sched/sched.cpp
@@ -634,16 +634,21 @@ protected:
       return;
     }
 
-    // Acknowledge the status update.
-    if (pid != UPID()) {
-      VLOG(2) << "Sending ACK for status update " << update << " to " << pid;
+    // Don't acknowledge updates created by the driver or master.
+    if (from != UPID() && pid != UPID()) {
+      // We drop updates while we're disconnected.
+      CHECK(connected);
+      CHECK_SOME(master);
+
+      VLOG(2) << "Sending ACK for status update " << update
+              << " to " << master.get();
 
       StatusUpdateAcknowledgementMessage message;
       message.mutable_framework_id()->MergeFrom(framework.id());
       message.mutable_slave_id()->MergeFrom(update.slave_id());
       message.mutable_task_id()->MergeFrom(update.status().task_id());
       message.set_uuid(update.uuid());
-      send(pid, message);
+      send(master.get(), message);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/90415006/src/tests/fault_tolerance_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fault_tolerance_tests.cpp b/src/tests/fault_tolerance_tests.cpp
index 4c6a5c4..5469c17 100644
--- a/src/tests/fault_tolerance_tests.cpp
+++ b/src/tests/fault_tolerance_tests.cpp
@@ -1899,11 +1899,18 @@ TEST_F(FaultToleranceTest, SlaveReregisterTerminatedExecutor)
   EXPECT_CALL(sched, statusUpdate(&driver, _))
     .WillOnce(FutureArg<1>(&status));
 
+  Future<StatusUpdateAcknowledgementMessage> statusUpdateAcknowledgementMessage
+    = FUTURE_PROTOBUF(
+        StatusUpdateAcknowledgementMessage(), master.get(), slave.get());
+
   driver.start();
 
   AWAIT_READY(status);
   EXPECT_EQ(TASK_RUNNING, status.get().state());
 
+  // Make sure the acknowledgement reaches the slave.
+  AWAIT_READY(statusUpdateAcknowledgementMessage);
+
   // Drop the TASK_FINISHED status update sent to the master.
   Future<StatusUpdateMessage> statusUpdateMessage =
     DROP_PROTOBUF(StatusUpdateMessage(), _, master.get());
@@ -1911,8 +1918,6 @@ TEST_F(FaultToleranceTest, SlaveReregisterTerminatedExecutor)
   Future<ExitedExecutorMessage> executorExitedMessage =
     FUTURE_PROTOBUF(ExitedExecutorMessage(), _, _);
 
-  Clock::pause();
-
   TaskStatus finishedStatus;
   finishedStatus = status.get();
   finishedStatus.set_state(TASK_FINISHED);
@@ -1928,15 +1933,8 @@ TEST_F(FaultToleranceTest, SlaveReregisterTerminatedExecutor)
   EXPECT_CALL(sched, statusUpdate(&driver, _))
     .WillOnce(FutureArg<1>(&status2));
 
-  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
-    FUTURE_PROTOBUF(SlaveReregisteredMessage(), master.get(), slave.get());
-
   detector.appoint(master.get());
 
-  AWAIT_READY(slaveReregisteredMessage);
-
-  Clock::advance(slave::STATUS_UPDATE_RETRY_INTERVAL_MIN);
-
   AWAIT_READY(status2);
   EXPECT_EQ(TASK_FINISHED, status2.get().state());
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/90415006/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index edcaa75..a60709f 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -1402,102 +1402,6 @@ TEST_F(MasterTest, LaunchDuplicateOfferTest)
 }
 
 
-// This test runs a task but intercepts the scheduler driver's
-// acknowledgement messages to the slave and instead sends them to
-// the master. This test is necessary to test that the
-// acknowledgement handling in the master is correct, but once the
-// driver sends these messages we should remove/update this test!
-TEST_F(MasterTest, StatusUpdateAcknowledgementsThroughMaster)
-{
-  Try<PID<Master> > master = StartMaster();
-  ASSERT_SOME(master);
-
-  MockExecutor exec(DEFAULT_EXECUTOR_ID);
-
-  Try<PID<Slave> > slave = StartSlave(&exec);
-  ASSERT_SOME(slave);
-
-  MockScheduler sched;
-  MesosSchedulerDriver schedDriver(
-      &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);
-
-  EXPECT_CALL(sched, registered(&schedDriver, _, _))
-    .Times(1);
-
-  Future<vector<Offer> > offers;
-  EXPECT_CALL(sched, resourceOffers(&schedDriver, _))
-    .WillOnce(FutureArg<1>(&offers))
-    .WillRepeatedly(Return()); // Ignore subsequent offers.
-
-  // We need to grab this message to get the scheduler's pid.
-  Future<process::Message> frameworkRegisteredMessage = FUTURE_MESSAGE(
-      Eq(FrameworkRegisteredMessage().GetTypeName()), master.get(), _);
-
-  schedDriver.start();
-
-  AWAIT_READY(frameworkRegisteredMessage);
-  const process::UPID schedulerPid = frameworkRegisteredMessage.get().to;
-
-  AWAIT_READY(offers);
-  EXPECT_NE(0u, offers.get().size());
-
-  TaskInfo task = createTask(offers.get()[0], "", DEFAULT_EXECUTOR_ID);
-
-  vector<TaskInfo> tasks;
-  tasks.push_back(task);
-
-  Future<ExecutorDriver*> execDriver;
-  EXPECT_CALL(exec, registered(_, _, _, _))
-    .WillOnce(FutureArg<0>(&execDriver));
-
-  EXPECT_CALL(exec, launchTask(_, _))
-    .WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));
-
-  Future<TaskStatus> update;
-  EXPECT_CALL(sched, statusUpdate(&schedDriver, _))
-    .WillOnce(FutureArg<1>(&update));
-
-  // Pause the clock to prevent status update retries on the slave.
-  Clock::pause();
-
-  // Intercept the status update acknowledgement and send it to the
-  // master instead!
-  Future<StatusUpdateAcknowledgementMessage> acknowledgementMessage =
-    DROP_PROTOBUF(StatusUpdateAcknowledgementMessage(),
-                  schedulerPid,
-                  slave.get());
-
-  schedDriver.launchTasks(offers.get()[0].id(), tasks);
-
-  AWAIT_READY(update);
-  EXPECT_EQ(TASK_RUNNING, update.get().state());
-
-  AWAIT_READY(acknowledgementMessage);
-
-  Future<Nothing> _statusUpdateAcknowledgement =
-    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);
-
-  // Send the acknowledgement to the master.
-  process::post(schedulerPid, master.get(), acknowledgementMessage.get());
-
-  // Ensure the slave receives and properly handles the ACK.
-  // Clock::settle() ensures that the slave successfully
-  // executes Slave::_statusUpdateAcknowledgement().
-  AWAIT_READY(_statusUpdateAcknowledgement);
-  Clock::settle();
-
-  Clock::resume();
-
-  EXPECT_CALL(exec, shutdown(_))
-    .Times(AtMost(1));
-
-  schedDriver.stop();
-  schedDriver.join();
-
-  Shutdown();
-}
-
-
 TEST_F(MasterTest, MetricsInStatsEndpoint)
 {
   Try<PID<Master> > master = StartMaster();

http://git-wip-us.apache.org/repos/asf/mesos/blob/90415006/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index aaf509d..9178e01 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -541,8 +541,6 @@ TEST_F(SlaveTest, DISABLED_ROOT_RunTaskWithCommandInfoWithUser)
 
 // This test ensures that a status update acknowledgement from a
 // non-leading master is ignored.
-// TODO(bmahler): This test will need to be updated once all
-// acknowledgements go through the master.
 TEST_F(SlaveTest, IgnoreNonLeaderStatusUpdateAcknowledgement)
 {
   Try<PID<Master> > master = StartMaster();
@@ -596,13 +594,16 @@ TEST_F(SlaveTest, IgnoreNonLeaderStatusUpdateAcknowledgement)
   // Pause the clock to prevent status update retries on the slave.
   Clock::pause();
 
-  // Intercept the status update acknowledgement and send it to the
-  // master instead!
+  // Intercept the acknowledgement sent to the slave so that we can
+  // spoof the master's pid.
   Future<StatusUpdateAcknowledgementMessage> acknowledgementMessage =
     DROP_PROTOBUF(StatusUpdateAcknowledgementMessage(),
-                  schedulerPid,
+                  master.get(),
                   slave.get());
 
+  Future<Nothing> _statusUpdateAcknowledgement =
+    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);
+
   schedDriver.launchTasks(offers.get()[0].id(), tasks);
 
   AWAIT_READY(update);
@@ -610,25 +611,10 @@ TEST_F(SlaveTest, IgnoreNonLeaderStatusUpdateAcknowledgement)
 
   AWAIT_READY(acknowledgementMessage);
 
-  // Intercept the status update acknowledgement from the master
-  // to the slave so that we can spoof a non-leading master pid.
-  Future<StatusUpdateAcknowledgementMessage> acknowledgementMessage2 =
-    DROP_PROTOBUF(StatusUpdateAcknowledgementMessage(),
-                  master.get(),
-                  slave.get());
-
-  // Send the acknowledgment to the master.
-  process::post(schedulerPid, master.get(), acknowledgementMessage.get());
-
-  AWAIT_READY(acknowledgementMessage2);
-
-  Future<Nothing> _statusUpdateAcknowledgement =
-    FUTURE_DISPATCH(slave.get(), &Slave::_statusUpdateAcknowledgement);
-
   // Send the acknowledgement to the slave with a non-leading master.
   process::post(
-      schedulerPid,
       process::UPID("master@localhost:1"),
+      slave.get(),
       acknowledgementMessage.get());
 
   // Make sure the acknowledgement was ignored.