You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by gr...@apache.org on 2018/04/06 06:33:04 UTC

[10/13] mesos git commit: Added a test to verify that task launch order is preserved.

Added a test to verify that task launch order is preserved.

The agent should launch tasks in the same order in which they are
received. In the task launch path, there are currently two
asynchronous steps which may complete out of order: unschedule GC
and task authorization.

This test simulates the reordering of the completion of the
unschedule GC step and verifies that, despite the reordering,
tasks will still launch in their original order.

Review: https://reviews.apache.org/r/66145/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5dfa4a5b
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5dfa4a5b
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5dfa4a5b

Branch: refs/heads/master
Commit: 5dfa4a5b06df860a1ecb29702ff0e245432ceb42
Parents: 028ea54
Author: Meng Zhu <mz...@mesosphere.io>
Authored: Thu Apr 5 17:44:35 2018 -0700
Committer: Greg Mann <gr...@gmail.com>
Committed: Thu Apr 5 17:58:09 2018 -0700

----------------------------------------------------------------------
 src/tests/slave_tests.cpp | 148 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5dfa4a5b/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index 95d4143..14d39a2 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -141,6 +141,7 @@ using testing::_;
 using testing::AtMost;
 using testing::DoAll;
 using testing::Eq;
+using testing::StrEq;
 using testing::Invoke;
 using testing::InvokeWithoutArgs;
 using testing::Return;
@@ -4928,6 +4929,153 @@ TEST_F(SlaveTest, RemoveExecutorUponFailedTaskGroupLaunch)
 }
 
 
+// This test ensures that tasks using the same executor are successfully
+// launched in the order in which the agent receives the RunTask(Group)Message,
+// even when we manually reorder the completion of the asynchronous unschedule
+// GC step. See MESOS-8624.
+TEST_F(SlaveTest, LaunchTasksReorderUnscheduleGC)
+{
+  Clock::pause();
+
+  master::Flags masterFlags = CreateMasterFlags();
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  MockGarbageCollector mockGarbageCollector;
+  slave::Flags slaveFlags = CreateSlaveFlags();
+
+  // Start a mock slave.
+  Try<Owned<cluster::Slave>> slave =
+    StartSlave(detector.get(), &mockGarbageCollector, slaveFlags, true);
+
+  ASSERT_SOME(slave);
+  ASSERT_NE(nullptr, slave.get()->mock());
+
+  slave.get()->start();
+
+  auto scheduler = std::make_shared<v1::MockHTTPScheduler>();
+
+  EXPECT_CALL(*scheduler, connected(_))
+    .WillOnce(v1::scheduler::SendSubscribe(v1::DEFAULT_FRAMEWORK_INFO));
+
+  Future<v1::scheduler::Event::Subscribed> subscribed;
+  EXPECT_CALL(*scheduler, subscribed(_, _))
+    .WillOnce(FutureArg<1>(&subscribed));
+
+  EXPECT_CALL(*scheduler, heartbeat(_))
+    .WillRepeatedly(Return()); // Ignore heartbeats.
+
+  Future<v1::scheduler::Event::Offers> offers;
+  EXPECT_CALL(*scheduler, offers(_, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  v1::scheduler::TestMesos mesos(
+    master.get()->pid, ContentType::PROTOBUF, scheduler);
+
+  // Advance the clock to trigger both agent registration and a batch
+  // allocation.
+  Clock::advance(slaveFlags.registration_backoff_factor);
+  Clock::advance(masterFlags.allocation_interval);
+
+  AWAIT_READY(subscribed);
+
+  AWAIT_READY(offers);
+  ASSERT_FALSE(offers->offers().empty());
+
+  const v1::Offer& offer = offers->offers(0);
+  const v1::AgentID& agentId = offer.agent_id();
+
+  v1::Resources resources =
+    v1::Resources::parse("cpus:0.1;mem:32;disk:32").get();
+
+  v1::FrameworkID frameworkId(subscribed->framework_id());
+
+  v1::ExecutorInfo executorInfo = v1::createExecutorInfo(
+      "default", None(), resources, v1::ExecutorInfo::DEFAULT, frameworkId);
+
+  // Create two separate task groups that use the same executor.
+  v1::TaskInfo taskInfo1 = v1::createTask(agentId, resources, "sleep 1000");
+  v1::TaskGroupInfo taskGroup1 = v1::createTaskGroupInfo({taskInfo1});
+
+  v1::TaskInfo taskInfo2 = v1::createTask(agentId, resources, "sleep 1000");
+  v1::TaskGroupInfo taskGroup2 = v1::createTaskGroupInfo({taskInfo2});
+
+  v1::Offer::Operation launchGroup1 =
+    v1::LAUNCH_GROUP(executorInfo, taskGroup1);
+  v1::Offer::Operation launchGroup2 =
+    v1::LAUNCH_GROUP(executorInfo, taskGroup2);
+
+  // The `unschedule()` function is used to prevent premature garbage
+  // collection when the executor directory already exists due to a
+  // previously-launched task. Simulate this scenario by creating the
+  // executor directory manually.
+  string path = paths::getExecutorPath(
+      slaveFlags.work_dir,
+      devolve(agentId),
+      devolve(frameworkId),
+      devolve(executorInfo.executor_id()));
+
+  Try<Nothing> mkdir = os::mkdir(path, true);
+  CHECK_SOME(mkdir);
+
+  Promise<bool> promise1;
+
+  // Catch the unschedule GC step and reorder the task group launches by
+  // pausing the processing of `taskGroup1` while allowing the processing
+  // of `taskGroup1` to continue.
+  EXPECT_CALL(mockGarbageCollector, unschedule(StrEq(path)))
+    .WillOnce(Return(promise1.future()))
+    .WillRepeatedly(Return(true));
+
+  Future<v1::scheduler::Event::Update> taskStarting1, taskStarting2;
+  Future<v1::scheduler::Event::Update> taskRunning1, taskRunning2;
+  EXPECT_CALL(*scheduler, update(_, TaskStatusUpdateTaskIdEq(taskInfo1)))
+    .WillOnce(DoAll(
+        FutureArg<1>(&taskStarting1),
+        v1::scheduler::SendAcknowledge(frameworkId, agentId)))
+    .WillOnce(DoAll(
+        FutureArg<1>(&taskRunning1),
+        v1::scheduler::SendAcknowledge(frameworkId, agentId)));
+  EXPECT_CALL(*scheduler, update(_, TaskStatusUpdateTaskIdEq(taskInfo2)))
+    .WillOnce(DoAll(
+        FutureArg<1>(&taskStarting2),
+        v1::scheduler::SendAcknowledge(frameworkId, agentId)))
+    .WillOnce(DoAll(
+        FutureArg<1>(&taskRunning2),
+        v1::scheduler::SendAcknowledge(frameworkId, agentId)));
+
+  // Launch the two task groups.
+  mesos.send(
+      v1::createCallAccept(frameworkId, offer, {launchGroup1, launchGroup2}));
+
+  // Settle the clock to finish the processing of `taskGroup2`.
+  Clock::settle();
+
+  ASSERT_TRUE(taskStarting2.isPending());
+
+  // Resume the processing of `taskGroup1`.
+  promise1.set(true);
+
+  // If taskgroup2 tries to launch the executor first (i.e. if the order is
+  // not corrected by the agent), taskgroup2 will be subsequently dropped. The
+  // successful launch of both tasks verifies that the agent enforces the task
+  // launch order.
+  AWAIT_READY(taskStarting1);
+  AWAIT_READY(taskStarting2);
+
+  ASSERT_EQ(v1::TASK_STARTING, taskStarting1->status().state());
+  ASSERT_EQ(v1::TASK_STARTING, taskStarting2->status().state());
+
+  AWAIT_READY(taskRunning1);
+  AWAIT_READY(taskRunning2);
+
+  ASSERT_EQ(v1::TASK_RUNNING, taskRunning1->status().state());
+  ASSERT_EQ(v1::TASK_RUNNING, taskRunning2->status().state());
+}
+
+
 // This test ensures that agent sends ExitedExecutorMessage when the task
 // fails to launch due to task authorization failure and that master's executor
 // bookkeeping entry is removed.