You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ne...@apache.org on 2017/05/25 18:39:49 UTC

mesos git commit: Fixed flakiness in MasterAllocatorTest.FrameworkExited.

Repository: mesos
Updated Branches:
  refs/heads/master edde1da43 -> 299f6aa0b


Fixed flakiness in MasterAllocatorTest.FrameworkExited.

This test did not pause the clock; hence, batch allocations could occur
at unpredictable times and cause the test to fail.

Fix this by pausing the clock and explicitly advancing it as needed to
trigger batch allocations or other events.

Review: https://reviews.apache.org/r/59541


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/299f6aa0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/299f6aa0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/299f6aa0

Branch: refs/heads/master
Commit: 299f6aa0b5b3909cd590c701a12030f52c0852e3
Parents: edde1da
Author: Neil Conway <ne...@gmail.com>
Authored: Tue May 23 19:54:43 2017 -0700
Committer: Neil Conway <ne...@gmail.com>
Committed: Thu May 25 11:38:49 2017 -0700

----------------------------------------------------------------------
 src/tests/master_allocator_tests.cpp | 74 ++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/299f6aa0/src/tests/master_allocator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_allocator_tests.cpp b/src/tests/master_allocator_tests.cpp
index 750e030..b9e0422 100644
--- a/src/tests/master_allocator_tests.cpp
+++ b/src/tests/master_allocator_tests.cpp
@@ -569,15 +569,18 @@ TYPED_TEST(MasterAllocatorTest, SchedulerFailover)
 
 
 // Checks that if a framework launches a task and then the framework
-// is killed, the tasks resources are returned and reoffered correctly.
+// is killed, the task's resources are returned and reoffered correctly.
 TYPED_TEST(MasterAllocatorTest, FrameworkExited)
 {
+  Clock::pause();
+
   TestAllocator<TypeParam> allocator;
 
   EXPECT_CALL(allocator, initialize(_, _, _, _));
 
   master::Flags masterFlags = this->CreateMasterFlags();
   masterFlags.allocation_interval = Milliseconds(50);
+
   Try<Owned<cluster::Master>> master =
     this->StartMaster(&allocator, masterFlags);
   ASSERT_SOME(master);
@@ -588,23 +591,32 @@ TYPED_TEST(MasterAllocatorTest, FrameworkExited)
   MockExecutor exec1(executor1.executor_id());
   MockExecutor exec2(executor2.executor_id());
 
-  hashmap<ExecutorID, Executor*> execs;
-  execs[executor1.executor_id()] = &exec1;
-  execs[executor2.executor_id()] = &exec2;
+  hashmap<ExecutorID, Executor*> execs = {
+    {executor1.executor_id(), &exec1},
+    {executor2.executor_id(), &exec2}
+  };
 
   TestContainerizer containerizer(execs);
 
-  slave::Flags flags = this->CreateSlaveFlags();
-  flags.resources = Some("cpus:3;mem:1024");
+  slave::Flags slaveFlags = this->CreateSlaveFlags();
+  slaveFlags.resources = Some("cpus:3;mem:1024");
 
-  EXPECT_CALL(allocator, addSlave(_, _, _, _, _, _));
+  Future<Nothing> addSlave;
+  EXPECT_CALL(allocator, addSlave(_, _, _, _, _, _))
+    .WillOnce(DoAll(InvokeAddSlave(&allocator),
+                    FutureSatisfy(&addSlave)));
 
   Owned<MasterDetector> detector = master.get()->createDetector();
 
   Try<Owned<cluster::Slave>> slave =
-    this->StartSlave(detector.get(), &containerizer, flags);
+    this->StartSlave(detector.get(), &containerizer, slaveFlags);
   ASSERT_SOME(slave);
 
+  Clock::advance(slaveFlags.authentication_backoff_factor);
+  Clock::advance(slaveFlags.registration_backoff_factor);
+
+  AWAIT_READY(addSlave);
+
   MockScheduler sched1;
   MesosSchedulerDriver driver1(
       &sched1, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
@@ -613,13 +625,6 @@ TYPED_TEST(MasterAllocatorTest, FrameworkExited)
 
   EXPECT_CALL(sched1, registered(_, _, _));
 
-  // We decline offers that we aren't expecting so that the resources
-  // get aggregated. Note that we need to do this _first_ and
-  // _separate_ from the expectation below so that this expectation is
-  // checked last and matches all possible offers.
-  EXPECT_CALL(sched1, resourceOffers(_, _))
-    .WillRepeatedly(DeclineOffers());
-
   // The first time the framework is offered resources, all of the
   // cluster's resources should be available.
   EXPECT_CALL(sched1, resourceOffers(_, OfferEq(3, 1024)))
@@ -646,7 +651,8 @@ TYPED_TEST(MasterAllocatorTest, FrameworkExited)
 
   // We need to wait until the allocator knows about the unused
   // resources to start the second framework so that we get the
-  // expected offer.
+  // expected offer. Because the clock is paused, a batch allocation
+  // will not occur in the time before the second framework registers.
   AWAIT_READY(recoverResources);
 
   MockScheduler sched2;
@@ -657,13 +663,6 @@ TYPED_TEST(MasterAllocatorTest, FrameworkExited)
 
   EXPECT_CALL(sched2, registered(_, _, _));
 
-  // We decline offers that we aren't expecting so that the resources
-  // get aggregated. Note that we need to do this _first_ and
-  // _separate_ from the expectation below so that this expectation is
-  // checked last and matches all possible offers.
-  EXPECT_CALL(sched2, resourceOffers(_, _))
-    .WillRepeatedly(DeclineOffers());
-
   // The first time sched2 gets an offer, framework 1 has a task
   // running with 2 cpus and 512 mem, leaving 1 cpu and 512 mem.
   EXPECT_CALL(sched2, resourceOffers(_, OfferEq(1, 512)))
@@ -686,11 +685,25 @@ TYPED_TEST(MasterAllocatorTest, FrameworkExited)
 
   AWAIT_READY(recoverResources2);
 
-  // Shut everything down but check that framework 2 gets the
-  // resources from framework 1 after it is shutdown.
+  // Shutdown framework 1; framework 2 should then be offered the
+  // resources that were consumed by framework 1.
   EXPECT_CALL(allocator, recoverResources(_, _, _, _))
     .WillRepeatedly(DoDefault());
 
+  Future<Nothing> removeFramework;
+  EXPECT_CALL(allocator, removeFramework(_))
+    .WillOnce(FutureSatisfy(&removeFramework));
+
+  EXPECT_CALL(exec1, shutdown(_))
+    .Times(AtMost(1));
+
+  driver1.stop();
+  driver1.join();
+
+  // Ensure that framework 1 has been removed from the allocator
+  // before we trigger the next batch allocation below.
+  AWAIT_READY(removeFramework);
+
   // After we stop framework 1, all of its resources should
   // have been returned, but framework 2 should still have a
   // task with 1 cpu and 256 mem, leaving 2 cpus and 768 mem.
@@ -698,14 +711,15 @@ TYPED_TEST(MasterAllocatorTest, FrameworkExited)
   EXPECT_CALL(sched2, resourceOffers(_, OfferEq(2, 768)))
     .WillOnce(FutureSatisfy(&resourceOffers));
 
-  EXPECT_CALL(exec1, shutdown(_))
-    .Times(AtMost(1));
-
-  driver1.stop();
-  driver1.join();
+  // Trigger a batch allocation.
+  Clock::advance(masterFlags.allocation_interval);
 
   AWAIT_READY(resourceOffers);
 
+  // Reset the `removeFramework` expectation to its default value.
+  EXPECT_CALL(allocator, removeFramework(_))
+    .WillRepeatedly(DoDefault());
+
   EXPECT_CALL(exec2, shutdown(_))
     .Times(AtMost(1));