Posted to commits@mesos.apache.org by jo...@apache.org on 2018/10/27 00:06:06 UTC

[mesos] branch master updated: Fixed test `MesosContainerizerSlaveRecoveryTest.ResourceStatistics`.

This is an automated email from the ASF dual-hosted git repository.

josephwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git


The following commit(s) were added to refs/heads/master by this push:
     new d9bb5b8  Fixed test `MesosContainerizerSlaveRecoveryTest.ResourceStatistics`.
d9bb5b8 is described below

commit d9bb5b89dda100965cc0a624030b374e316d8d9c
Author: Meng Zhu <mz...@mesosphere.io>
AuthorDate: Fri Oct 26 16:58:25 2018 -0700

    Fixed test `MesosContainerizerSlaveRecoveryTest.ResourceStatistics`.
    
    `MesosContainerizerSlaveRecoveryTest.ResourceStatistics` is flaky
    due to a race between the test restarting the agent and the agent
    delivering the queued task to the registered executor. If the
    queued task is delivered before the agent restarts, it triggers an
    extra `MesosContainerizerProcess::update()` call, which satisfies
    the test's expectation early and allows the test to proceed before
    the agent has finished recovery.
    
    This patch eliminates the race by explicitly waiting for the first
    `update()` call before triggering the agent restart.
    
    Review: https://reviews.apache.org/r/69194/
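
For context, the ordering the patch enforces can be condensed as follows.
This is a sketch of the change in the diff below, not new code; it uses
the test helpers that already appear in the test (`FUTURE_DISPATCH`,
`AWAIT_READY`), with the master/driver/offer setup elided:

    // Expect the update() triggered by delivering the queued task.
    Future<Nothing> update1 =
      FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);

    driver.launchTasks(offers.get()[0].id(), {task});

    // Block until the pre-restart update() has fired...
    AWAIT_READY(update1);

    // ...so terminating the agent can no longer race with it.
    slave.get()->terminate();

    // The next update() can now only come from the restarted agent,
    // after the executor has re-registered.
    Future<Nothing> update2 =
      FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);

    // (Restart the agent, then:)
    AWAIT_READY(update2);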
---
 src/tests/slave_recovery_tests.cpp | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 4bf0229..0fef5aa 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -5069,6 +5069,8 @@ class MesosContainerizerSlaveRecoveryTest
   : public SlaveRecoveryTest<MesosContainerizer> {};
 
 
+// Tests that the containerizer will properly report resource limits
+// after an agent failover.
 TEST_F(MesosContainerizerSlaveRecoveryTest, ResourceStatistics)
 {
   Try<Owned<cluster::Master>> master = this->StartMaster();
@@ -5112,19 +5114,23 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, ResourceStatistics)
 
   TaskInfo task = createTask(offers.get()[0], SLEEP_COMMAND(1000));
 
-  // Message expectations.
+  // Wait until the executor has registered and resources are updated.
   Future<Message> registerExecutor =
     FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);
+  Future<Nothing> update1 =
+    FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);
 
   driver.launchTasks(offers.get()[0].id(), {task});
 
   AWAIT_READY(registerExecutor);
+  AWAIT_READY(update1);
 
   slave.get()->terminate();
 
-  // Set up so we can wait until the new slave updates the container's
-  // resources (this occurs after the executor has reregistered).
-  Future<Nothing> update =
+  // Wait until the executor has re-registered and resources are updated.
+  Future<Message> reregisterExecutor =
+    FUTURE_MESSAGE(Eq(ReregisterExecutorMessage().GetTypeName()), _, _);
+  Future<Nothing> update2 =
     FUTURE_DISPATCH(_, &MesosContainerizerProcess::update);
 
   // Restart the slave (use same flags) with a new containerizer.
@@ -5135,8 +5141,8 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, ResourceStatistics)
   slave = this->StartSlave(detector.get(), containerizer.get(), flags);
   ASSERT_SOME(slave);
 
-  // Wait until the containerizer is updated.
-  AWAIT_READY(update);
+  AWAIT_READY(reregisterExecutor);
+  AWAIT_READY(update2);
 
   Future<hashset<ContainerID>> containers = containerizer->containers();
   AWAIT_READY(containers);
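
A note on the helpers used throughout this test: `FUTURE_MESSAGE` and
`FUTURE_DISPATCH` come from libprocess's gmock support (declared in
process/gmock.hpp in the Mesos tree), and each returns a future that is
satisfied once a matching message or dispatch is observed. A minimal
usage sketch follows; `SomeProcess::method` is a hypothetical
placeholder, not part of Mesos:

    // Intercept a protobuf message by its type name.
    Future<Message> registered =
      FUTURE_MESSAGE(Eq(RegisterExecutorMessage().GetTypeName()), _, _);

    // Intercept a dispatch to a process method (hypothetical process).
    Future<Nothing> dispatched =
      FUTURE_DISPATCH(_, &SomeProcess::method);

    // ... trigger the behavior under test ...

    AWAIT_READY(registered);  // Ready once the message is seen.
    AWAIT_READY(dispatched);  // Ready once the dispatch is seen.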