You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@mesos.apache.org by qi...@apache.org on 2018/09/05 21:36:52 UTC

[mesos] 01/03: Made command check always wait before removing the nested container.

This is an automated email from the ASF dual-hosted git repository.

qianzhang pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 54a4c1e6306339a59884438b3fd9752704b77362
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Thu Aug 23 17:44:53 2018 +0800

    Made command check always wait before removing the nested container.
    
    Review: https://reviews.apache.org/r/68495
---
 src/checks/checker_process.cpp | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/checks/checker_process.cpp b/src/checks/checker_process.cpp
index 77a76f4..21af9b6 100644
--- a/src/checks/checker_process.cpp
+++ b/src/checks/checker_process.cpp
@@ -795,7 +795,19 @@ void CheckerProcess::___nestedCommandCheck(
                  << launchResponse.body << ") while launching " << name
                  << " for task '" << taskId << "'";
 
-    promise->discard();
+    // We'll try to remove the container created for the check at the
+    // beginning of the next check. In order to prevent a failure, the
+    // promise should only be completed once we're sure that the
+    // container has terminated.
+    waitNestedContainer(checkContainerId, nested)
+      .onAny([promise](const Future<Option<int>>&) {
+        // We assume that once `WaitNestedContainer` returns,
+        // irrespective of whether the response contains a failure, the
+        // container will be in a terminal state, and that it will be
+        // possible to remove it.
+        promise->discard();
+    });
+
     return;
   }
 
@@ -881,7 +893,10 @@ void CheckerProcess::nestedCommandCheckFailure(
     //
     // This will allow us to recover from a blip. The executor will
     // pause the checker when it detects that the agent is not
-    // available.
+    // available. Here we do not need to wait for the check container
+    // since the agent may have been unavailable; when the agent is
+    // back, it will destroy the check container as an orphan container,
+    // and we will eventually remove it in `nestedCommandCheck()`.
     LOG(WARNING) << "Connection to the agent to launch " << name
                  << " for task '" << taskId << "' failed: " << failure;