You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by qi...@apache.org on 2018/09/05 21:36:52 UTC
[mesos] 01/03: Made command check always wait before removing the
nested container.
This is an automated email from the ASF dual-hosted git repository.
qianzhang pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 54a4c1e6306339a59884438b3fd9752704b77362
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Thu Aug 23 17:44:53 2018 +0800
Made command check always wait before removing the nested container.
Review: https://reviews.apache.org/r/68495
---
src/checks/checker_process.cpp | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/src/checks/checker_process.cpp b/src/checks/checker_process.cpp
index 77a76f4..21af9b6 100644
--- a/src/checks/checker_process.cpp
+++ b/src/checks/checker_process.cpp
@@ -795,7 +795,19 @@ void CheckerProcess::___nestedCommandCheck(
<< launchResponse.body << ") while launching " << name
<< " for task '" << taskId << "'";
- promise->discard();
+ // We'll try to remove the container created for the check at the
+ // beginning of the next check. In order to prevent a failure, the
+ // promise should only be completed once we're sure that the
+ // container has terminated.
+ waitNestedContainer(checkContainerId, nested)
+ .onAny([promise](const Future<Option<int>>&) {
+ // We assume that once `WaitNestedContainer` returns,
+ // irrespective of whether the response contains a failure, the
+ // container will be in a terminal state, and that it will be
+ // possible to remove it.
+ promise->discard();
+ });
+
return;
}
@@ -881,7 +893,10 @@ void CheckerProcess::nestedCommandCheckFailure(
//
// This will allow us to recover from a blip. The executor will
// pause the checker when it detects that the agent is not
- // available.
+ // available. Here we do not need to wait for the check container
+ // since the agent may have been unavailable; when the agent is back,
+ // it will destroy the check container as an orphan container, and we
+ // will eventually remove it in `nestedCommandCheck()`.
LOG(WARNING) << "Connection to the agent to launch " << name
<< " for task '" << taskId << "' failed: " << failure;