You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ab...@apache.org on 2019/04/30 11:58:11 UTC
[mesos] branch master updated: Removed the duplicate pid check in
Docker containerizer.
This is an automated email from the ASF dual-hosted git repository.
abudnik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push:
new c8004ee Removed the duplicate pid check in Docker containerizer.
c8004ee is described below
commit c8004ee8a0962d0e0f9147718853160bb708f5bc
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Tue Apr 30 13:23:26 2019 +0200
Removed the duplicate pid check in Docker containerizer.
Review: https://reviews.apache.org/r/70561/
---
src/slave/containerizer/docker.cpp | 27 ++++++---------------------
1 file changed, 6 insertions(+), 21 deletions(-)
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index 7f1d471..e4ad945 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -936,10 +936,6 @@ Future<Nothing> DockerContainerizerProcess::_recover(
}
}
- // Collection of pids that we've started reaping in order to
- // detect very unlikely duplicate scenario (see below).
- hashmap<ContainerID, pid_t> pids;
-
foreachvalue (const FrameworkState& framework, state->frameworks) {
foreachvalue (const ExecutorState& executor, framework.executors) {
if (executor.info.isNone()) {
@@ -1018,9 +1014,12 @@ Future<Nothing> DockerContainerizerProcess::_recover(
// Only reap the executor process if the executor can be connected
// otherwise just set `container->status` to `None()`. This is to
- // avoid reaping an irrelevant process, e.g., after the agent host is
- // rebooted, the executor pid happens to be reused by another process.
- // See MESOS-8125 for details.
+ // avoid reaping an irrelevant process, e.g., agent process is stopped
+ // for a long time, and during this time executor terminates and its
+ // pid happens to be reused by another irrelevant process. When agent
+ // is restarted, it still considers this executor not complete (i.e.,
+ // `run->completed` is false), so we would reap the irrelevant process
+ // if we do not check whether that process can be connected.
// Note that if both the pid and the port of the executor are reused
// by another process or two processes respectively after the agent
// host reboots we will still reap an irrelevant process, but that
@@ -1056,20 +1055,6 @@ Future<Nothing> DockerContainerizerProcess::_recover(
container->status.future()
->onAny(defer(self(), &Self::reaped, containerId));
- if (pids.contains_value(pid)) {
- // This should (almost) never occur. There is the
- // possibility that a new executor is launched with the same
- // pid as one that just exited (highly unlikely) and the
- // slave dies after the new executor is launched but before
- // it hears about the termination of the earlier executor
- // (also unlikely).
- return Failure(
- "Detected duplicate pid " + stringify(pid) +
- " for container " + stringify(containerId));
- }
-
- pids.put(containerId, pid);
-
const string sandboxDirectory = paths::getExecutorRunPath(
flags.work_dir,
state->id,