You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ab...@apache.org on 2019/05/01 10:26:32 UTC
[mesos] 01/02: Removed the duplicate pid check in Docker
containerizer.
This is an automated email from the ASF dual-hosted git repository.
abudnik pushed a commit to branch 1.4.x
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit ca65f991727494b9b78e64a19306231ac004289f
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Tue Apr 30 13:23:26 2019 +0200
Removed the duplicate pid check in Docker containerizer.
Review: https://reviews.apache.org/r/70561/
---
src/slave/containerizer/docker.cpp | 27 ++++++---------------------
1 file changed, 6 insertions(+), 21 deletions(-)
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index 31a47f0..97cd75e 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -925,10 +925,6 @@ Future<Nothing> DockerContainerizerProcess::_recover(
}
}
- // Collection of pids that we've started reaping in order to
- // detect very unlikely duplicate scenario (see below).
- hashmap<ContainerID, pid_t> pids;
-
foreachvalue (const FrameworkState& framework, state->frameworks) {
foreachvalue (const ExecutorState& executor, framework.executors) {
if (executor.info.isNone()) {
@@ -1007,9 +1003,12 @@ Future<Nothing> DockerContainerizerProcess::_recover(
// Only reap the executor process if the executor can be connected
// otherwise just set `container->status` to `None()`. This is to
- // avoid reaping an irrelevant process, e.g., after the agent host is
- // rebooted, the executor pid happens to be reused by another process.
- // See MESOS-8125 for details.
+ // avoid reaping an irrelevant process, e.g., agent process is stopped
+ // for a long time, and during this time executor terminates and its
+ // pid happens to be reused by another irrelevant process. When agent
+ // is restarted, it still considers this executor not complete (i.e.,
+ // `run->completed` is false), so we would reap the irrelevant process
+ // if we do not check whether that process can be connected.
// Note that if both the pid and the port of the executor are reused
// by another process or two processes respectively after the agent
// host reboots we will still reap an irrelevant process, but that
@@ -1045,20 +1044,6 @@ Future<Nothing> DockerContainerizerProcess::_recover(
container->status.future().get()
.onAny(defer(self(), &Self::reaped, containerId));
- if (pids.containsValue(pid)) {
- // This should (almost) never occur. There is the
- // possibility that a new executor is launched with the same
- // pid as one that just exited (highly unlikely) and the
- // slave dies after the new executor is launched but before
- // it hears about the termination of the earlier executor
- // (also unlikely).
- return Failure(
- "Detected duplicate pid " + stringify(pid) +
- " for container " + stringify(containerId));
- }
-
- pids.put(containerId, pid);
-
const string sandboxDirectory = paths::getExecutorRunPath(
flags.work_dir,
state->id,