You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ab...@apache.org on 2019/05/01 10:26:32 UTC

[mesos] 01/02: Removed the duplicate pid check in Docker containerizer.

This is an automated email from the ASF dual-hosted git repository.

abudnik pushed a commit to branch 1.4.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit ca65f991727494b9b78e64a19306231ac004289f
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Tue Apr 30 13:23:26 2019 +0200

    Removed the duplicate pid check in Docker containerizer.
    
    Review: https://reviews.apache.org/r/70561/
---
 src/slave/containerizer/docker.cpp | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index 31a47f0..97cd75e 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -925,10 +925,6 @@ Future<Nothing> DockerContainerizerProcess::_recover(
       }
     }
 
-    // Collection of pids that we've started reaping in order to
-    // detect very unlikely duplicate scenario (see below).
-    hashmap<ContainerID, pid_t> pids;
-
     foreachvalue (const FrameworkState& framework, state->frameworks) {
       foreachvalue (const ExecutorState& executor, framework.executors) {
         if (executor.info.isNone()) {
@@ -1007,9 +1003,12 @@ Future<Nothing> DockerContainerizerProcess::_recover(
 
         // Only reap the executor process if the executor can be connected
         // otherwise just set `container->status` to `None()`. This is to
-        // avoid reaping an irrelevant process, e.g., after the agent host is
-        // rebooted, the executor pid happens to be reused by another process.
-        // See MESOS-8125 for details.
+        // avoid reaping an irrelevant process, e.g., agent process is stopped
+        // for a long time, and during this time executor terminates and its
+        // pid happens to be reused by another irrelevant process. When agent
+        // is restarted, it still considers this executor not complete (i.e.,
+        // `run->completed` is false), so we would reap the irrelevant process
+        // if we do not check whether that process can be connected.
         // Note that if both the pid and the port of the executor are reused
         // by another process or two processes respectively after the agent
         // host reboots we will still reap an irrelevant process, but that
@@ -1045,20 +1044,6 @@ Future<Nothing> DockerContainerizerProcess::_recover(
         container->status.future().get()
           .onAny(defer(self(), &Self::reaped, containerId));
 
-        if (pids.containsValue(pid)) {
-          // This should (almost) never occur. There is the
-          // possibility that a new executor is launched with the same
-          // pid as one that just exited (highly unlikely) and the
-          // slave dies after the new executor is launched but before
-          // it hears about the termination of the earlier executor
-          // (also unlikely).
-          return Failure(
-              "Detected duplicate pid " + stringify(pid) +
-              " for container " + stringify(containerId));
-        }
-
-        pids.put(containerId, pid);
-
         const string sandboxDirectory = paths::getExecutorRunPath(
             flags.work_dir,
             state->id,