You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2018/08/20 14:38:14 UTC

[mesos] 03/05: Fixed incorrect `mnt` namespace detection of command executor's task.

This is an automated email from the ASF dual-hosted git repository.

alexr pushed a commit to branch 1.7.x
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit d99e28b645c4afe61a61282e6358a9236ede5b31
Author: Andrei Budnik <ab...@mesosphere.com>
AuthorDate: Mon Aug 20 16:28:31 2018 +0200

    Fixed incorrect `mnt` namespace detection of command executor's task.
    
    Previously, we were walking the process tree from the container's
    `init` process to find the first process along the way whose `mnt`
    namespace differs from the `init` process. We expected this algorithm
    to always return the PID of the command executor's task.
    
    However, if someone launches multiple nested containers within the
    process tree, the aforementioned algorithm might detect the PID of
    one of those nested container instead of the command executor's task.
    Even though the `mnt` namespace will be the same across all these
    candidates, the detected PID might belong to a short-lived container,
    which might terminate before the containerizer launcher (aka `nanny`
    process) tries to enter its `mnt` namespace.
    
    This patch fixes the detection algorithm so that it always returns
    the PID of the command executor's task.
    
    Review: https://reviews.apache.org/r/68257/
---
 src/slave/containerizer/mesos/utils.cpp | 37 +++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/slave/containerizer/mesos/utils.cpp b/src/slave/containerizer/mesos/utils.cpp
index 30e76d1..d9964f0 100644
--- a/src/slave/containerizer/mesos/utils.cpp
+++ b/src/slave/containerizer/mesos/utils.cpp
@@ -49,8 +49,13 @@ namespace slave {
 // `pid` of the `init` process of these containers. For now, we
 // compensate for this by simply walking the process tree from the
 // container's `init` process up to 2-levels down (where the task
-// process would exist) and look to see if any process along the way
-// has a different `mnt` namespace. If it does, we return a reference
+// process would exist) and look to see if any 2-nd level process
+// has a different `mnt` namespace. Only one pair of processes matches
+// this property - command executor and its task. One important detail
+// is that we skip all 1st-level processes whose `mnt` namespace is not
+// the same as the `mnt` namespace of the `init` process.
+//
+// If we found such a 2-nd level process, we return a reference
 // to its `pid` as the `pid` for entering the `mnt` namespace of the
 // container.  Otherwise, we return the `init` process's `pid`.
 //
@@ -67,13 +72,16 @@ Try<pid_t> getMountNamespaceTarget(pid_t parent)
                  " '" + stringify(parent) + "'");
   }
 
-  // Search for a new mount namespace in all direct children.
   Try<set<pid_t>> children = os::children(parent, false);
   if (children.isError()) {
     return Error("Cannot get children for process"
                  " '" + stringify(parent) + "': " + children.error());
   }
 
+  pid_t candidate = parent;
+  int numCandidates = 0;
+  bool hasGrandchild = false;
+
   foreach (pid_t child, children.get()) {
     Result<ino_t> childNamespace = ns::getns(child, "mnt");
     if (childNamespace.isError()) {
@@ -86,12 +94,11 @@ Try<pid_t> getMountNamespaceTarget(pid_t parent)
     }
 
     if (parentNamespace.get() != childNamespace.get()) {
-      return child;
+      // We skip this child, because we know that it's not a command executor.
+      continue;
     }
-  }
 
-  // Search for a new mount namespace in all 2nd-level children.
-  foreach (pid_t child, children.get()) {
+    // Search for a new mount namespace in 2nd-level children.
     Try<set<pid_t>> children2 = os::children(child, false);
     if (children2.isError()) {
       return Error("Cannot get 2nd-level children for process"
@@ -111,13 +118,25 @@ Try<pid_t> getMountNamespaceTarget(pid_t parent)
         continue;
       }
 
+      hasGrandchild = true;
+
       if (parentNamespace.get() != child2Namespace.get()) {
-        return child2;
+        ++numCandidates;
+        candidate = child2;
       }
     }
   }
 
-  return parent;
+  if (!hasGrandchild) {
+    return Error("Cannot detect task process: no 2nd-level processes found");
+  }
+
+  if (numCandidates > 1) {
+    return Error("Cannot detect task process: unexpected number of candidates"
+                 " found: " + stringify(numCandidates));
+  }
+
+  return candidate;
 }
 #endif // __linux__