You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2018/01/24 01:34:11 UTC

[2/2] mesos git commit: Fixed resource statistics for Docker containers being destroyed.

Fixed resource statistics for Docker containers being destroyed.

If a process has exited, but not reaped yet (zombie procses),
`/proc/<pid>/cgroup` will still exist, but the process's cgroup will be
reset to the root cgroup. In DockerContainerizer, we rely on
`/proc/<pid>/cgroup` to get the cpu/memory statistics of the container.
If the `usage` call happens when the process is a zombie, the cpu/memory
statistics will actually be that of the root cgroup, which is obviously
not correct. See more details in MESOS-8480.

This patch fixed the issue by checking if the cgroup of a given pid is
root cgroup or not.

Review: https://reviews.apache.org/r/65301/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/21a2fae5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/21a2fae5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/21a2fae5

Branch: refs/heads/1.4.x
Commit: 21a2fae5c5162e559981b40b40a413910b9c1e7e
Parents: 5f7ef53
Author: Chun-Hung Hsiao <ch...@mesosphere.io>
Authored: Tue Jan 23 17:13:05 2018 -0800
Committer: Jie Yu <yu...@gmail.com>
Committed: Tue Jan 23 17:31:29 2018 -0800

----------------------------------------------------------------------
 src/slave/containerizer/docker.cpp | 36 +++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/21a2fae5/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index caac224..b3109a7 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -1687,6 +1687,13 @@ Future<Nothing> DockerContainerizerProcess::__update(
   static Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
   static Result<string> memoryHierarchy = cgroups::hierarchy("memory");
 
+  // NOTE: Normally, a Docker container should be in its own cgroup.
+  // However, a zombie process (exited but not reaped) will be
+  // temporarily moved into the system root cgroup. We add some
+  // defensive check here to make sure we are not changing the knobs
+  // in the root cgroup. See MESOS-8480 for details.
+  const string systemRootCgroup = stringify(os::PATH_SEPARATOR);
+
   if (cpuHierarchy.isError()) {
     return Failure("Failed to determine the cgroup hierarchy "
                    "where the 'cpu' subsystem is mounted: " +
@@ -1715,11 +1722,16 @@ Future<Nothing> DockerContainerizerProcess::__update(
     LOG(WARNING) << "Container " << containerId
                  << " does not appear to be a member of a cgroup"
                  << " where the 'cpu' subsystem is mounted";
+  } else if (cpuCgroup.get() == systemRootCgroup) {
+    LOG(WARNING)
+        << "Process '" << pid
+        << "' should not be in the system root cgroup (being destroyed?)";
   }
 
   // And update the CPU shares (if applicable).
   if (cpuHierarchy.isSome() &&
       cpuCgroup.isSome() &&
+      cpuCgroup.get() != systemRootCgroup &&
       _resources.cpus().isSome()) {
     double cpuShares = _resources.cpus().get();
 
@@ -1777,11 +1789,16 @@ Future<Nothing> DockerContainerizerProcess::__update(
     LOG(WARNING) << "Container " << containerId
                  << " does not appear to be a member of a cgroup"
                  << " where the 'memory' subsystem is mounted";
+  } else if (memoryCgroup.get() == systemRootCgroup) {
+    LOG(WARNING)
+        << "Process '" << pid
+        << "' should not be in the system root cgroup (being destroyed?)";
   }
 
   // And update the memory limits (if applicable).
   if (memoryHierarchy.isSome() &&
       memoryCgroup.isSome() &&
+      memoryCgroup.get() != systemRootCgroup &&
       _resources.mem().isSome()) {
     // TODO(tnachen): investigate and handle OOM with docker.
     Bytes mem = _resources.mem().get();
@@ -1928,6 +1945,13 @@ Try<ResourceStatistics> DockerContainerizerProcess::cgroupsStatistics(
   const Result<string> cpuacctHierarchy = cgroups::hierarchy("cpuacct");
   const Result<string> memHierarchy = cgroups::hierarchy("memory");
 
+  // NOTE: Normally, a Docker container should be in its own cgroup.
+  // However, a zombie process (exited but not reaped) will be
+  // temporarily moved into the system root cgroup. We add some
+  // defensive check here to make sure we are not reporting statistics
+  // for the root cgroup. See MESOS-8480 for details.
+  const string systemRootCgroup = stringify(os::PATH_SEPARATOR);
+
   if (cpuacctHierarchy.isError()) {
     return Error(
         "Failed to determine the cgroup 'cpuacct' subsystem hierarchy: " +
@@ -1947,6 +1971,10 @@ Try<ResourceStatistics> DockerContainerizerProcess::cgroupsStatistics(
         cpuacctCgroup.error());
   } else if (cpuacctCgroup.isNone()) {
     return Error("Unable to find 'cpuacct' cgroup subsystem");
+  } else if (cpuacctCgroup.get() == systemRootCgroup) {
+    return Error(
+        "Process '" + stringify(pid) +
+        "' should not be in the system root cgroup (being destroyed?)");
   }
 
   const Result<string> memCgroup = cgroups::memory::cgroup(pid);
@@ -1956,6 +1984,10 @@ Try<ResourceStatistics> DockerContainerizerProcess::cgroupsStatistics(
         memCgroup.error());
   } else if (memCgroup.isNone()) {
     return Error("Unable to find 'memory' cgroup subsystem");
+  } else if (memCgroup.get() == systemRootCgroup) {
+    return Error(
+        "Process '" + stringify(pid) +
+        "' should not be in the system root cgroup (being destroyed?)");
   }
 
   const Try<cgroups::cpuacct::Stats> cpuAcctStat =
@@ -2001,6 +2033,10 @@ Try<ResourceStatistics> DockerContainerizerProcess::cgroupsStatistics(
           cpuCgroup.error());
     } else if (cpuCgroup.isNone()) {
       return Error("Unable to find 'cpu' cgroup subsystem");
+    } else if (cpuCgroup.get() == systemRootCgroup) {
+      return Error(
+          "Process '" + stringify(pid) +
+          "' should not be in the system root cgroup (being destroyed?)");
     }
 
     const Try<hashmap<string, uint64_t>> stat =