You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2018/01/24 01:34:11 UTC
[2/2] mesos git commit: Fixed resource statistics for Docker
containers being destroyed.
Fixed resource statistics for Docker containers being destroyed.
If a process has exited, but not reaped yet (zombie procses),
`/proc/<pid>/cgroup` will still exist, but the process's cgroup will be
reset to the root cgroup. In DockerContainerizer, we rely on
`/proc/<pid>/cgroup` to get the cpu/memory statistics of the container.
If the `usage` call happens when the process is a zombie, the cpu/memory
statistics will actually be that of the root cgroup, which is obviously
not correct. See more details in MESOS-8480.
This patch fixed the issue by checking if the cgroup of a given pid is
root cgroup or not.
Review: https://reviews.apache.org/r/65301/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/21a2fae5
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/21a2fae5
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/21a2fae5
Branch: refs/heads/1.4.x
Commit: 21a2fae5c5162e559981b40b40a413910b9c1e7e
Parents: 5f7ef53
Author: Chun-Hung Hsiao <ch...@mesosphere.io>
Authored: Tue Jan 23 17:13:05 2018 -0800
Committer: Jie Yu <yu...@gmail.com>
Committed: Tue Jan 23 17:31:29 2018 -0800
----------------------------------------------------------------------
src/slave/containerizer/docker.cpp | 36 +++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/21a2fae5/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index caac224..b3109a7 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -1687,6 +1687,13 @@ Future<Nothing> DockerContainerizerProcess::__update(
static Result<string> cpuHierarchy = cgroups::hierarchy("cpu");
static Result<string> memoryHierarchy = cgroups::hierarchy("memory");
+ // NOTE: Normally, a Docker container should be in its own cgroup.
+ // However, a zombie process (exited but not reaped) will be
+ // temporarily moved into the system root cgroup. We add some
+ // defensive check here to make sure we are not changing the knobs
+ // in the root cgroup. See MESOS-8480 for details.
+ const string systemRootCgroup = stringify(os::PATH_SEPARATOR);
+
if (cpuHierarchy.isError()) {
return Failure("Failed to determine the cgroup hierarchy "
"where the 'cpu' subsystem is mounted: " +
@@ -1715,11 +1722,16 @@ Future<Nothing> DockerContainerizerProcess::__update(
LOG(WARNING) << "Container " << containerId
<< " does not appear to be a member of a cgroup"
<< " where the 'cpu' subsystem is mounted";
+ } else if (cpuCgroup.get() == systemRootCgroup) {
+ LOG(WARNING)
+ << "Process '" << pid
+ << "' should not be in the system root cgroup (being destroyed?)";
}
// And update the CPU shares (if applicable).
if (cpuHierarchy.isSome() &&
cpuCgroup.isSome() &&
+ cpuCgroup.get() != systemRootCgroup &&
_resources.cpus().isSome()) {
double cpuShares = _resources.cpus().get();
@@ -1777,11 +1789,16 @@ Future<Nothing> DockerContainerizerProcess::__update(
LOG(WARNING) << "Container " << containerId
<< " does not appear to be a member of a cgroup"
<< " where the 'memory' subsystem is mounted";
+ } else if (memoryCgroup.get() == systemRootCgroup) {
+ LOG(WARNING)
+ << "Process '" << pid
+ << "' should not be in the system root cgroup (being destroyed?)";
}
// And update the memory limits (if applicable).
if (memoryHierarchy.isSome() &&
memoryCgroup.isSome() &&
+ memoryCgroup.get() != systemRootCgroup &&
_resources.mem().isSome()) {
// TODO(tnachen): investigate and handle OOM with docker.
Bytes mem = _resources.mem().get();
@@ -1928,6 +1945,13 @@ Try<ResourceStatistics> DockerContainerizerProcess::cgroupsStatistics(
const Result<string> cpuacctHierarchy = cgroups::hierarchy("cpuacct");
const Result<string> memHierarchy = cgroups::hierarchy("memory");
+ // NOTE: Normally, a Docker container should be in its own cgroup.
+ // However, a zombie process (exited but not reaped) will be
+ // temporarily moved into the system root cgroup. We add some
+ // defensive check here to make sure we are not reporting statistics
+ // for the root cgroup. See MESOS-8480 for details.
+ const string systemRootCgroup = stringify(os::PATH_SEPARATOR);
+
if (cpuacctHierarchy.isError()) {
return Error(
"Failed to determine the cgroup 'cpuacct' subsystem hierarchy: " +
@@ -1947,6 +1971,10 @@ Try<ResourceStatistics> DockerContainerizerProcess::cgroupsStatistics(
cpuacctCgroup.error());
} else if (cpuacctCgroup.isNone()) {
return Error("Unable to find 'cpuacct' cgroup subsystem");
+ } else if (cpuacctCgroup.get() == systemRootCgroup) {
+ return Error(
+ "Process '" + stringify(pid) +
+ "' should not be in the system root cgroup (being destroyed?)");
}
const Result<string> memCgroup = cgroups::memory::cgroup(pid);
@@ -1956,6 +1984,10 @@ Try<ResourceStatistics> DockerContainerizerProcess::cgroupsStatistics(
memCgroup.error());
} else if (memCgroup.isNone()) {
return Error("Unable to find 'memory' cgroup subsystem");
+ } else if (memCgroup.get() == systemRootCgroup) {
+ return Error(
+ "Process '" + stringify(pid) +
+ "' should not be in the system root cgroup (being destroyed?)");
}
const Try<cgroups::cpuacct::Stats> cpuAcctStat =
@@ -2001,6 +2033,10 @@ Try<ResourceStatistics> DockerContainerizerProcess::cgroupsStatistics(
cpuCgroup.error());
} else if (cpuCgroup.isNone()) {
return Error("Unable to find 'cpu' cgroup subsystem");
+ } else if (cpuCgroup.get() == systemRootCgroup) {
+ return Error(
+ "Process '" + stringify(pid) +
+ "' should not be in the system root cgroup (being destroyed?)");
}
const Try<hashmap<string, uint64_t>> stat =