You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2017/03/18 03:39:07 UTC
[1/2] mesos git commit: Added MESOS-7237 to 1.2.1 CHANGELOG.
Repository: mesos
Updated Branches:
refs/heads/1.2.x 801cdfac6 -> e718bc75b
Added MESOS-7237 to 1.2.1 CHANGELOG.
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/e718bc75
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/e718bc75
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/e718bc75
Branch: refs/heads/1.2.x
Commit: e718bc75bee157c8513462662d03d0d2b043b443
Parents: dc498b7
Author: Jie Yu <yu...@gmail.com>
Authored: Fri Mar 17 20:37:47 2017 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Fri Mar 17 20:38:48 2017 -0700
----------------------------------------------------------------------
CHANGELOG | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/e718bc75/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 47e9236..145825c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,7 @@ Release Notes - Mesos - Version 1.2.1 (WIP)
All Issues:
** Bug
* [MESOS-7208] - Persistent volume ownership is set to root when task is running with non-root user
+ * [MESOS-7237] - Enabling cgroups_limit_swap can lead to "invalid argument" error.
Release Notes - Mesos - Version 1.2.0
[2/2] mesos git commit: Ensured the cgroup memory+swap limit is no
less than the memory limit.
Posted by ji...@apache.org.
Ensured the cgroup memory+swap limit is no less than the memory limit.
This addressed the bug we saw in MESOS-7237. Linux kernel will ensure
cgroup memory+swap limit (i.e., 'memory.memsw.limit_in_bytes') is always
no less than the memory limit ('memory.limit_in_bytes'). Prior to this
patch, the memory subsystem in the cgroups isolator is buggy because
memory+swap limit is always changed after the memory limit. This will
cause EINVAL if we increase the memory limit of the container.
Review: https://reviews.apache.org/r/57574
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/dc498b7c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/dc498b7c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/dc498b7c
Branch: refs/heads/1.2.x
Commit: dc498b7ca1f38dab5187d0553b147d0f56ad10bd
Parents: 801cdfa
Author: Jie Yu <yu...@gmail.com>
Authored: Mon Mar 13 16:58:03 2017 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Fri Mar 17 20:38:48 2017 -0700
----------------------------------------------------------------------
.../isolators/cgroups/subsystems/memory.cpp | 84 +++++++++++++-------
1 file changed, 56 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/dc498b7c/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp
index 009e996..113e908 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups/subsystems/memory.cpp
@@ -211,39 +211,16 @@ Future<Nothing> MemorySubsystem::update(
": " + currentLimit.error());
}
- // Determine whether to set the hard limit. We only update the hard
- // limit if this is the first time or when we're raising the
- // existing limit, then we can update the hard limit safely.
- // Otherwise, if we need to decrease 'memory.limit_in_bytes' we may
- // induce an OOM if too much memory is in use. As a result, we only
- // update the soft limit when the memory reservation is being
- // reduced. This is probably okay if the machine has available
- // resources.
- //
- // TODO(benh): Introduce a MemoryWatcherProcess which monitors the
- // discrepancy between usage and soft limit and introduces a "manual
- // oom" if necessary.
- //
- // If this is the first time, 'memory.limit_in_bytes' is the inital
- // value which may be one of following possible values:
- // * LONG_MAX (Linux Kernel Version < 3.12)
- // * ULONG_MAX (3.12 <= Linux Kernel Version < 3.19)
- // * LONG_MAX / pageSize * pageSize (Linux Kernel Version >= 3.19)
- // Thus, if 'currentLimit' is greater or equals to 'initialLimit'
- // below, we know it's the first time.
- static const size_t pageSize = os::pagesize();
- Bytes initialLimit(static_cast<uint64_t>(LONG_MAX / pageSize * pageSize));
+ bool limitSwap = flags.cgroups_limit_swap;
- if (currentLimit.get() >= initialLimit || limit > currentLimit.get()) {
- // We always set limit_in_bytes first and optionally set
- // memsw.limit_in_bytes if `cgroups_limit_swap` is true.
+ auto setLimitInBytes = [=]() -> Try<Nothing> {
Try<Nothing> write = cgroups::memory::limit_in_bytes(
hierarchy,
cgroup,
limit);
if (write.isError()) {
- return Failure(
+ return Error(
"Failed to set 'memory.limit_in_bytes'"
": " + write.error());
}
@@ -251,14 +228,18 @@ Future<Nothing> MemorySubsystem::update(
LOG(INFO) << "Updated 'memory.limit_in_bytes' to " << limit
<< " for container " << containerId;
- if (flags.cgroups_limit_swap) {
+ return Nothing();
+ };
+
+ auto setMemswLimitInBytes = [=]() -> Try<Nothing> {
+ if (limitSwap) {
Try<bool> write = cgroups::memory::memsw_limit_in_bytes(
hierarchy,
cgroup,
limit);
if (write.isError()) {
- return Failure(
+ return Error(
"Failed to set 'memory.memsw.limit_in_bytes'"
": " + write.error());
}
@@ -266,6 +247,53 @@ Future<Nothing> MemorySubsystem::update(
LOG(INFO) << "Updated 'memory.memsw.limit_in_bytes' to " << limit
<< " for container " << containerId;
}
+
+ return Nothing();
+ };
+
+ vector<lambda::function<Try<Nothing>(void)>> setFunctions;
+
+ // Now, determine whether to set the hard limit. We only update the
+ // hard limit if this is the first time or when we're raising the
+ // existing limit, then we can update the hard limit safely.
+ // Otherwise, if we need to decrease 'memory.limit_in_bytes' we may
+ // induce an OOM if too much memory is in use. As a result, we only
+ // update the soft limit when the memory reservation is being
+ // reduced. This is probably okay if the machine has available
+ // resources.
+ //
+ // TODO(benh): Introduce a MemoryWatcherProcess which monitors the
+ // discrepancy between usage and soft limit and introduces a "manual
+ // oom" if necessary.
+ //
+ // If this is the first time, 'memory.limit_in_bytes' is unlimited
+ // which may be one of following possible values:
+ // * LONG_MAX (Linux Kernel Version < 3.12)
+ // * ULONG_MAX (3.12 <= Linux Kernel Version < 3.19)
+ // * LONG_MAX / pageSize * pageSize (Linux Kernel Version >= 3.19)
+ static const size_t pageSize = os::pagesize();
+ Bytes unlimited(static_cast<uint64_t>(LONG_MAX / pageSize * pageSize));
+
+ // NOTE: It's required by the Linux kernel that
+ // 'memory.limit_in_bytes' should be less than or equal to
+ // 'memory.memsw.limit_in_bytes'. Otherwise, the kernel will fail
+ // the cgroup write with EINVAL. As a result, the order of setting
+ // these two control files is important. See MESOS-7237 for details.
+ if (currentLimit.get() >= unlimited) {
+ // This is the first time memory limit is being set. So
+ // effectively we are reducing the memory limits because of which
+ // we need to set the 'memory.limit_in_bytes' before setting
+ // 'memory.memsw.limit_in_bytes'
+ setFunctions = {setLimitInBytes, setMemswLimitInBytes};
+ } else if (limit > currentLimit.get()) {
+ setFunctions = {setMemswLimitInBytes, setLimitInBytes};
+ }
+
+ foreach (const auto& setFunction, setFunctions) {
+ Try<Nothing> result = setFunction();
+ if (result.isError()) {
+ return Failure(result.error());
+ }
}
return Nothing();