You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2014/08/09 01:09:30 UTC
git commit: Retry freeze in cgroups TasksKiller to work around
MESOS-1689.
Repository: mesos
Updated Branches:
refs/heads/master d376f05fe -> 52cf9b3ff
Retry freeze in cgroups TasksKiller to work around MESOS-1689.
Review: https://reviews.apache.org/r/24511
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/52cf9b3f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/52cf9b3f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/52cf9b3f
Branch: refs/heads/master
Commit: 52cf9b3ffbbe7648d1b529b5112b1b5b6360eaa5
Parents: d376f05
Author: Jie Yu <yu...@gmail.com>
Authored: Fri Aug 8 14:56:16 2014 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Fri Aug 8 15:44:23 2014 -0700
----------------------------------------------------------------------
src/linux/cgroups.cpp | 31 ++++++++++++++++++++++++++++++-
src/linux/cgroups.hpp | 7 +++++++
2 files changed, 37 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/52cf9b3f/src/linux/cgroups.cpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp
index 39a4874..47be0ef 100644
--- a/src/linux/cgroups.cpp
+++ b/src/linux/cgroups.cpp
@@ -1470,10 +1470,30 @@ protected:
virtual void finalize()
{
chain.discard();
+
+ // TODO(jieyu): Wait until 'chain' is in DISCARDED state before
+ // discarding 'promise'.
promise.discard();
}
private:
+ static Future<Nothing> freezeTimedout(
+ Future<Nothing> future,
+ const PID<TasksKiller>& pid,
+ const string& hierarchy,
+ const string& cgroup)
+ {
+ // Cancel the freeze operation.
+ // TODO(jieyu): Wait until 'future' is in DISCARDED state before
+ // starting retry.
+ future.discard();
+
+ // Thaw the cgroup before trying to freeze again to allow any
+ // pending signals to be delivered. See MESOS-1689 for details.
+ return cgroups::freezer::thaw(hierarchy, cgroup)
+ .then(defer(pid, &Self::freeze));
+ }
+
void killTasks() {
// Chain together the steps needed to kill all tasks in the cgroup.
chain = freeze() // Freeze the cgroup.
@@ -1486,7 +1506,16 @@ private:
Future<Nothing> freeze()
{
- return cgroups::freezer::freeze(hierarchy, cgroup);
+ // TODO(jieyu): This is a workaround for MESOS-1689. We will move
+ // away from freezer once we have pid namespace support.
+ return cgroups::freezer::freeze(hierarchy, cgroup).after(
+ FREEZE_RETRY_INTERVAL,
+ lambda::bind(
+ &freezeTimedout,
+ lambda::_1,
+ self(),
+ hierarchy,
+ cgroup));
}
Future<Nothing> kill()
http://git-wip-us.apache.org/repos/asf/mesos/blob/52cf9b3f/src/linux/cgroups.hpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp
index 9dfba6e..26dcb3d 100644
--- a/src/linux/cgroups.hpp
+++ b/src/linux/cgroups.hpp
@@ -45,6 +45,13 @@ namespace cgroups {
// explicitly specified.
const Duration DESTROY_TIMEOUT = Seconds(60);
+
+// Freezing a cgroup may get stuck (see MESOS-1689 for details). To
+// work around this, we may want to thaw the cgroup and retry freezing it.
+// This is the suggested retry interval.
+const Duration FREEZE_RETRY_INTERVAL = Seconds(10);
+
+
// Default number of assign attempts when moving threads to a cgroup.
const unsigned int THREAD_ASSIGN_RETRIES = 100;