You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by id...@apache.org on 2014/06/17 20:04:54 UTC

[1/3] git commit: Use a timeout to discard cgroups::destroy.

Repository: mesos
Updated Branches:
  refs/heads/master fb1386837 -> 18d737cdd


Use a timeout to discard cgroups::destroy.

A default duration of 60 seconds is used before a cgroups::destroy is
discarded.

Review: https://reviews.apache.org/r/22064


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/18d737cd
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/18d737cd
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/18d737cd

Branch: refs/heads/master
Commit: 18d737cddaaa1e97a60fed27fadfd7d9cf237ac8
Parents: ca2ba6a
Author: Ian Downes <id...@twitter.com>
Authored: Thu May 15 15:40:19 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Tue Jun 17 11:00:41 2014 -0700

----------------------------------------------------------------------
 src/linux/cgroups.cpp                           | 22 ++++++----
 src/linux/cgroups.hpp                           |  3 ++
 .../isolators/cgroups/cpushare.cpp              | 46 +++++++++++++++-----
 .../isolators/cgroups/cpushare.hpp              |  4 +-
 .../containerizer/isolators/cgroups/mem.cpp     | 27 +++++++++---
 .../containerizer/isolators/cgroups/mem.hpp     |  4 +-
 src/slave/containerizer/linux_launcher.cpp      | 19 ++++----
 7 files changed, 90 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/18d737cd/src/linux/cgroups.cpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp
index 0b5f95a..a4274a8 100644
--- a/src/linux/cgroups.cpp
+++ b/src/linux/cgroups.cpp
@@ -1668,20 +1668,26 @@ Future<Nothing> destroy(const string& hierarchy, const string& cgroup)
 }
 
 
-Future<Nothing> destroyTimedOut(
-    Future<Nothing> future,
-    const string& hierarchy,
-    const string& cgroup,
-    const Duration& timeout)
-{
-  LOG(WARNING) << "Failed to destroy cgroup '" << path::join(hierarchy, cgroup)
-               << "' after " << timeout;
+namespace {
 
+Future<Nothing> discard(Future<Nothing> future)
+{
   future.discard();
 
   return future;
 }
 
+} // namespace
+
+Future<Nothing> destroy(
+    const string& hierarchy,
+    const string& cgroup,
+    const Duration& timeout)
+{
+  return destroy(hierarchy, cgroup)
+    .after(timeout, lambda::bind(&discard, lambda::_1));
+}
+
 
 // Forward declaration.
 Future<bool> _cleanup(const string& hierarchy);

http://git-wip-us.apache.org/repos/asf/mesos/blob/18d737cd/src/linux/cgroups.hpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp
index 38e345f..75c5602 100644
--- a/src/linux/cgroups.hpp
+++ b/src/linux/cgroups.hpp
@@ -40,6 +40,9 @@
 
 namespace cgroups {
 
+// Suggested timeout for use with the convenience version of
+// cgroups::destroy(); it is not a default timeout and must be
+// explicitly specified.
 const Duration DESTROY_TIMEOUT = Seconds(60);
 
 // Default number of assign attempts when moving threads to a cgroup.

http://git-wip-us.apache.org/repos/asf/mesos/blob/18d737cd/src/slave/containerizer/isolators/cgroups/cpushare.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/cpushare.cpp b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
index 1e2db0f..ca1b421 100644
--- a/src/slave/containerizer/isolators/cgroups/cpushare.cpp
+++ b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
@@ -173,7 +173,9 @@ Future<Nothing> CgroupsCpushareIsolatorProcess::recover(
     if (!cgroups.contains(orphan)) {
       LOG(INFO) << "Removing orphaned cgroup"
                 << " '" << path::join("cpu", orphan) << "'";
-      cgroups::destroy(hierarchies["cpu"], orphan);
+      // We don't wait on the destroy as we don't want to block recovery.
+      cgroups::destroy(
+          hierarchies["cpu"], orphan, cgroups::DESTROY_TIMEOUT);
     }
   }
 
@@ -198,7 +200,9 @@ Future<Nothing> CgroupsCpushareIsolatorProcess::recover(
     if (!cgroups.contains(orphan)) {
       LOG(INFO) << "Removing orphaned cgroup"
                 << " '" << path::join("cpuacct", orphan) << "'";
-      cgroups::destroy(hierarchies["cpuacct"], orphan);
+      // We don't wait on the destroy as we don't want to block recovery.
+      cgroups::destroy(
+          hierarchies["cpuacct"], orphan, cgroups::DESTROY_TIMEOUT);
     }
   }
 
@@ -429,6 +433,13 @@ Future<ResourceStatistics> CgroupsCpushareIsolatorProcess::usage(
 }
 
 
+namespace {
+
+Future<Nothing> _nothing() { return Nothing(); }
+
+} // namespace {
+
+
 Future<Nothing> CgroupsCpushareIsolatorProcess::cleanup(
     const ContainerID& containerId)
 {
@@ -442,25 +453,40 @@ Future<Nothing> CgroupsCpushareIsolatorProcess::cleanup(
   Info* info = CHECK_NOTNULL(infos[containerId]);
 
   list<Future<Nothing> > futures;
-  futures.push_back(cgroups::destroy(hierarchies["cpu"], info->cgroup));
-  futures.push_back(cgroups::destroy(hierarchies["cpuacct"], info->cgroup));
+  foreachvalue (const string& hierarchy, hierarchies) {
+    futures.push_back(
+        cgroups::destroy(hierarchy, info->cgroup, cgroups::DESTROY_TIMEOUT));
+  }
 
   return collect(futures)
-    .then(defer(PID<CgroupsCpushareIsolatorProcess>(this),
+    .onAny(defer(PID<CgroupsCpushareIsolatorProcess>(this),
                 &CgroupsCpushareIsolatorProcess::_cleanup,
-                containerId));
+                containerId,
+                lambda::_1))
+    .then(lambda::bind(&_nothing));
 }
 
 
-Future<Nothing> CgroupsCpushareIsolatorProcess::_cleanup(
-    const ContainerID& containerId)
+Future<list<Nothing> > CgroupsCpushareIsolatorProcess::_cleanup(
+    const ContainerID& containerId,
+    const Future<list<Nothing> >& future)
 {
-  CHECK(infos.contains(containerId));
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  CHECK_NOTNULL(infos[containerId]);
+
+  if (!future.isReady()) {
+    return Failure("Failed to clean up container " + stringify(containerId) +
+                   " : " + (future.isFailed() ? future.failure()
+                                              : "discarded"));
+  }
 
   delete infos[containerId];
   infos.erase(containerId);
 
-  return Nothing();
+  return future;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/18d737cd/src/slave/containerizer/isolators/cgroups/cpushare.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/cpushare.hpp b/src/slave/containerizer/isolators/cgroups/cpushare.hpp
index 909ea88..780037b 100644
--- a/src/slave/containerizer/isolators/cgroups/cpushare.hpp
+++ b/src/slave/containerizer/isolators/cgroups/cpushare.hpp
@@ -75,7 +75,9 @@ private:
       const Flags& flags,
       const hashmap<std::string, std::string>& hierarchies);
 
-  virtual process::Future<Nothing> _cleanup(const ContainerID& containerId);
+  virtual process::Future<std::list<Nothing> > _cleanup(
+      const ContainerID& containerId,
+      const process::Future<std::list<Nothing> >& future);
 
   struct Info
   {

http://git-wip-us.apache.org/repos/asf/mesos/blob/18d737cd/src/slave/containerizer/isolators/cgroups/mem.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/mem.cpp b/src/slave/containerizer/isolators/cgroups/mem.cpp
index 73b926f..3b731f9 100644
--- a/src/slave/containerizer/isolators/cgroups/mem.cpp
+++ b/src/slave/containerizer/isolators/cgroups/mem.cpp
@@ -166,7 +166,8 @@ Future<Nothing> CgroupsMemIsolatorProcess::recover(
 
     if (!cgroups.contains(orphan)) {
       LOG(INFO) << "Removing orphaned cgroup '" << orphan << "'";
-      cgroups::destroy(hierarchy, orphan);
+      // We don't wait on the destroy as we don't want to block recovery.
+      cgroups::destroy(hierarchy, orphan, cgroups::DESTROY_TIMEOUT);
     }
   }
 
@@ -380,17 +381,29 @@ Future<Nothing> CgroupsMemIsolatorProcess::cleanup(
     info->oomNotifier.discard();
   }
 
-  return cgroups::destroy(hierarchy, info->cgroup)
-    .then(defer(PID<CgroupsMemIsolatorProcess>(this),
-                &CgroupsMemIsolatorProcess::_cleanup,
-                containerId));
+  return cgroups::destroy(hierarchy, info->cgroup, cgroups::DESTROY_TIMEOUT)
+    .onAny(defer(PID<CgroupsMemIsolatorProcess>(this),
+                 &CgroupsMemIsolatorProcess::_cleanup,
+                 containerId,
+                 lambda::_1));
 }
 
 
 Future<Nothing> CgroupsMemIsolatorProcess::_cleanup(
-    const ContainerID& containerId)
+    const ContainerID& containerId,
+    const Future<Nothing>& future)
 {
-  CHECK(infos.contains(containerId));
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  CHECK_NOTNULL(infos[containerId]);
+
+  if (!future.isReady()) {
+    return Failure("Failed to clean up container " + stringify(containerId) +
+                   " : " + (future.isFailed() ? future.failure()
+                                              : "discarded"));
+  }
 
   delete infos[containerId];
   infos.erase(containerId);

http://git-wip-us.apache.org/repos/asf/mesos/blob/18d737cd/src/slave/containerizer/isolators/cgroups/mem.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/mem.hpp b/src/slave/containerizer/isolators/cgroups/mem.hpp
index 362ebcf..33b0ca8 100644
--- a/src/slave/containerizer/isolators/cgroups/mem.hpp
+++ b/src/slave/containerizer/isolators/cgroups/mem.hpp
@@ -73,7 +73,9 @@ public:
 private:
   CgroupsMemIsolatorProcess(const Flags& flags, const std::string& hierarchy);
 
-  virtual process::Future<Nothing> _cleanup(const ContainerID& containerId);
+  virtual process::Future<Nothing> _cleanup(
+      const ContainerID& containerId,
+      const process::Future<Nothing>& future);
 
   struct Info
   {

http://git-wip-us.apache.org/repos/asf/mesos/blob/18d737cd/src/slave/containerizer/linux_launcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/linux_launcher.cpp b/src/slave/containerizer/linux_launcher.cpp
index 9d0205e..85c74f0 100644
--- a/src/slave/containerizer/linux_launcher.cpp
+++ b/src/slave/containerizer/linux_launcher.cpp
@@ -125,7 +125,9 @@ Try<Nothing> LinuxLauncher::recover(const std::list<state::RunState>& states)
     if (!cgroups.contains(orphan)) {
       LOG(INFO) << "Removing orphaned cgroup"
                 << " '" << path::join("freezer", orphan) << "'";
-      cgroups::destroy(hierarchy, orphan);
+      // Do not wait on the destroy to complete so we don't block
+      // recovery.
+      cgroups::destroy(hierarchy, orphan, cgroups::DESTROY_TIMEOUT);
     }
   }
 
@@ -271,13 +273,13 @@ Try<pid_t> LinuxLauncher::fork(
 
 Future<Nothing> _destroy(
     const ContainerID& containerId,
-    process::Future<Nothing> destroyed)
+    const process::Future<Nothing>& destroyed)
 {
-  if (destroyed.isFailed()) {
-    LOG(ERROR) << "Failed to destroy freezer cgroup for '"
-               << containerId << "': " << destroyed.failure();
-    return Failure("Failed to destroy launcher: " + destroyed.failure());
+  if (!destroyed.isReady()) {
+    return Failure("Failed to destroy launcher: " +
+                   (destroyed.isFailed() ? destroyed.failure() : "discarded"));
   }
+
   return Nothing();
 }
 
@@ -286,8 +288,9 @@ Future<Nothing> LinuxLauncher::destroy(const ContainerID& containerId)
 {
   pids.erase(containerId);
 
-  return cgroups::destroy(hierarchy, cgroup(containerId))
-    .then(lambda::bind(&_destroy, containerId, lambda::_1));
+  return cgroups::destroy(
+      hierarchy, cgroup(containerId), cgroups::DESTROY_TIMEOUT)
+    .onAny(lambda::bind(&_destroy, containerId, lambda::_1));
 }
 
 


[3/3] git commit: Refactor cgroups::internal::Freezer.

Posted by id...@apache.org.
Refactor cgroups::internal::Freezer.

The Freezer tries to converge to the "FROZEN" state by repeatedly (every
100 ms) writing "FROZEN" to the freezer.state control file. It assumes
there are two possible reasons why a process does not get frozen during
an attempt:

1. It was in the middle of being forked and did not receive the signal;
it will receive it at the next attempt.

2. It is in uninterruptable sleep ("D" state). Normally, this is from
device I/O or paging and is shortlived, in which case it'll be frozen on
retry. However, processes can get stuck in "D" state, either because of
a device issue, incorrect OOM handling, or kernel bugs. Under this
scenario the correct behavior is to fail after a timeout (defaults to 60
seconds).

Freezer functions have been namespaced under cgroups::freezer.

Review: https://reviews.apache.org/r/20817


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/07c3dc57
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/07c3dc57
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/07c3dc57

Branch: refs/heads/master
Commit: 07c3dc57fa272a9c25b5df2f7ac218fdcfb2f5d4
Parents: fb13868
Author: Ian Downes <id...@twitter.com>
Authored: Wed Apr 23 10:48:33 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Tue Jun 17 11:00:41 2014 -0700

----------------------------------------------------------------------
 src/linux/cgroups.cpp       | 350 +++++++++++++++------------------------
 src/linux/cgroups.hpp       |  72 ++++----
 src/tests/cgroups_tests.cpp |  65 ++++----
 3 files changed, 195 insertions(+), 292 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/07c3dc57/src/linux/cgroups.cpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp
index 142ac43..723acc1 100644
--- a/src/linux/cgroups.cpp
+++ b/src/linux/cgroups.cpp
@@ -1296,270 +1296,144 @@ Future<uint64_t> listen(
   return future;
 }
 
-
 namespace internal {
 
+namespace freezer {
 
-// The process that freezes or thaws the cgroup.
-class Freezer : public Process<Freezer>
+Try<string> state(const string& hierarchy, const string& cgroup)
 {
-public:
-  Freezer(const string& _hierarchy,
-          const string& _cgroup,
-          const string& _action,
-          const Duration& _interval,
-          unsigned int _retries = FREEZE_RETRIES)
-    : hierarchy(_hierarchy),
-      cgroup(_cgroup),
-      action(_action),
-      interval(_interval),
-      retries(_retries) {}
-
-  virtual ~Freezer() {}
+  Try<string> state = cgroups::read(hierarchy, cgroup, "freezer.state");
 
-  // Return a future indicating the state of the freezer.
-  Future<bool> future() { return promise.future(); }
+  if (state.isError()) {
+    return Error("Failed to read freezer state: " + state.error());
+  }
 
-protected:
-  virtual void initialize()
-  {
-    // Stop the process if no one cares.
-    promise.future().onDiscard(lambda::bind(
-        static_cast<void (*)(const UPID&, bool)>(terminate), self(), true));
+  return strings::trim(state.get());
+}
 
-    CHECK(interval >= Seconds(0));
 
-    // Start the action.
-    CHECK(action == "FREEZE" || action == "THAW");
-    if (action == "FREEZE") {
-      freeze();
-    } else if (action == "THAW") {
-      thaw();
-    }
+Try<Nothing> state(
+    const string& hierarchy,
+    const string& cgroup,
+    const string& state)
+{
+  if (state != "FROZEN" && state != "THAWED") {
+    return Error("Invalid freezer state requested: " + state);
   }
 
-  virtual void finalize()
-  {
-    promise.discard();
+  Try<Nothing> write = cgroups::write(
+      hierarchy, cgroup, "freezer.state", state);
+  if (write.isError()) {
+    return Error("Failed to write '" + state +
+                 "' to control 'freezer.state': " + write.error());
+  } else {
+    return Nothing();
   }
+}
 
-private:
-  void freeze()
-  {
-    LOG(INFO) << "Trying to freeze cgroup " << path::join(hierarchy, cgroup);
+} // namespace freezer {
 
-    Try<Nothing> write = internal::write(
-        hierarchy, cgroup, "freezer.state", "FROZEN");
+class Freezer : public Process<Freezer>
+{
+public:
+  Freezer(
+      const string& _hierarchy,
+      const string& _cgroup)
+    : hierarchy(_hierarchy),
+      cgroup(_cgroup),
+      start(Clock::now()) {}
 
-    if (write.isError()) {
-      promise.fail("Failed to write control 'freezer.state': " + write.error());
-      terminate(self());
-    } else {
-      watchFrozen();
-    }
-  }
+  virtual ~Freezer() {}
 
-  void thaw()
+  void freeze()
   {
-    LOG(INFO) << "Trying to thaw cgroup " << path::join(hierarchy, cgroup);
-
-    Try<Nothing> write = internal::write(
-        hierarchy, cgroup, "freezer.state", "THAWED");
-
-    if (write.isError()) {
-      promise.fail("Failed to write control 'freezer.state': " + write.error());
+    Try<Nothing> freeze =
+      internal::freezer::state(hierarchy, cgroup, "FROZEN");
+    if (freeze.isError()) {
+      promise.fail(freeze.error());
       terminate(self());
-    } else {
-      watchThawed();
+      return;
     }
-  }
-
-  void watchFrozen(unsigned int attempt = 0)
-  {
-    Try<string> state = internal::read(hierarchy, cgroup, "freezer.state");
 
+    Try<string> state = internal::freezer::state(hierarchy, cgroup);
     if (state.isError()) {
-      promise.fail("Failed to read control 'freezer.state': " + state.error());
+      promise.fail(state.error());
       terminate(self());
       return;
     }
 
-    if (strings::trim(state.get()) == "FROZEN") {
-      LOG(INFO) << "Successfully froze cgroup " << path::join(hierarchy, cgroup)
-                << " after " << attempt + 1 << " attempts";
-      promise.set(true);
+    if (state.get() == "FROZEN") {
+      LOG(INFO) << "Successfullly froze cgroup "
+                << path::join(hierarchy, cgroup)
+                << " after " << (Clock::now() - start);
+      promise.set(Nothing());
       terminate(self());
       return;
-    } else if (strings::trim(state.get()) == "FREEZING") {
-      // The freezer.state is in FREEZING state. This is because not all the
-      // processes in the given cgroup can be frozen at the moment. The main
-      // cause is that some of the processes are in stopped/traced state ('T'
-      // state shown in ps command). It is likely that the freezer.state keeps
-      // in FREEZING state if these stopped/traced processes are not resumed.
-      // Therefore, here we send SIGCONT to those stopped/traced processes to
-      // make sure that the freezer can finish.
-      // TODO(jieyu): This code can be removed in the future as the newer
-      // version of the kernel solves this problem (e.g. Linux-3.2.0).
-      Try<set<pid_t> > pids = processes(hierarchy, cgroup);
-      if (pids.isError()) {
-        promise.fail("Failed to get processes of cgroup: " + pids.error());
-        terminate(self());
-        return;
-      }
-
-      // It appears possible for processes to go away while the cgroup
-      // is in the FREEZING state. We ignore such processes.
-      // See: https://issues.apache.org/jira/browse/MESOS-461
-      foreach (pid_t pid, pids.get()) {
-        Result<proc::ProcessStatus> status = proc::status(pid);
-
-        if (!status.isSome()) {
-          LOG(WARNING)
-            << "Failed to get process status for pid " << pid << ": "
-            << (status.isError() ? status.error() : "pid does not exist");
-          continue;
-        }
-
-        // Check whether the process is in stopped/traced state.
-        if (status.get().state == 'T') {
-          // Send a SIGCONT signal to the process.
-          if (::kill(pid, SIGCONT) == -1) {
-            promise.fail(
-                "Failed to send SIGCONT to process " + stringify(pid) +
-                ": " + string(strerror(errno)));
-            terminate(self());
-            return;
-          }
-        }
-      }
-
-      if (attempt > retries) {
-        LOG(WARNING) << "Unable to freeze " << path::join(hierarchy, cgroup)
-                     << " within " << retries + 1 << " attempts";
-        promise.set(false);
-        terminate(self());
-        return;
-      }
-
-      // Retry the freezing operation.
-      Try<Nothing> write = internal::write(
-          hierarchy, cgroup, "freezer.state", "FROZEN");
-
-      if (write.isError()) {
-        promise.fail(
-            "Failed to write control 'freezer.state': " + write.error());
-        terminate(self());
-        return;
-      }
-
-      // Not done yet, keep watching (and possibly retrying).
-      delay(interval, self(), &Freezer::watchFrozen, attempt + 1);
-    } else {
-      LOG(FATAL) << "Unexpected state: " << strings::trim(state.get())
-                 << " of cgroup " << path::join(hierarchy, cgroup);
     }
+
+    // Attempt to freeze the freezer cgroup again.
+    delay(Milliseconds(100), self(), &Self::freeze);
   }
 
-  void watchThawed()
+  void thaw()
   {
-    Try<string> state = internal::read(hierarchy, cgroup, "freezer.state");
+    Try<Nothing> thaw = internal::freezer::state(hierarchy, cgroup, "THAWED");
+    if (thaw.isError()) {
+      promise.fail(thaw.error());
+      terminate(self());
+      return;
+    }
 
+    Try<string> state = internal::freezer::state(hierarchy, cgroup);
     if (state.isError()) {
-      promise.fail("Failed to read control 'freezer.state': " + state.error());
+      promise.fail(state.error());
       terminate(self());
       return;
     }
 
-    if (strings::trim(state.get()) == "THAWED") {
-      LOG(INFO) << "Successfully thawed " << path::join(hierarchy, cgroup);
-      promise.set(true);
+    if (state.get() == "THAWED") {
+      LOG(INFO) << "Successfullly thawed cgroup "
+                << path::join(hierarchy, cgroup)
+                << " after " << (Clock::now() - start);
+      promise.set(Nothing());
       terminate(self());
-    } else if (strings::trim(state.get()) == "FROZEN") {
-      // Not done yet, keep watching.
-      delay(interval, self(), &Freezer::watchThawed);
-    } else {
-      LOG(FATAL) << "Unexpected state: " << strings::trim(state.get())
-                 << " of cgroup " << path::join(hierarchy, cgroup);
+      return;
     }
-  }
 
-  const string hierarchy;
-  const string cgroup;
-  const string action;
-  const Duration interval;
-  const unsigned int retries;
-  Promise<bool> promise;
-};
-
-} // namespace internal {
-
-
-Future<bool> freeze(
-    const string& hierarchy,
-    const string& cgroup,
-    const Duration& interval,
-    unsigned int retries)
-{
-  Option<Error> error = verify(hierarchy, cgroup, "freezer.state");
-  if (error.isSome()) {
-    return Failure(error.get());
+    // Attempt to thaw the freezer cgroup again.
+    delay(Milliseconds(100), self(), &Self::thaw);
   }
 
-  if (interval < Seconds(0)) {
-    return Failure("Interval should be non-negative");
-  }
-
-  // Check the current freezer state.
-  Try<string> state = internal::read(hierarchy, cgroup, "freezer.state");
-  if (state.isError()) {
-    return Failure(
-        "Failed to read control 'freezer.state': " + state.error());
-  } else if (strings::trim(state.get()) == "FROZEN") {
-    // Immediately return success.
-    return true;
-  }
+  Future<Nothing> future() { return promise.future(); }
 
-  internal::Freezer* freezer =
-    new internal::Freezer(hierarchy, cgroup, "FREEZE", interval, retries);
-  Future<bool> future = freezer->future();
-  spawn(freezer, true);
-  return future;
-}
-
-
-Future<bool> thaw(
-    const string& hierarchy,
-    const string& cgroup,
-    const Duration& interval)
-{
-  Option<Error> error = verify(hierarchy, cgroup, "freezer.state");
-  if (error.isSome()) {
-    return Failure(error.get());
-  }
+protected:
+  virtual void initialize()
+  {
+    Option<Error> error = verify(hierarchy, cgroup, "freezer.state");
+    if (error.isSome()) {
+      promise.fail("Invalid freezer cgroup: " + error.get().message);
+      terminate(self());
+      return;
+    }
 
-  if (interval < Seconds(0)) {
-    return Failure("Interval should be non-negative");
+    // Stop attempting to freeze/thaw if nobody cares.
+    promise.future().onDiscard(lambda::bind(
+          static_cast<void(*)(const UPID&, bool)>(terminate), self(), true));
   }
 
-  // Check the current freezer state.
-  Try<string> state = internal::read(hierarchy, cgroup, "freezer.state");
-  if (state.isError()) {
-    return Failure(
-        "Failed to read control 'freezer.state': " + state.error());
-  } else if (strings::trim(state.get()) == "THAWED") {
-    // Immediately return success.
-    return true;
+  virtual void finalize()
+  {
+    promise.discard();
   }
 
-  internal::Freezer* freezer =
-    new internal::Freezer(hierarchy, cgroup, "THAW", interval);
-  Future<bool> future = freezer->future();
-  spawn(freezer, true);
-  return future;
-}
+private:
 
+  const string hierarchy;
+  const string cgroup;
+  const Time start;
+  Promise<Nothing> promise;
+};
 
-namespace internal {
 
 // The process used to wait for a cgroup to become empty (no task in it).
 class EmptyWatcher: public Process<EmptyWatcher>
@@ -1701,9 +1575,9 @@ private:
     chain.onAny(defer(self(), &Self::finished, lambda::_1));
   }
 
-  Future<bool> freeze()
+  Future<Nothing> freeze()
   {
-    return cgroups::freeze(hierarchy, cgroup, interval);
+    return cgroups::freezer::freeze(hierarchy, cgroup);
   }
 
   Future<Nothing> kill(const int signal)
@@ -1715,9 +1589,9 @@ private:
     return Nothing();
   }
 
-  Future<bool> thaw()
+  Future<Nothing> thaw()
   {
-    return cgroups::thaw(hierarchy, cgroup, interval);
+    return cgroups::freezer::thaw(hierarchy, cgroup);
   }
 
   Future<bool> empty()
@@ -2114,4 +1988,42 @@ Try<Bytes> max_usage_in_bytes(const string& hierarchy, const string& cgroup)
 
 } // namespace memory {
 
+
+namespace freezer {
+
+Future<Nothing> freeze(
+    const string& hierarchy,
+    const string& cgroup)
+{
+  LOG(INFO) << "Freezing cgroup " << path::join(hierarchy, cgroup);
+
+  internal::Freezer* freezer = new internal::Freezer(hierarchy, cgroup);
+
+  Future<Nothing> future = freezer->future();
+  spawn(freezer, true);
+
+  dispatch(freezer, &internal::Freezer::freeze);
+
+  return future;
+}
+
+
+Future<Nothing> thaw(
+    const string& hierarchy,
+    const string& cgroup)
+{
+  LOG(INFO) << "Thawing cgroup " << path::join(hierarchy, cgroup);
+
+  internal::Freezer* freezer = new internal::Freezer(hierarchy, cgroup);
+
+  Future<Nothing> future = freezer->future();
+  spawn(freezer, true);
+
+  dispatch(freezer, &internal::Freezer::thaw);
+
+  return future;
+}
+
+} // namespace freezer {
+
 } // namespace cgroups {

http://git-wip-us.apache.org/repos/asf/mesos/blob/07c3dc57/src/linux/cgroups.hpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp
index 21d87a0..599badb 100644
--- a/src/linux/cgroups.hpp
+++ b/src/linux/cgroups.hpp
@@ -29,6 +29,7 @@
 #include <sys/types.h>
 
 #include <process/future.hpp>
+#include <process/timeout.hpp>
 
 #include <stout/bytes.hpp>
 #include <stout/duration.hpp>
@@ -39,8 +40,7 @@
 
 namespace cgroups {
 
-// Default number of retry attempts when trying to freeze a cgroup.
-const unsigned int FREEZE_RETRIES = 50;
+const Duration DESTROY_TIMEOUT = Seconds(60);
 const unsigned int EMPTY_WATCHER_RETRIES = 50;
 
 // Default number of assign attempts when moving threads to a cgroup.
@@ -321,48 +321,6 @@ process::Future<uint64_t> listen(
     const Option<std::string>& args = Option<std::string>::none());
 
 
-// Freeze all the processes in a given cgroup. We try to use the freezer
-// subsystem implemented in cgroups. More detail can be found in
-// <kernel-source>/Documentation/cgroups/freezer-subsystem.txt. This function
-// will return a future which will become ready when all the processes have been
-// frozen (FROZEN). The future can be discarded to cancel the operation. The
-// freezer state after the cancellation is not defined. So the users need to
-// read the control file if they need to know the freezer state after the
-// cancellation. This function will return future failure if the freezer
-// subsystem is not available or it is not attached to the given hierarchy, or
-// the given cgroup is not valid, or the given cgroup has already been frozen.
-// @param   hierarchy   Path to the hierarchy root.
-// @param   cgroup      Path to the cgroup relative to the hierarchy root.
-// @param   interval    The time interval between two state check
-//                      requests (default: 0.1 seconds).
-// @param   retries     Number of retry attempts before giving up. None
-//                      indicates infinite retries. (default: 50 attempts).
-// @return  A future which will become true when all processes are frozen, or
-//          false when all retries have occurred unsuccessfully.
-//          Error if something unexpected happens.
-process::Future<bool> freeze(
-    const std::string& hierarchy,
-    const std::string& cgroup,
-    const Duration& interval = Milliseconds(100),
-    const unsigned int retries = FREEZE_RETRIES);
-
-
-// Thaw the given cgroup. This is a revert operation of freezeCgroup. It will
-// return error if the given cgroup is already thawed. Same as
-// freezeCgroup, this function will return a future which can be discarded to
-// allow users to cancel the operation.
-// @param   hierarchy   Path to the hierarchy root.
-// @param   cgroup      Path to the cgroup relative to the hierarchy root.
-// @param   interval    The time interval between two state check
-//                      requests (default: 0.1 seconds).
-// @return  A future which will become ready when all processes are thawed.
-//          Error if something unexpected happens.
-process::Future<bool> thaw(
-    const std::string& hierarchy,
-    const std::string& cgroup,
-    const Duration& interval = Milliseconds(100));
-
-
 // Destroy a cgroup under a given hierarchy. It will also recursively
 // destroy any sub-cgroups. If the freezer subsystem is attached to
 // the hierarchy, we attempt to kill all tasks in a given cgroup,
@@ -482,6 +440,32 @@ Try<Bytes> max_usage_in_bytes(
 
 } // namespace memory {
 
+
+// Freezer controls.
+// The freezer can be in one of three states:
+// 1. THAWED   : No process in the cgroup is frozen.
+// 2. FREEZING : Freezing is in progress but not all processes are frozen.
+// 3. FROZEN   : All processes are frozen.
+namespace freezer {
+
+// Freeze all processes in the given cgroup. The cgroup must be in a freezer
+// hierarchy. This function will return a future which will become ready when
+// all processes have been frozen (cgroup is in the FROZEN state).
+process::Future<Nothing> freeze(
+    const std::string& hierarchy,
+    const std::string& cgroup);
+
+
+// Thaw all processes in the given cgroup. The cgroup must be in a freezer
+// hierarchy. This is a revert operation of freezer::freeze. This function will
+// return a future which will become ready when all processes have been thawed
+// (cgroup is in the THAWED state).
+process::Future<Nothing> thaw(
+    const std::string& hierarchy,
+    const std::string& cgroup);
+
+} // namespace freezer {
+
 } // namespace cgroups {
 
 #endif // __CGROUPS_HPP__

http://git-wip-us.apache.org/repos/asf/mesos/blob/07c3dc57/src/tests/cgroups_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/cgroups_tests.cpp b/src/tests/cgroups_tests.cpp
index adcf46b..c00c30a 100644
--- a/src/tests/cgroups_tests.cpp
+++ b/src/tests/cgroups_tests.cpp
@@ -602,35 +602,7 @@ TEST_F(CgroupsAnyHierarchyWithFreezerTest, ROOT_CGROUPS_Freeze)
   pid_t pid = ::fork();
   ASSERT_NE(-1, pid);
 
-  if (pid > 0) {
-    // In parent process.
-    ::close(pipes[1]);
-
-    // Wait until child has assigned the cgroup.
-    ASSERT_LT(0, ::read(pipes[0], &dummy, sizeof(dummy)));
-    ::close(pipes[0]);
-
-    // Freeze the test cgroup.
-    Future<bool> freeze = cgroups::freeze(hierarchy, TEST_CGROUPS_ROOT);
-    freeze.await(Seconds(5));
-    ASSERT_TRUE(freeze.isReady());
-    EXPECT_EQ(true, freeze.get());
-
-    // Thaw the test cgroup.
-    Future<bool> thaw = cgroups::thaw(hierarchy, TEST_CGROUPS_ROOT);
-    thaw.await(Seconds(5));
-    ASSERT_TRUE(thaw.isReady());
-    EXPECT_EQ(true, thaw.get());
-
-    // Kill the child process.
-    ASSERT_NE(-1, ::kill(pid, SIGKILL));
-
-    // Wait for the child process.
-    int status;
-    EXPECT_NE(-1, ::waitpid((pid_t) -1, &status, 0));
-    ASSERT_TRUE(WIFSIGNALED(status));
-    EXPECT_EQ(SIGKILL, WTERMSIG(status));
-  } else {
+  if (pid == 0) {
     // In child process.
     ::close(pipes[0]);
 
@@ -657,6 +629,41 @@ TEST_F(CgroupsAnyHierarchyWithFreezerTest, ROOT_CGROUPS_Freeze)
     std::cerr << "Reach an unreachable statement!" << std::endl;
     abort();
   }
+
+  // In parent process.
+  ::close(pipes[1]);
+
+  // Wait until child has assigned the cgroup.
+  ASSERT_LT(0, ::read(pipes[0], &dummy, sizeof(dummy)));
+  ::close(pipes[0]);
+
+  // Freeze the test cgroup.
+  AWAIT_EXPECT_READY(cgroups::freezer::freeze(hierarchy, TEST_CGROUPS_ROOT));
+
+  // Thaw the test cgroup.
+  AWAIT_EXPECT_READY(cgroups::freezer::thaw(hierarchy, TEST_CGROUPS_ROOT));
+
+  // Kill the child process.
+  ASSERT_NE(-1, ::kill(pid, SIGKILL));
+
+  // Wait for the child process.
+  int status;
+  EXPECT_NE(-1, ::waitpid((pid_t) -1, &status, 0));
+  ASSERT_TRUE(WIFSIGNALED(status));
+  EXPECT_EQ(SIGKILL, WTERMSIG(status));
+}
+
+
+TEST_F(CgroupsAnyHierarchyWithCpuMemoryTest, ROOT_CGROUPS_FreezeNonFreezer)
+{
+  std::string hierarchy = path::join(baseHierarchy, "cpu");
+  ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
+
+  AWAIT_EXPECT_FAILED(cgroups::freezer::freeze(hierarchy, TEST_CGROUPS_ROOT));
+  AWAIT_EXPECT_FAILED(cgroups::freezer::thaw(hierarchy, TEST_CGROUPS_ROOT));
+
+  // The cgroup is empty so we should still be able to destroy it.
+  AWAIT_READY(cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT));
 }
 
 


[2/3] git commit: Refactored cgroups::destroy to use a single pass.

Posted by id...@apache.org.
Refactored cgroups::destroy to use a single pass.

internal::Destroyer now uses a single pass of freeze, kill, thaw, reap
to kill all processes in a freezer cgroup. cgroups::destroy will not
complete until all processes have been reaped.

Added tests for destroying a freezer cgroup containing a stopped
process or a traced process.

Review: https://reviews.apache.org/r/20818


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ca2ba6a0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ca2ba6a0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ca2ba6a0

Branch: refs/heads/master
Commit: ca2ba6a0b20f93b8f4507b35f8eff427115f2e7e
Parents: 07c3dc5
Author: Ian Downes <id...@twitter.com>
Authored: Fri Apr 25 17:58:56 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Tue Jun 17 11:00:41 2014 -0700

----------------------------------------------------------------------
 src/linux/cgroups.cpp                           | 258 +++++++------------
 src/linux/cgroups.hpp                           |  18 +-
 .../isolators/cgroups/cpushare.cpp              |   2 +-
 src/slave/containerizer/linux_launcher.cpp      |   2 +-
 src/slave/containerizer/mesos_containerizer.cpp |   2 +-
 src/tests/cgroups_tests.cpp                     | 105 +++++++-
 src/tests/slave_recovery_tests.cpp              |   1 +
 7 files changed, 195 insertions(+), 193 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ca2ba6a0/src/linux/cgroups.cpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp
index 723acc1..0b5f95a 100644
--- a/src/linux/cgroups.cpp
+++ b/src/linux/cgroups.cpp
@@ -41,6 +41,7 @@
 #include <process/delay.hpp>
 #include <process/io.hpp>
 #include <process/process.hpp>
+#include <process/reap.hpp>
 
 #include <stout/duration.hpp>
 #include <stout/error.hpp>
@@ -1427,7 +1428,6 @@ protected:
   }
 
 private:
-
   const string hierarchy;
   const string cgroup;
   const Time start;
@@ -1435,142 +1435,43 @@ private:
 };
 
 
-// The process used to wait for a cgroup to become empty (no task in it).
-class EmptyWatcher: public Process<EmptyWatcher>
-{
-public:
-  EmptyWatcher(const string& _hierarchy,
-               const string& _cgroup,
-               const Duration& _interval,
-               unsigned int _retries = EMPTY_WATCHER_RETRIES)
-    : hierarchy(_hierarchy),
-      cgroup(_cgroup),
-      interval(_interval),
-      retries(_retries) {}
-
-  virtual ~EmptyWatcher() {}
-
-  // Return a future indicating the state of the watcher.
-  // There are three outcomes:
-  //   1. true:  the cgroup became empty.
-  //   2. false: the cgroup did not become empty within the retry limit.
-  //   3. error: invalid arguments, or an unexpected error occured.
-  Future<bool> future() { return promise.future(); }
-
-protected:
-  virtual void initialize()
-  {
-    // Stop when no one cares.
-    promise.future().onDiscard(lambda::bind(
-        static_cast<void (*)(const UPID&, bool)>(terminate), self(), true));
-
-    CHECK(interval >= Seconds(0));
-
-    check();
-  }
-
-  virtual void finalize()
-  {
-    promise.discard();
-  }
-
-private:
-  void check(unsigned int attempt = 0)
-  {
-    Try<set<pid_t> > pids = processes(hierarchy, cgroup);
-    if (pids.isError()) {
-      promise.fail("Failed to get processes of cgroup: " + pids.error());
-      terminate(self());
-      return;
-    }
-
-    if (pids.get().empty()) {
-      promise.set(true);
-      terminate(self());
-      return;
-    } else {
-      if (attempt > retries) {
-        promise.set(false);
-        terminate(self());
-        return;
-      }
-
-      // Re-check needed.
-      delay(interval, self(), &EmptyWatcher::check, attempt + 1);
-    }
-  }
-
-  const string hierarchy;
-  const string cgroup;
-  const Duration interval;
-  const unsigned int retries;
-  Promise<bool> promise;
-};
-
-
 // The process used to atomically kill all tasks in a cgroup.
 class TasksKiller : public Process<TasksKiller>
 {
 public:
-  TasksKiller(const string& _hierarchy,
-              const string& _cgroup,
-              const Duration& _interval)
-    : hierarchy(_hierarchy),
-      cgroup(_cgroup),
-      interval(_interval) {}
+  TasksKiller(const string& _hierarchy, const string& _cgroup)
+    : hierarchy(_hierarchy), cgroup(_cgroup) {}
 
   virtual ~TasksKiller() {}
 
   // Return a future indicating the state of the killer.
-  Future<bool> future() { return promise.future(); }
+  // Failure occurs if any process in the cgroup is unable to be
+  // killed.
+  Future<Nothing> future() { return promise.future(); }
 
 protected:
   virtual void initialize()
   {
     // Stop when no one cares.
     promise.future().onDiscard(lambda::bind(
-          static_cast<void(*)(const UPID&, bool)>(terminate), self(), true));
-
-    CHECK(interval >= Seconds(0));
+        static_cast<void (*)(const UPID&, bool)>(terminate), self(), true));
 
     killTasks();
   }
 
   virtual void finalize()
   {
-    // Cancel the chain of operations if the user discards the future.
-    if (promise.future().hasDiscard()) {
-      chain.discard();
-
-      // TODO(benh): Discard our promise only after 'chain' has
-      // completed (ready, failed, or discarded).
-      promise.discard();
-    }
+    chain.discard();
+    promise.discard();
   }
 
 private:
-  // The sequence of operations to kill a cgroup is as follows:
-  // SIGSTOP -> SIGKILL -> empty -> freeze -> SIGKILL -> thaw -> empty
-  // This process is repeated until the cgroup becomes empty.
   void killTasks() {
-    // Chain together the steps needed to kill the tasks. Note that we
-    // ignore the return values of freeze, kill, and thaw because,
-    // provided there are no errors, we'll just retry the chain as
-    // long as tasks still exist.
-    // Send stop signal to all tasks.
-    chain = kill(SIGSTOP)
-      // Now send kill signal.
-      .then(defer(self(), &Self::kill, SIGKILL))
-      // Wait until cgroup is empty.
-      .then(defer(self(), &Self::empty))
-      // Freeze cgroup.
-      .then(defer(self(), &Self::freeze))
-      // Send kill signal to any remaining tasks.
-      .then(defer(self(), &Self::kill, SIGKILL))
-      // Thaw cgroup to deliver signals.
-      .then(defer(self(), &Self::thaw))
-      // Wait until cgroup is empty.
-      .then(defer(self(), &Self::empty));
+    // Chain together the steps needed to kill all tasks in the cgroup.
+    chain = freeze()                     // Freeze the cgroup.
+      .then(defer(self(), &Self::kill))  // Send kill signal.
+      .then(defer(self(), &Self::thaw))  // Thaw cgroup to deliver signal.
+      .then(defer(self(), &Self::reap)); // Wait until all pids are reaped.
 
     chain.onAny(defer(self(), &Self::finished, lambda::_1));
   }
@@ -1580,12 +1481,24 @@ private:
     return cgroups::freezer::freeze(hierarchy, cgroup);
   }
 
-  Future<Nothing> kill(const int signal)
+  Future<Nothing> kill()
   {
-    Try<Nothing> kill = cgroups::kill(hierarchy, cgroup, signal);
+    Try<set<pid_t> > processes = cgroups::processes(hierarchy, cgroup);
+    if (processes.isError()) {
+      return Failure(processes.error());
+    }
+
+    // Reaping the frozen pids before we kill (and thaw) ensures we reap the
+    // correct pids.
+    foreach (const pid_t pid, processes.get()) {
+      statuses.push_back(process::reap(pid));
+    }
+
+    Try<Nothing> kill = cgroups::kill(hierarchy, cgroup, SIGKILL);
     if (kill.isError()) {
       return Failure(kill.error());
     }
+
     return Nothing();
   }
 
@@ -1594,37 +1507,43 @@ private:
     return cgroups::freezer::thaw(hierarchy, cgroup);
   }
 
-  Future<bool> empty()
+  Future<list<Option<int> > > reap()
   {
-    EmptyWatcher* watcher = new EmptyWatcher(hierarchy, cgroup, interval);
-    Future<bool> future = watcher->future();
-    spawn(watcher, true);
-    return future;
+    // Wait until we've reaped all processes.
+    return collect(statuses);
   }
 
-  void finished(const Future<bool>& empty)
+  void finished(const Future<list<Option<int> > >& future)
   {
-    if (empty.isDiscarded()) {
-      promise.discard();
+    if (future.isDiscarded()) {
+      promise.fail("Unexpected discard of future");
       terminate(self());
-    } else if (empty.isFailed()) {
-      promise.fail(empty.failure());
+      return;
+    } else if (future.isFailed()) {
+      promise.fail(future.failure());
       terminate(self());
-    } else if (empty.get()) {
-      promise.set(true);
+      return;
+    }
+
+    // Verify the cgroup is now empty.
+    Try<set<pid_t> > processes = cgroups::processes(hierarchy, cgroup);
+    if (processes.isError() || !processes.get().empty()) {
+      promise.fail("Failed to kill all processes in cgroup: " +
+                   (processes.isError() ? processes.error()
+                                        : "processes remain"));
       terminate(self());
-    } else {
-      // The cgroup was not empty after the retry limit.
-      // We need to re-attempt the freeze/kill/thaw/watch chain.
-      killTasks();
+      return;
     }
+
+    promise.set(Nothing());
+    terminate(self());
   }
 
   const string hierarchy;
   const string cgroup;
-  const Duration interval;
-  Promise<bool> promise;
-  Future<bool> chain; // Used to discard the "chain" of operations.
+  Promise<Nothing> promise;
+  list<Future<Option<int> > > statuses; // List of statuses for processes.
+  Future<list<Option<int> > > chain; // Used to discard all operations.
 };
 
 
@@ -1632,32 +1551,27 @@ private:
 class Destroyer : public Process<Destroyer>
 {
 public:
-  Destroyer(const string& _hierarchy,
-            const vector<string>& _cgroups,
-            const Duration& _interval)
-    : hierarchy(_hierarchy),
-      cgroups(_cgroups),
-      interval(_interval) {}
+  Destroyer(const string& _hierarchy, const vector<string>& _cgroups)
+    : hierarchy(_hierarchy), cgroups(_cgroups) {}
 
   virtual ~Destroyer() {}
 
   // Return a future indicating the state of the destroyer.
-  Future<bool> future() { return promise.future(); }
+  // Failure occurs if any cgroup fails to be destroyed.
+  Future<Nothing> future() { return promise.future(); }
 
 protected:
   virtual void initialize()
   {
     // Stop when no one cares.
     promise.future().onDiscard(lambda::bind(
-          static_cast<void(*)(const UPID&, bool)>(terminate), self(), true));
-
-    CHECK(interval >= Seconds(0));
+        static_cast<void (*)(const UPID&, bool)>(terminate), self(), true));
 
     // Kill tasks in the given cgroups in parallel. Use collect mechanism to
     // wait until all kill processes finish.
     foreach (const string& cgroup, cgroups) {
       internal::TasksKiller* killer =
-        new internal::TasksKiller(hierarchy, cgroup, interval);
+        new internal::TasksKiller(hierarchy, cgroup);
       killers.push_back(killer->future());
       spawn(killer, true);
     }
@@ -1668,18 +1582,12 @@ protected:
 
   virtual void finalize()
   {
-    // Cancel the operation if the user discards the future.
-    if (promise.future().hasDiscard()) {
-      discard<bool>(killers);
-
-      // TODO(benh): Discard our promise only after all 'killers' have
-      // completed (ready, failed, or discarded).
-      promise.discard();
-    }
+    discard(killers);
+    promise.discard();
   }
 
 private:
-  void killed(const Future<list<bool> >& kill)
+  void killed(const Future<list<Nothing> >& kill)
   {
     if (kill.isReady()) {
       remove();
@@ -1687,7 +1595,8 @@ private:
       promise.discard();
       terminate(self());
     } else if (kill.isFailed()) {
-      promise.fail("Failed to kill tasks in nested cgroups: " + kill.failure());
+      promise.fail("Failed to kill tasks in nested cgroups: " +
+                   kill.failure());
       terminate(self());
     }
   }
@@ -1704,31 +1613,23 @@ private:
       }
     }
 
-    promise.set(true);
+    promise.set(Nothing());
     terminate(self());
   }
 
   const string hierarchy;
   const vector<string> cgroups;
-  const Duration interval;
-  Promise<bool> promise;
+  Promise<Nothing> promise;
 
   // The killer processes used to atomically kill tasks in each cgroup.
-  list<Future<bool> > killers;
+  list<Future<Nothing> > killers;
 };
 
 } // namespace internal {
 
 
-Future<bool> destroy(
-    const string& hierarchy,
-    const string& cgroup,
-    const Duration& interval)
+Future<Nothing> destroy(const string& hierarchy, const string& cgroup)
 {
-  if (interval < Seconds(0)) {
-    return Failure("Interval should be non-negative");
-  }
-
   // Construct the vector of cgroups to destroy.
   Try<vector<string> > cgroups = cgroups::get(hierarchy, cgroup);
   if (cgroups.isError()) {
@@ -1742,15 +1643,15 @@ Future<bool> destroy(
   }
 
   if (candidates.empty()) {
-    return true;
+    return Nothing();
   }
 
   // If the freezer subsystem is available, destroy the cgroups.
   Option<Error> error = verify(hierarchy, cgroup, "freezer.state");
   if (error.isNone()) {
     internal::Destroyer* destroyer =
-      new internal::Destroyer(hierarchy, candidates, interval);
-    Future<bool> future = destroyer->future();
+      new internal::Destroyer(hierarchy, candidates);
+    Future<Nothing> future = destroyer->future();
     spawn(destroyer, true);
     return future;
   } else {
@@ -1763,7 +1664,22 @@ Future<bool> destroy(
     }
   }
 
-  return true;
+  return Nothing();
+}
+
+
+Future<Nothing> destroyTimedOut(
+    Future<Nothing> future,
+    const string& hierarchy,
+    const string& cgroup,
+    const Duration& timeout)
+{
+  LOG(WARNING) << "Failed to destroy cgroup '" << path::join(hierarchy, cgroup)
+               << "' after " << timeout;
+
+  future.discard();
+
+  return future;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/ca2ba6a0/src/linux/cgroups.hpp
----------------------------------------------------------------------
diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp
index 599badb..38e345f 100644
--- a/src/linux/cgroups.hpp
+++ b/src/linux/cgroups.hpp
@@ -41,7 +41,6 @@
 namespace cgroups {
 
 const Duration DESTROY_TIMEOUT = Seconds(60);
-const unsigned int EMPTY_WATCHER_RETRIES = 50;
 
 // Default number of assign attempts when moving threads to a cgroup.
 const unsigned int THREAD_ASSIGN_RETRIES = 100;
@@ -334,14 +333,21 @@ process::Future<uint64_t> listen(
 // is not present.
 // @param   hierarchy Path to the hierarchy root.
 // @param   cgroup      Path to the cgroup relative to the hierarchy root.
-// @param   interval    The time interval between two state check
-//                      requests (default: 0.1 seconds).
 // @return  A future which will become ready when the operation is done.
 //          Error if something unexpected happens.
-process::Future<bool> destroy(
+process::Future<Nothing> destroy(
     const std::string& hierarchy,
-    const std::string& cgroup = "/",
-    const Duration& interval = Milliseconds(100));
+    const std::string& cgroup = "/");
+
+
+// Destroy a cgroup under a given hierarchy. This is a convenience
+// function which wraps the cgroups::destroy() to add a timeout: if
+// the cgroup(s) cannot be destroyed after timeout the operation will
+// be discarded.
+process::Future<Nothing> destroy(
+    const std::string& hierarchy,
+    const std::string& cgroup,
+    const Duration& timeout);
 
 
 // Cleanup the hierarchy, by first destroying all the underlying

http://git-wip-us.apache.org/repos/asf/mesos/blob/ca2ba6a0/src/slave/containerizer/isolators/cgroups/cpushare.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/isolators/cgroups/cpushare.cpp b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
index eb8933f..1e2db0f 100644
--- a/src/slave/containerizer/isolators/cgroups/cpushare.cpp
+++ b/src/slave/containerizer/isolators/cgroups/cpushare.cpp
@@ -441,7 +441,7 @@ Future<Nothing> CgroupsCpushareIsolatorProcess::cleanup(
 
   Info* info = CHECK_NOTNULL(infos[containerId]);
 
-  list<Future<bool> > futures;
+  list<Future<Nothing> > futures;
   futures.push_back(cgroups::destroy(hierarchies["cpu"], info->cgroup));
   futures.push_back(cgroups::destroy(hierarchies["cpuacct"], info->cgroup));
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/ca2ba6a0/src/slave/containerizer/linux_launcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/linux_launcher.cpp b/src/slave/containerizer/linux_launcher.cpp
index c17724b..9d0205e 100644
--- a/src/slave/containerizer/linux_launcher.cpp
+++ b/src/slave/containerizer/linux_launcher.cpp
@@ -271,7 +271,7 @@ Try<pid_t> LinuxLauncher::fork(
 
 Future<Nothing> _destroy(
     const ContainerID& containerId,
-    process::Future<bool> destroyed)
+    process::Future<Nothing> destroyed)
 {
   if (destroyed.isFailed()) {
     LOG(ERROR) << "Failed to destroy freezer cgroup for '"

http://git-wip-us.apache.org/repos/asf/mesos/blob/ca2ba6a0/src/slave/containerizer/mesos_containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos_containerizer.cpp b/src/slave/containerizer/mesos_containerizer.cpp
index d6df9a1..4d97d49 100644
--- a/src/slave/containerizer/mesos_containerizer.cpp
+++ b/src/slave/containerizer/mesos_containerizer.cpp
@@ -926,7 +926,7 @@ Future<Nothing> MesosContainerizerProcess::update(
 
   // Wait for all isolators to complete.
   return collect(futures)
-    .then(lambda::bind(_nothing));
+    .then(lambda::bind(&_nothing));
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/ca2ba6a0/src/tests/cgroups_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/cgroups_tests.cpp b/src/tests/cgroups_tests.cpp
index c00c30a..bfb5858 100644
--- a/src/tests/cgroups_tests.cpp
+++ b/src/tests/cgroups_tests.cpp
@@ -30,6 +30,7 @@
 #include <vector>
 
 #include <sys/mman.h>
+#include <sys/ptrace.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
@@ -756,15 +757,13 @@ TEST_F(CgroupsAnyHierarchyWithFreezerTest, ROOT_CGROUPS_Destroy)
     ASSERT_LT(0, ::read(pipes[0], &dummy, sizeof(dummy)));
     ::close(pipes[0]);
 
-    Future<bool> future = cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT);
-    future.await(Seconds(5));
-    ASSERT_TRUE(future.isReady());
-    EXPECT_TRUE(future.get());
+    AWAIT_READY(cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT));
 
+    // cgroups::destroy will reap all processes in the cgroup so we should
+    // *not* be able to reap it now.
     int status;
-    EXPECT_NE(-1, ::waitpid((pid_t) -1, &status, 0));
-    ASSERT_TRUE(WIFSIGNALED(status));
-    EXPECT_EQ(SIGKILL, WTERMSIG(status));
+    EXPECT_EQ(-1, ::waitpid(pid, &status, 0));
+    EXPECT_EQ(ECHILD, errno);
   } else {
     // In child process.
 
@@ -855,10 +854,91 @@ TEST_F(CgroupsAnyHierarchyWithFreezerTest, ROOT_CGROUPS_AssignThreads)
   CHECK_SOME(cgroups::assign(hierarchy, "", ::getpid()));
 
   // Destroy the cgroup.
-  Future<bool> future = cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT);
-  future.await(Seconds(5));
-  ASSERT_TRUE(future.isReady());
-  EXPECT_TRUE(future.get());
+  AWAIT_READY(cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT));
+}
+
+
+TEST_F(CgroupsAnyHierarchyWithFreezerTest, ROOT_CGROUPS_DestroyStoppedProcess)
+{
+  std::string hierarchy = path::join(baseHierarchy, "freezer");
+  ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
+
+  pid_t pid = ::fork();
+  ASSERT_NE(-1, pid);
+
+  if (pid == 0) {
+    // In child process.
+    while (true) { sleep(1); }
+
+    ABORT("Child should not reach this statement");
+  }
+
+  // In parent process.
+
+  // Put child into the freezer cgroup.
+  Try<Nothing> assign = cgroups::assign(hierarchy, TEST_CGROUPS_ROOT, pid);
+
+  // Stop the child process.
+  EXPECT_EQ(0, kill(pid, SIGSTOP));
+
+  AWAIT_READY(cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT));
+
+  // cgroups::destroy will reap all processes in the cgroup so we should
+  // *not* be able to reap it now.
+  int status;
+  EXPECT_EQ(-1, ::waitpid(pid, &status, 0));
+  EXPECT_EQ(ECHILD, errno);
+}
+
+
+TEST_F(CgroupsAnyHierarchyWithFreezerTest, ROOT_CGROUPS_DestroyTracedProcess)
+{
+  std::string hierarchy = path::join(baseHierarchy, "freezer");
+  ASSERT_SOME(cgroups::create(hierarchy, TEST_CGROUPS_ROOT));
+
+  pid_t pid = ::fork();
+  ASSERT_NE(-1, pid);
+
+  if (pid == 0) {
+    // In child process.
+    while (true) { sleep(1); }
+
+    ABORT("Child should not reach this statement");
+  }
+
+  // In parent process.
+  Try<Nothing> assign = cgroups::assign(hierarchy, TEST_CGROUPS_ROOT, pid);
+  ASSERT_SOME(assign);
+
+  // Attach to the child process.
+  ASSERT_EQ(0, ptrace(PT_ATTACH, pid, NULL, NULL));
+
+  // Wait until the process is in traced state ('t' or 'T').
+  Duration elapsed = Duration::zero();
+  while (true) {
+    Result<proc::ProcessStatus> process = proc::status(pid);
+    ASSERT_SOME(process);
+
+    if (process.get().state == 'T' || process.get().state == 't') {
+      break;
+    }
+
+    if (elapsed > Seconds(1)) {
+      FAIL() << "Failed to wait for process to be traced";
+    }
+
+    os::sleep(Milliseconds(5));
+    elapsed += Milliseconds(5);
+  }
+
+  // Now destroy the cgroup.
+  AWAIT_READY(cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT));
+
+  // cgroups::destroy will reap all processes in the cgroup so we should
+  // *not* be able to reap it now.
+  int status;
+  EXPECT_EQ(-1, ::waitpid(pid, &status, 0));
+  EXPECT_EQ(ECHILD, errno);
 }
 
 
@@ -937,7 +1017,6 @@ TEST_F(CgroupsAnyHierarchyWithPerfEventTest, ROOT_CGROUPS_Perf)
   EXPECT_EQ(SIGKILL, WTERMSIG(status));
 
   // Destroy the cgroup.
-  Future<bool> destroy = cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT);
+  Future<Nothing> destroy = cgroups::destroy(hierarchy, TEST_CGROUPS_ROOT);
   AWAIT_READY(destroy);
-  EXPECT_TRUE(destroy.get());
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/ca2ba6a0/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index 7044327..8c6421b 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -3232,6 +3232,7 @@ TEST_F(MesosContainerizerSlaveRecoveryTest, CGROUPS_ROOT_PerfRollForward)
   // isolator.
   slave::Flags flags = this->CreateSlaveFlags();
   flags.isolation = "cgroups/cpu,cgroups/mem";
+  flags.slave_subsystems = "";
 
   Try<MesosContainerizer*> containerizer1 =
     MesosContainerizer::create(flags, true);