You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by id...@apache.org on 2014/04/29 19:24:53 UTC

git commit: Collect Isolator::cleanup() before completing container destroy.

Repository: mesos
Updated Branches:
  refs/heads/master b1b2b4cc9 -> 01cd0f462


Collect Isolator::cleanup() before completing container destroy.

This ensures all isolators complete cleanup before the container
termination is set.

Review: https://reviews.apache.org/r/20720


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/01cd0f46
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/01cd0f46
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/01cd0f46

Branch: refs/heads/master
Commit: 01cd0f462cfef9441e3f17bd7b019a2f5fec59ea
Parents: b1b2b4c
Author: Ian Downes <id...@twitter.com>
Authored: Mon Apr 28 10:28:34 2014 -0700
Committer: Ian Downes <id...@twitter.com>
Committed: Tue Apr 29 10:18:00 2014 -0700

----------------------------------------------------------------------
 src/slave/containerizer/mesos_containerizer.cpp | 39 +++++++++++++++++---
 src/slave/containerizer/mesos_containerizer.hpp | 10 ++++-
 2 files changed, 42 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/01cd0f46/src/slave/containerizer/mesos_containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos_containerizer.cpp b/src/slave/containerizer/mesos_containerizer.cpp
index 27813af..2c64f2d 100644
--- a/src/slave/containerizer/mesos_containerizer.cpp
+++ b/src/slave/containerizer/mesos_containerizer.cpp
@@ -883,6 +883,9 @@ void MesosContainerizerProcess::_destroy(
     promises[containerId]->fail(
         "Failed to destroy container: " +
         (future.isFailed() ? future.failure() : "discarded future"));
+
+    destroying.erase(containerId);
+
     return;
   }
 
@@ -898,6 +901,37 @@ void MesosContainerizerProcess::__destroy(
     const ContainerID& containerId,
     const Future<Option<int > >& status)
 {
+  // Now that all processes have exited we can now clean up all isolators.
+  list<Future<Nothing> > futures;
+  foreach (const Owned<Isolator>& isolator, isolators) {
+    futures.push_back(isolator->cleanup(containerId));
+  }
+
+  // Wait for all isolators to complete cleanup before continuing.
+  collect(futures)
+    .onAny(defer(self(), &Self::___destroy, containerId, status, lambda::_1));
+}
+
+
+void MesosContainerizerProcess::___destroy(
+    const ContainerID& containerId,
+    const Future<Option<int > >& status,
+    const Future<list<Nothing> >& futures)
+{
+  // Something has gone wrong with one of the Isolators and cleanup failed.
+  // We'll fail the container termination and remove the 'destroying' flag but
+  // leave all other state. The containerizer is now in a bad state because
+  // at least one isolator has failed to clean up.
+  if (!futures.isReady()) {
+    promises[containerId]->fail(
+        "Failed to clean up isolators when destroying container: " +
+        (futures.isFailed() ? futures.failure() : "discarded future"));
+
+    destroying.erase(containerId);
+
+    return;
+  }
+
   // A container is 'killed' if any isolator limited it.
   // Note: We may not see a limitation in time for it to be registered. This
   // could occur if the limitation (e.g., an OOM) killed the executor and we
@@ -914,11 +948,6 @@ void MesosContainerizerProcess::__destroy(
     message = "Executor terminated";
   }
 
-  // We can now clean up all isolators.
-  foreach (const Owned<Isolator>& isolator, isolators) {
-    isolator->cleanup(containerId);
-  }
-
   containerizer::Termination termination;
   termination.set_killed(killed);
   termination.set_message(message);

http://git-wip-us.apache.org/repos/asf/mesos/blob/01cd0f46/src/slave/containerizer/mesos_containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos_containerizer.hpp b/src/slave/containerizer/mesos_containerizer.hpp
index 99c1354..9be1b67 100644
--- a/src/slave/containerizer/mesos_containerizer.hpp
+++ b/src/slave/containerizer/mesos_containerizer.hpp
@@ -181,12 +181,18 @@ private:
       const ContainerID& containerId,
       const process::Future<Nothing>& future);
 
-  // Continues (and completes) '_destroy()' once we get the exit status of the
-  // executor.
+  // Continues '_destroy()' once we get the exit status of the executor.
   void __destroy(
       const ContainerID& containerId,
       const process::Future<Option<int > >& status);
 
+  // Continues (and completes) '__destroy()' once all isolators have completed
+  // cleanup.
+  void ___destroy(
+      const ContainerID& containerId,
+      const process::Future<Option<int > >& status,
+      const process::Future<std::list<Nothing> >& futures);
+
   // Call back for when an isolator limits a container and impacts the
   // processes. This will trigger container destruction.
   void limited(