You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by tn...@apache.org on 2015/04/21 23:25:55 UTC

mesos git commit: Fix destroying containerizer during isolator prepare.

Repository: mesos
Updated Branches:
  refs/heads/master da607079b -> 664788b94


Fix destroying containerizer during isolator prepare.

This ensures the prepare phase is completed before destroying the container.

Review: https://reviews.apache.org/r/32123


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/664788b9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/664788b9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/664788b9

Branch: refs/heads/master
Commit: 664788b94598a77c936a6d41d71e42f87026f96e
Parents: da60707
Author: Timothy Chen <tn...@apache.org>
Authored: Tue Apr 21 12:43:34 2015 -0700
Committer: Timothy Chen <tn...@apache.org>
Committed: Tue Apr 21 14:25:47 2015 -0700

----------------------------------------------------------------------
 src/slave/containerizer/mesos/containerizer.cpp | 100 +++++++++----
 src/slave/containerizer/mesos/containerizer.hpp |  24 +++-
 src/tests/containerizer_tests.cpp               | 142 +++++++++++++++++++
 3 files changed, 229 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/664788b9/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index e413609..0bd26ea 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -294,7 +294,11 @@ Future<containerizer::Termination> MesosContainerizer::wait(
 
 void MesosContainerizer::destroy(const ContainerID& containerId)
 {
-  dispatch(process.get(), &MesosContainerizerProcess::destroy, containerId);
+  dispatch(
+      process.get(),
+      &MesosContainerizerProcess::destroy,
+      containerId,
+      true);
 }
 
 
@@ -520,7 +524,7 @@ void MesosContainerizerProcess::__launch(
              << "' of framework '" << executorInfo.framework_id()
              << "': " << failure;
 
-  destroy(containerId);
+  destroy(containerId, false);
 }
 
 
@@ -580,6 +584,8 @@ Future<list<Option<CommandInfo>>> MesosContainerizerProcess::prepare(
     const string& directory,
     const Option<string>& user)
 {
+  CHECK(containers_.contains(containerId));
+
   // We prepare the isolators sequentially according to their ordering
   // to permit basic dependency specification, e.g., preparing a
   // filesystem isolator before other isolators.
@@ -596,6 +602,8 @@ Future<list<Option<CommandInfo>>> MesosContainerizerProcess::prepare(
                             lambda::_1));
   }
 
+  containers_[containerId]->preparations = f;
+
   return f;
 }
 
@@ -933,7 +941,9 @@ Future<ResourceStatistics> MesosContainerizerProcess::usage(
 }
 
 
-void MesosContainerizerProcess::destroy(const ContainerID& containerId)
+void MesosContainerizerProcess::destroy(
+    const ContainerID& containerId,
+    bool killed)
 {
   if (!containers_.contains(containerId)) {
     LOG(WARNING) << "Ignoring destroy of unknown container: " << containerId;
@@ -950,14 +960,23 @@ void MesosContainerizerProcess::destroy(const ContainerID& containerId)
   LOG(INFO) << "Destroying container '" << containerId << "'";
 
   if (container->state == PREPARING) {
-    // We cannot simply terminate the container if it's preparing
-    // since isolator's prepare doesn't need any cleanup.
-    containerizer::Termination termination;
-    termination.set_killed(true);
-    termination.set_message("Container destroyed while preparing isolators");
-    container->promise.set(termination);
+    VLOG(1) << "Waiting for the isolators to complete preparing before "
+            << "destroying the container";
 
-    containers_.erase(containerId);
+    container->state = DESTROYING;
+
+    Future<Option<int>> status = None();
+    // We need to wait for the isolators to finish preparing to prevent
+    // a race that the destroy method calls isolators' cleanup before
+    // it starts preparing.
+    container->preparations
+      .onAny(defer(
+          self(),
+          &Self::___destroy,
+          containerId,
+          status,
+          "Container destroyed while preparing isolators",
+          killed));
 
     return;
   }
@@ -975,27 +994,30 @@ void MesosContainerizerProcess::destroy(const ContainerID& containerId)
     // Wait for the isolators to finish isolating before we start
     // to destroy the container.
     container->isolation
-      .onAny(defer(self(), &Self::_destroy, containerId));
+      .onAny(defer(self(), &Self::_destroy, containerId, killed));
 
     return;
   }
 
   container->state = DESTROYING;
-  _destroy(containerId);
+  _destroy(containerId, killed);
 }
 
 
-void MesosContainerizerProcess::_destroy(const ContainerID& containerId)
+void MesosContainerizerProcess::_destroy(
+    const ContainerID& containerId,
+    bool killed)
 {
   // Kill all processes then continue destruction.
   launcher->destroy(containerId)
-    .onAny(defer(self(), &Self::__destroy, containerId, lambda::_1));
+    .onAny(defer(self(), &Self::__destroy, containerId, lambda::_1, killed));
 }
 
 
 void MesosContainerizerProcess::__destroy(
     const ContainerID& containerId,
-    const Future<Nothing>& future)
+    const Future<Nothing>& future,
+    bool killed)
 {
   CHECK(containers_.contains(containerId));
 
@@ -1021,7 +1043,13 @@ void MesosContainerizerProcess::__destroy(
   // the exit status of the executor when it's ready (it may already
   // be) and continue the destroy.
   containers_[containerId]->status
-    .onAny(defer(self(), &Self::___destroy, containerId, lambda::_1));
+    .onAny(defer(
+        self(),
+        &Self::___destroy,
+        containerId,
+        lambda::_1,
+        None(),
+        killed));
 }
 
 
@@ -1064,7 +1092,9 @@ static T reversed(const T& t)
 
 void MesosContainerizerProcess::___destroy(
     const ContainerID& containerId,
-    const Future<Option<int>>& status)
+    const Future<Option<int>>& status,
+    const Option<string>& message,
+    bool killed)
 {
   // We clean up each isolator in the reverse order they were
   // prepared (see comment in prepare()).
@@ -1084,7 +1114,9 @@ void MesosContainerizerProcess::___destroy(
                 &Self::____destroy,
                 containerId,
                 status,
-                lambda::_1));
+                lambda::_1,
+                message,
+                killed));
 
   return;
 }
@@ -1093,7 +1125,9 @@ void MesosContainerizerProcess::___destroy(
 void MesosContainerizerProcess::____destroy(
     const ContainerID& containerId,
     const Future<Option<int>>& status,
-    const Future<list<Future<Nothing>>>& cleanups)
+    const Future<list<Future<Nothing>>>& cleanups,
+    Option<string> message,
+    bool killed)
 {
   // This should not occur because we only use the Future<list> to
   // facilitate chaining.
@@ -1120,26 +1154,32 @@ void MesosContainerizerProcess::____destroy(
     }
   }
 
-  // A container is 'killed' if any isolator limited it.
+  // If any isolator limited the container then we mark it to killed.
   // Note: We may not see a limitation in time for it to be
   // registered. This could occur if the limitation (e.g., an OOM)
   // killed the executor and we triggered destroy() off the executor
   // exit.
-  bool killed = false;
-  string message;
-  if (container->limitations.size() > 0) {
-    killed = true;
+  if (!killed && container->limitations.size() > 0) {
+    string message_;
     foreach (const Limitation& limitation, container->limitations) {
-      message += limitation.message;
+      message_ += limitation.message;
     }
-    message = strings::trim(message);
-  } else {
+    message = strings::trim(message_);
+  } else if (!killed && message.isNone()) {
     message = "Executor terminated";
   }
 
   containerizer::Termination termination;
+
+  // Killed means that the container was either asked to be destroyed
+  // by the slave or was destroyed because an isolator limited the
+  // container.
   termination.set_killed(killed);
-  termination.set_message(message);
+
+  if (message.isSome()) {
+    termination.set_message(message.get());
+  }
+
   if (status.isReady() && status.get().isSome()) {
     termination.set_status(status.get().get());
   }
@@ -1159,7 +1199,7 @@ void MesosContainerizerProcess::reaped(const ContainerID& containerId)
   LOG(INFO) << "Executor for container '" << containerId << "' has exited";
 
   // The executor has exited so destroy the container.
-  destroy(containerId);
+  destroy(containerId, false);
 }
 
 
@@ -1187,7 +1227,7 @@ void MesosContainerizerProcess::limited(
   }
 
   // The container has been affected by the limitation so destroy it.
-  destroy(containerId);
+  destroy(containerId, true);
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/664788b9/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index ae61a0f..fb334e5 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -156,7 +156,7 @@ public:
       const ContainerID& containerId,
       int pipeWrite);
 
-  virtual void destroy(const ContainerID& containerId);
+  virtual void destroy(const ContainerID& containerId, bool killed);
 
   virtual process::Future<hashset<ContainerID>> containers();
 
@@ -199,24 +199,29 @@ private:
       pid_t _pid);
 
   // Continues 'destroy()' once isolators has completed.
-  void _destroy(const ContainerID& containerId);
+  void _destroy(const ContainerID& containerId, bool killed);
 
   // Continues 'destroy()' once all processes have been killed by the launcher.
   void __destroy(
       const ContainerID& containerId,
-      const process::Future<Nothing>& future);
+      const process::Future<Nothing>& future,
+      bool killed);
 
   // Continues '_destroy()' once we get the exit status of the executor.
   void ___destroy(
       const ContainerID& containerId,
-      const process::Future<Option<int>>& status);
+      const process::Future<Option<int>>& status,
+      const Option<std::string>& message,
+      bool killed);
 
   // Continues (and completes) '__destroy()' once all isolators have completed
   // cleanup.
   void ____destroy(
       const ContainerID& containerId,
       const process::Future<Option<int>>& status,
-      const process::Future<std::list<process::Future<Nothing>>>& cleanups);
+      const process::Future<std::list<process::Future<Nothing>>>& cleanups,
+      Option<std::string> message,
+      bool killed);
 
   // Call back for when an isolator limits a container and impacts the
   // processes. This will trigger container destruction.
@@ -260,8 +265,13 @@ private:
     process::Future<Option<int>> status;
 
     // We keep track of the future that is waiting for all the
-    // isolator's futures, so that destroy will only start calling
-    // cleanup after all isolators has finished isolating.
+    // isolators' prepare futures, so that destroy will only start
+    // calling cleanup after all isolators has finished preparing.
+    process::Future<std::list<Option<CommandInfo>>> preparations;
+
+    // We keep track of the future that is waiting for all the
+    // isolators' isolate futures, so that destroy will only start
+    // calling cleanup after all isolators has finished isolating.
     process::Future<std::list<Nothing>> isolation;
 
     // We keep track of any limitations received from each isolator so we can

http://git-wip-us.apache.org/repos/asf/mesos/blob/664788b9/src/tests/containerizer_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer_tests.cpp b/src/tests/containerizer_tests.cpp
index 5991aa6..d100ab9 100644
--- a/src/tests/containerizer_tests.cpp
+++ b/src/tests/containerizer_tests.cpp
@@ -357,6 +357,70 @@ public:
 };
 
 
+class MockIsolatorProcess : public IsolatorProcess
+{
+public:
+  MockIsolatorProcess()
+  {
+    EXPECT_CALL(*this, watch(_))
+      .WillRepeatedly(Return(watchPromise.future()));
+
+    EXPECT_CALL(*this, isolate(_, _))
+      .WillRepeatedly(Return(Nothing()));
+
+    EXPECT_CALL(*this, cleanup(_))
+      .WillRepeatedly(Return(Nothing()));
+
+    EXPECT_CALL(*this, prepare(_, _, _, _))
+      .WillRepeatedly(Invoke(this, &MockIsolatorProcess::_prepare));
+  }
+
+  MOCK_METHOD1(
+      recover,
+      process::Future<Nothing>(
+          const std::list<mesos::slave::ExecutorRunState>&));
+
+  MOCK_METHOD4(
+      prepare,
+      process::Future<Option<CommandInfo>>(
+          const ContainerID&,
+          const ExecutorInfo&,
+          const std::string&,
+          const Option<std::string>&));
+
+  virtual process::Future<Option<CommandInfo>> _prepare(
+      const ContainerID& containerId,
+      const ExecutorInfo& executorInfo,
+      const std::string& directory,
+      const Option<std::string>& user)
+  {
+    return None();
+  }
+
+  MOCK_METHOD2(
+      isolate,
+      process::Future<Nothing>(const ContainerID&, pid_t));
+
+  MOCK_METHOD1(
+      watch,
+      process::Future<mesos::slave::Limitation>(const ContainerID&));
+
+  MOCK_METHOD2(
+      update,
+      process::Future<Nothing>(const ContainerID&, const Resources&));
+
+  MOCK_METHOD1(
+      usage,
+      process::Future<ResourceStatistics>(const ContainerID&));
+
+  MOCK_METHOD1(
+      cleanup,
+      process::Future<Nothing>(const ContainerID&));
+
+  Promise<mesos::slave::Limitation> watchPromise;
+};
+
+
 // Destroying a mesos containerizer while it is fetching should
 // complete without waiting for the fetching to finish.
 TEST_F(MesosContainerizerDestroyTest, DestroyWhileFetching)
@@ -412,6 +476,84 @@ TEST_F(MesosContainerizerDestroyTest, DestroyWhileFetching)
 }
 
 
+// Destroying a mesos containerizer while it is preparing should
+// wait until isolators are finished preparing before destroying.
+TEST_F(MesosContainerizerDestroyTest, DestroyWhilePreparing)
+{
+  slave::Flags flags;
+  Try<Launcher*> launcher = PosixLauncher::create(flags);
+  ASSERT_SOME(launcher);
+  vector<Owned<Isolator>> isolators;
+
+  MockIsolatorProcess* isolatorProcess = new MockIsolatorProcess();
+
+  Owned<Isolator> isolator(
+      new Isolator(Owned<IsolatorProcess>((IsolatorProcess*)isolatorProcess)));
+
+  isolators.push_back(isolator);
+
+  Future<Nothing> prepare;
+  Promise<Option<CommandInfo>> promise;
+  // Simulate a long prepare from the isolator.
+  EXPECT_CALL(*isolatorProcess, prepare(_, _, _, _))
+    .WillOnce(DoAll(FutureSatisfy(&prepare),
+                    Return(promise.future())));
+
+  Fetcher fetcher;
+
+  MockMesosContainerizerProcess* process = new MockMesosContainerizerProcess(
+      flags,
+      true,
+      &fetcher,
+      Owned<Launcher>(launcher.get()),
+      isolators);
+
+  MesosContainerizer containerizer((Owned<MesosContainerizerProcess>(process)));
+
+  ContainerID containerId;
+  containerId.set_value("test_container");
+
+  TaskInfo taskInfo;
+  CommandInfo commandInfo;
+  taskInfo.mutable_command()->MergeFrom(commandInfo);
+
+  containerizer.launch(
+      containerId,
+      taskInfo,
+      CREATE_EXECUTOR_INFO("executor", "exit 0"),
+      os::getcwd(),
+      None(),
+      SlaveID(),
+      process::PID<Slave>(),
+      false);
+
+  Future<containerizer::Termination> wait = containerizer.wait(containerId);
+
+  AWAIT_READY(prepare);
+
+  containerizer.destroy(containerId);
+
+  // The container should not exit until prepare is complete.
+  ASSERT_TRUE(wait.isPending());
+
+  // Need to help the compiler to disambiguate between overloads.
+  Option<CommandInfo> option = commandInfo;
+  promise.set(option);
+
+  AWAIT_READY(wait);
+
+  containerizer::Termination termination = wait.get();
+
+  ASSERT_EQ(
+      "Container destroyed while preparing isolators",
+      termination.message());
+
+  ASSERT_TRUE(termination.killed());
+
+  ASSERT_FALSE(termination.has_status());
+}
+
+
 // This action destroys the container using the real launcher and
 // waits until the destroy is complete.
 ACTION_P(InvokeDestroyAndWait, launcher)