You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by tn...@apache.org on 2015/04/21 23:25:55 UTC
mesos git commit: Fix destroying containerizer during isolator
prepare.
Repository: mesos
Updated Branches:
refs/heads/master da607079b -> 664788b94
Fix destroying containerizer during isolator prepare.
This ensures the prepare phase is completed before destroying the container.
Review: https://reviews.apache.org/r/32123
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/664788b9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/664788b9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/664788b9
Branch: refs/heads/master
Commit: 664788b94598a77c936a6d41d71e42f87026f96e
Parents: da60707
Author: Timothy Chen <tn...@apache.org>
Authored: Tue Apr 21 12:43:34 2015 -0700
Committer: Timothy Chen <tn...@apache.org>
Committed: Tue Apr 21 14:25:47 2015 -0700
----------------------------------------------------------------------
src/slave/containerizer/mesos/containerizer.cpp | 100 +++++++++----
src/slave/containerizer/mesos/containerizer.hpp | 24 +++-
src/tests/containerizer_tests.cpp | 142 +++++++++++++++++++
3 files changed, 229 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/664788b9/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index e413609..0bd26ea 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -294,7 +294,11 @@ Future<containerizer::Termination> MesosContainerizer::wait(
void MesosContainerizer::destroy(const ContainerID& containerId)
{
- dispatch(process.get(), &MesosContainerizerProcess::destroy, containerId);
+ dispatch(
+ process.get(),
+ &MesosContainerizerProcess::destroy,
+ containerId,
+ true);
}
@@ -520,7 +524,7 @@ void MesosContainerizerProcess::__launch(
<< "' of framework '" << executorInfo.framework_id()
<< "': " << failure;
- destroy(containerId);
+ destroy(containerId, false);
}
@@ -580,6 +584,8 @@ Future<list<Option<CommandInfo>>> MesosContainerizerProcess::prepare(
const string& directory,
const Option<string>& user)
{
+ CHECK(containers_.contains(containerId));
+
// We prepare the isolators sequentially according to their ordering
// to permit basic dependency specification, e.g., preparing a
// filesystem isolator before other isolators.
@@ -596,6 +602,8 @@ Future<list<Option<CommandInfo>>> MesosContainerizerProcess::prepare(
lambda::_1));
}
+ containers_[containerId]->preparations = f;
+
return f;
}
@@ -933,7 +941,9 @@ Future<ResourceStatistics> MesosContainerizerProcess::usage(
}
-void MesosContainerizerProcess::destroy(const ContainerID& containerId)
+void MesosContainerizerProcess::destroy(
+ const ContainerID& containerId,
+ bool killed)
{
if (!containers_.contains(containerId)) {
LOG(WARNING) << "Ignoring destroy of unknown container: " << containerId;
@@ -950,14 +960,23 @@ void MesosContainerizerProcess::destroy(const ContainerID& containerId)
LOG(INFO) << "Destroying container '" << containerId << "'";
if (container->state == PREPARING) {
- // We cannot simply terminate the container if it's preparing
- // since isolator's prepare doesn't need any cleanup.
- containerizer::Termination termination;
- termination.set_killed(true);
- termination.set_message("Container destroyed while preparing isolators");
- container->promise.set(termination);
+ VLOG(1) << "Waiting for the isolators to complete preparing before "
+ << "destroying the container";
- containers_.erase(containerId);
+ container->state = DESTROYING;
+
+ Future<Option<int>> status = None();
+ // We need to wait for the isolators to finish preparing to prevent
+ // a race that the destroy method calls isolators' cleanup before
+ // it starts preparing.
+ container->preparations
+ .onAny(defer(
+ self(),
+ &Self::___destroy,
+ containerId,
+ status,
+ "Container destroyed while preparing isolators",
+ killed));
return;
}
@@ -975,27 +994,30 @@ void MesosContainerizerProcess::destroy(const ContainerID& containerId)
// Wait for the isolators to finish isolating before we start
// to destroy the container.
container->isolation
- .onAny(defer(self(), &Self::_destroy, containerId));
+ .onAny(defer(self(), &Self::_destroy, containerId, killed));
return;
}
container->state = DESTROYING;
- _destroy(containerId);
+ _destroy(containerId, killed);
}
-void MesosContainerizerProcess::_destroy(const ContainerID& containerId)
+void MesosContainerizerProcess::_destroy(
+ const ContainerID& containerId,
+ bool killed)
{
// Kill all processes then continue destruction.
launcher->destroy(containerId)
- .onAny(defer(self(), &Self::__destroy, containerId, lambda::_1));
+ .onAny(defer(self(), &Self::__destroy, containerId, lambda::_1, killed));
}
void MesosContainerizerProcess::__destroy(
const ContainerID& containerId,
- const Future<Nothing>& future)
+ const Future<Nothing>& future,
+ bool killed)
{
CHECK(containers_.contains(containerId));
@@ -1021,7 +1043,13 @@ void MesosContainerizerProcess::__destroy(
// the exit status of the executor when it's ready (it may already
// be) and continue the destroy.
containers_[containerId]->status
- .onAny(defer(self(), &Self::___destroy, containerId, lambda::_1));
+ .onAny(defer(
+ self(),
+ &Self::___destroy,
+ containerId,
+ lambda::_1,
+ None(),
+ killed));
}
@@ -1064,7 +1092,9 @@ static T reversed(const T& t)
void MesosContainerizerProcess::___destroy(
const ContainerID& containerId,
- const Future<Option<int>>& status)
+ const Future<Option<int>>& status,
+ const Option<string>& message,
+ bool killed)
{
// We clean up each isolator in the reverse order they were
// prepared (see comment in prepare()).
@@ -1084,7 +1114,9 @@ void MesosContainerizerProcess::___destroy(
&Self::____destroy,
containerId,
status,
- lambda::_1));
+ lambda::_1,
+ message,
+ killed));
return;
}
@@ -1093,7 +1125,9 @@ void MesosContainerizerProcess::___destroy(
void MesosContainerizerProcess::____destroy(
const ContainerID& containerId,
const Future<Option<int>>& status,
- const Future<list<Future<Nothing>>>& cleanups)
+ const Future<list<Future<Nothing>>>& cleanups,
+ Option<string> message,
+ bool killed)
{
// This should not occur because we only use the Future<list> to
// facilitate chaining.
@@ -1120,26 +1154,32 @@ void MesosContainerizerProcess::____destroy(
}
}
- // A container is 'killed' if any isolator limited it.
+ // If any isolator limited the container then we mark it to killed.
// Note: We may not see a limitation in time for it to be
// registered. This could occur if the limitation (e.g., an OOM)
// killed the executor and we triggered destroy() off the executor
// exit.
- bool killed = false;
- string message;
- if (container->limitations.size() > 0) {
- killed = true;
+ if (!killed && container->limitations.size() > 0) {
+ string message_;
foreach (const Limitation& limitation, container->limitations) {
- message += limitation.message;
+ message_ += limitation.message;
}
- message = strings::trim(message);
- } else {
+ message = strings::trim(message_);
+ } else if (!killed && message.isNone()) {
message = "Executor terminated";
}
containerizer::Termination termination;
+
+ // Killed means that the container was either asked to be destroyed
+ // by the slave or was destroyed because an isolator limited the
+ // container.
termination.set_killed(killed);
- termination.set_message(message);
+
+ if (message.isSome()) {
+ termination.set_message(message.get());
+ }
+
if (status.isReady() && status.get().isSome()) {
termination.set_status(status.get().get());
}
@@ -1159,7 +1199,7 @@ void MesosContainerizerProcess::reaped(const ContainerID& containerId)
LOG(INFO) << "Executor for container '" << containerId << "' has exited";
// The executor has exited so destroy the container.
- destroy(containerId);
+ destroy(containerId, false);
}
@@ -1187,7 +1227,7 @@ void MesosContainerizerProcess::limited(
}
// The container has been affected by the limitation so destroy it.
- destroy(containerId);
+ destroy(containerId, true);
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/664788b9/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index ae61a0f..fb334e5 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -156,7 +156,7 @@ public:
const ContainerID& containerId,
int pipeWrite);
- virtual void destroy(const ContainerID& containerId);
+ virtual void destroy(const ContainerID& containerId, bool killed);
virtual process::Future<hashset<ContainerID>> containers();
@@ -199,24 +199,29 @@ private:
pid_t _pid);
// Continues 'destroy()' once isolators has completed.
- void _destroy(const ContainerID& containerId);
+ void _destroy(const ContainerID& containerId, bool killed);
// Continues 'destroy()' once all processes have been killed by the launcher.
void __destroy(
const ContainerID& containerId,
- const process::Future<Nothing>& future);
+ const process::Future<Nothing>& future,
+ bool killed);
// Continues '_destroy()' once we get the exit status of the executor.
void ___destroy(
const ContainerID& containerId,
- const process::Future<Option<int>>& status);
+ const process::Future<Option<int>>& status,
+ const Option<std::string>& message,
+ bool killed);
// Continues (and completes) '__destroy()' once all isolators have completed
// cleanup.
void ____destroy(
const ContainerID& containerId,
const process::Future<Option<int>>& status,
- const process::Future<std::list<process::Future<Nothing>>>& cleanups);
+ const process::Future<std::list<process::Future<Nothing>>>& cleanups,
+ Option<std::string> message,
+ bool killed);
// Call back for when an isolator limits a container and impacts the
// processes. This will trigger container destruction.
@@ -260,8 +265,13 @@ private:
process::Future<Option<int>> status;
// We keep track of the future that is waiting for all the
- // isolator's futures, so that destroy will only start calling
- // cleanup after all isolators has finished isolating.
+ // isolators' prepare futures, so that destroy will only start
+ // calling cleanup after all isolators has finished preparing.
+ process::Future<std::list<Option<CommandInfo>>> preparations;
+
+ // We keep track of the future that is waiting for all the
+ // isolators' isolate futures, so that destroy will only start
+ // calling cleanup after all isolators has finished isolating.
process::Future<std::list<Nothing>> isolation;
// We keep track of any limitations received from each isolator so we can
http://git-wip-us.apache.org/repos/asf/mesos/blob/664788b9/src/tests/containerizer_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer_tests.cpp b/src/tests/containerizer_tests.cpp
index 5991aa6..d100ab9 100644
--- a/src/tests/containerizer_tests.cpp
+++ b/src/tests/containerizer_tests.cpp
@@ -357,6 +357,70 @@ public:
};
+class MockIsolatorProcess : public IsolatorProcess
+{
+public:
+ MockIsolatorProcess()
+ {
+ EXPECT_CALL(*this, watch(_))
+ .WillRepeatedly(Return(watchPromise.future()));
+
+ EXPECT_CALL(*this, isolate(_, _))
+ .WillRepeatedly(Return(Nothing()));
+
+ EXPECT_CALL(*this, cleanup(_))
+ .WillRepeatedly(Return(Nothing()));
+
+ EXPECT_CALL(*this, prepare(_, _, _, _))
+ .WillRepeatedly(Invoke(this, &MockIsolatorProcess::_prepare));
+ }
+
+ MOCK_METHOD1(
+ recover,
+ process::Future<Nothing>(
+ const std::list<mesos::slave::ExecutorRunState>&));
+
+ MOCK_METHOD4(
+ prepare,
+ process::Future<Option<CommandInfo>>(
+ const ContainerID&,
+ const ExecutorInfo&,
+ const std::string&,
+ const Option<std::string>&));
+
+ virtual process::Future<Option<CommandInfo>> _prepare(
+ const ContainerID& containerId,
+ const ExecutorInfo& executorInfo,
+ const std::string& directory,
+ const Option<std::string>& user)
+ {
+ return None();
+ }
+
+ MOCK_METHOD2(
+ isolate,
+ process::Future<Nothing>(const ContainerID&, pid_t));
+
+ MOCK_METHOD1(
+ watch,
+ process::Future<mesos::slave::Limitation>(const ContainerID&));
+
+ MOCK_METHOD2(
+ update,
+ process::Future<Nothing>(const ContainerID&, const Resources&));
+
+ MOCK_METHOD1(
+ usage,
+ process::Future<ResourceStatistics>(const ContainerID&));
+
+ MOCK_METHOD1(
+ cleanup,
+ process::Future<Nothing>(const ContainerID&));
+
+ Promise<mesos::slave::Limitation> watchPromise;
+};
+
+
// Destroying a mesos containerizer while it is fetching should
// complete without waiting for the fetching to finish.
TEST_F(MesosContainerizerDestroyTest, DestroyWhileFetching)
@@ -412,6 +476,84 @@ TEST_F(MesosContainerizerDestroyTest, DestroyWhileFetching)
}
+// Destroying a mesos containerizer while it is preparing should
+// wait until isolators are finished preparing before destroying.
+TEST_F(MesosContainerizerDestroyTest, DestroyWhilePreparing)
+{
+ slave::Flags flags;
+ Try<Launcher*> launcher = PosixLauncher::create(flags);
+ ASSERT_SOME(launcher);
+ vector<Owned<Isolator>> isolators;
+
+ MockIsolatorProcess* isolatorProcess = new MockIsolatorProcess();
+
+ Owned<Isolator> isolator(
+ new Isolator(Owned<IsolatorProcess>((IsolatorProcess*)isolatorProcess)));
+
+ isolators.push_back(isolator);
+
+ Future<Nothing> prepare;
+ Promise<Option<CommandInfo>> promise;
+ // Simulate a long prepare from the isolator.
+ EXPECT_CALL(*isolatorProcess, prepare(_, _, _, _))
+ .WillOnce(DoAll(FutureSatisfy(&prepare),
+ Return(promise.future())));
+
+ Fetcher fetcher;
+
+ MockMesosContainerizerProcess* process = new MockMesosContainerizerProcess(
+ flags,
+ true,
+ &fetcher,
+ Owned<Launcher>(launcher.get()),
+ isolators);
+
+ MesosContainerizer containerizer((Owned<MesosContainerizerProcess>(process)));
+
+ ContainerID containerId;
+ containerId.set_value("test_container");
+
+ TaskInfo taskInfo;
+ CommandInfo commandInfo;
+ taskInfo.mutable_command()->MergeFrom(commandInfo);
+
+ containerizer.launch(
+ containerId,
+ taskInfo,
+ CREATE_EXECUTOR_INFO("executor", "exit 0"),
+ os::getcwd(),
+ None(),
+ SlaveID(),
+ process::PID<Slave>(),
+ false);
+
+ Future<containerizer::Termination> wait = containerizer.wait(containerId);
+
+ AWAIT_READY(prepare);
+
+ containerizer.destroy(containerId);
+
+ // The container should not exit until prepare is complete.
+ ASSERT_TRUE(wait.isPending());
+
+ // Need to help the compiler to disambiguate between overloads.
+ Option<CommandInfo> option = commandInfo;
+ promise.set(option);
+
+ AWAIT_READY(wait);
+
+ containerizer::Termination termination = wait.get();
+
+ ASSERT_EQ(
+ "Container destroyed while preparing isolators",
+ termination.message());
+
+ ASSERT_TRUE(termination.killed());
+
+ ASSERT_FALSE(termination.has_status());
+}
+
+
// This action destroys the container using the real launcher and
// waits until the destroy is complete.
ACTION_P(InvokeDestroyAndWait, launcher)