You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by tn...@apache.org on 2015/04/24 02:08:38 UTC
mesos git commit: Fixed recover tasks only by the intiated
containerizer.
Repository: mesos
Updated Branches:
refs/heads/master 5f1b9253c -> 1a69b5565
Fixed recover tasks only by the intiated containerizer.
Review: https://reviews.apache.org/r/33257
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1a69b556
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1a69b556
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1a69b556
Branch: refs/heads/master
Commit: 1a69b55652e867fee80ffb224ee3096b9e6c5390
Parents: 5f1b925
Author: Timothy Chen <tn...@apache.org>
Authored: Wed Apr 15 15:18:35 2015 -0700
Committer: Timothy Chen <tn...@apache.org>
Committed: Thu Apr 23 17:08:31 2015 -0700
----------------------------------------------------------------------
src/slave/containerizer/docker.cpp | 53 ++++++++++++++++++--
src/slave/containerizer/docker.hpp | 4 ++
src/slave/containerizer/mesos/containerizer.cpp | 12 +++++
src/slave/slave.cpp | 9 +++-
src/tests/containerizer_tests.cpp | 51 +++++++++++++++++++
src/tests/docker_containerizer_tests.cpp | 52 +++++++++++++++++++
6 files changed, 175 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index f9fb078..f9fc89a 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -421,6 +421,34 @@ Future<Nothing> DockerContainerizerProcess::recover(
const Option<SlaveState>& state)
{
LOG(INFO) << "Recovering Docker containers";
+ // Get the list of all Docker containers (running and exited) in
+ // order to remove any orphans and reconcile checkpointed executors.
+ // TODO(tnachen): Remove this when we expect users to have already
+ // upgraded to 0.23.
+ return docker->ps(true, DOCKER_NAME_PREFIX)
+ .then(defer(self(), &Self::_recover, state, lambda::_1));
+}
+
+Future<Nothing> DockerContainerizerProcess::_recover(
+ const Option<SlaveState>& state,
+ const list<Docker::Container>& _containers)
+{
+ // Although the slave checkpoints executor pids, before 0.23
+ // docker containers without custom executors didn't record the
+ // container type in the executor info, therefore the Docker
+ // containerizer doesn't know if it should recover that container
+ // as it could be launched from another containerizer. The
+ // workaround is to reconcile running Docker containers and see
+ // if we can find an known container that matches the
+ // checkpointed container id.
+ // TODO(tnachen): Remove this explicit reconciliation 0.24.
+ hashset<ContainerID> existingContainers;
+ foreach (const Docker::Container& container, _containers) {
+ Option<ContainerID> id = parse(container);
+ if (id.isSome()) {
+ existingContainers.insert(id.get());
+ }
+ }
if (state.isSome()) {
// Collection of pids that we've started reaping in order to
@@ -467,6 +495,24 @@ Future<Nothing> DockerContainerizerProcess::recover(
continue;
}
+ const ExecutorInfo executorInfo = executor.info.get();
+ if (executorInfo.has_container() &&
+ executorInfo.container().type() != ContainerInfo::DOCKER) {
+ LOG(INFO) << "Skipping recovery of executor '" << executor.id
+ << "' of framework " << framework.id
+ << " because it was not launched from docker containerizer";
+ continue;
+ }
+
+ if (!executorInfo.has_container() &&
+ !existingContainers.contains(containerId)) {
+ LOG(INFO) << "Skipping recovery of executor '" << executor.id
+ << "' of framework " << framework.id
+ << " because its executor is not marked as docker "
+ << "and the docker container doesn't exist";
+ continue;
+ }
+
LOG(INFO) << "Recovering container '" << containerId
<< "' for executor '" << executor.id
<< "' of framework " << framework.id;
@@ -501,14 +547,11 @@ Future<Nothing> DockerContainerizerProcess::recover(
}
}
- // Get the list of all Docker containers (running and exited) in
- // order to remove any orphans.
- return docker->ps(true, DOCKER_NAME_PREFIX)
- .then(defer(self(), &Self::_recover, lambda::_1));
+ return __recover(_containers);
}
-Future<Nothing> DockerContainerizerProcess::_recover(
+Future<Nothing> DockerContainerizerProcess::__recover(
const list<Docker::Container>& _containers)
{
foreach (const Docker::Container& container, _containers) {
http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/slave/containerizer/docker.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.hpp b/src/slave/containerizer/docker.hpp
index 0d5289c..b25ec55 100644
--- a/src/slave/containerizer/docker.hpp
+++ b/src/slave/containerizer/docker.hpp
@@ -178,6 +178,10 @@ private:
pid_t pid);
process::Future<Nothing> _recover(
+ const Option<state::SlaveState>& state,
+ const std::list<Docker::Container>& containers);
+
+ process::Future<Nothing> __recover(
const std::list<Docker::Container>& containers);
process::Future<Nothing> _launch(
http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 0bd26ea..ea3b499 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -354,6 +354,18 @@ Future<Nothing> MesosContainerizerProcess::recover(
continue;
}
+ // Note that MesosContainerizer will also recover executors
+ // launched by the DockerContainerizer as before 0.23 the
+ // slave doesn't checkpoint container information.
+ const ExecutorInfo& executorInfo = executor.info.get();
+ if (executorInfo.has_container() &&
+ executorInfo.container().type() != ContainerInfo::MESOS) {
+ LOG(INFO) << "Skipping recovery of executor '" << executor.id
+ << "' of framework " << framework.id
+ << " because it was not launched from mesos containerizer";
+ continue;
+ }
+
LOG(INFO) << "Recovering container '" << containerId
<< "' for executor '" << executor.id
<< "' of framework " << framework.id;
http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 60345ec..f68a005 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -2953,9 +2953,16 @@ ExecutorInfo Slave::getExecutorInfo(
// Command executors share the same id as the task.
executor.mutable_executor_id()->set_value(task.task_id().value());
-
executor.mutable_framework_id()->CopyFrom(frameworkId);
+ if (task.has_container() &&
+ task.container().type() != ContainerInfo::MESOS) {
+ // Store the container info in the executor info so it will
+ // be checkpointed. This allows the correct containerizer to
+ // recover this task on restart.
+ executor.mutable_container()->CopyFrom(task.container());
+ }
+
// Prepare an executor name which includes information on the
// command being launched.
string name = "(Task: " + task.task_id().value() + ") ";
http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/tests/containerizer_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer_tests.cpp b/src/tests/containerizer_tests.cpp
index d100ab9..394b16f 100644
--- a/src/tests/containerizer_tests.cpp
+++ b/src/tests/containerizer_tests.cpp
@@ -45,6 +45,7 @@
#include "tests/utils.hpp"
using namespace mesos::internal::slave;
+using namespace mesos::internal::slave::state;
using namespace mesos::slave;
@@ -634,6 +635,56 @@ TEST_F(MesosContainerizerDestroyTest, LauncherDestroyFailure)
metrics.values["containerizer/mesos/container_destroy_errors"]);
}
+
+class MesosContainerizerRecoverTest : public MesosTest {};
+
+
+// This test checks that MesosContainerizer doesn't recover executors
+// that were started by another containerizer (e.g: Docker).
+TEST_F(MesosContainerizerRecoverTest, SkipRecoverNonMesosContainers)
+{
+ slave::Flags flags;
+ Fetcher fetcher;
+
+ Try<MesosContainerizer*> containerizer =
+ MesosContainerizer::create(flags, true, &fetcher);
+ ASSERT_SOME(containerizer);
+
+ ExecutorID executorId;
+ executorId.set_value(UUID::random().toString());
+
+ ContainerID containerId;
+ containerId.set_value(UUID::random().toString());
+
+ ExecutorInfo executorInfo;
+ executorInfo.mutable_container()->set_type(ContainerInfo::DOCKER);
+
+ ExecutorState executorState;
+ executorState.info = executorInfo;
+ executorState.latest = containerId;
+
+ RunState runState;
+ runState.id = containerId;
+ executorState.runs.put(containerId, runState);
+
+ FrameworkState frameworkState;
+ frameworkState.executors.put(executorId, executorState);
+
+ SlaveState slaveState;
+ FrameworkID frameworkId;
+ frameworkId.set_value(UUID::random().toString());
+ slaveState.frameworks.put(frameworkId, frameworkState);
+
+ Future<Nothing> recover = containerizer.get()->recover(slaveState);
+ AWAIT_READY(recover);
+
+ Future<hashset<ContainerID>> containers = containerizer.get()->containers();
+ AWAIT_READY(containers);
+ EXPECT_EQ(0, containers.get().size());
+
+ delete containerizer.get();
+}
+
} // namespace tests {
} // namespace internal {
} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/tests/docker_containerizer_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/docker_containerizer_tests.cpp b/src/tests/docker_containerizer_tests.cpp
index b119a17..033fd26 100644
--- a/src/tests/docker_containerizer_tests.cpp
+++ b/src/tests/docker_containerizer_tests.cpp
@@ -1278,6 +1278,58 @@ TEST_F(DockerContainerizerTest, DISABLED_ROOT_DOCKER_Recover)
}
+// This test checks the docker containerizer doesn't recover executors
+// that were started by another containerizer (e.g: mesos).
+TEST_F(DockerContainerizerTest, ROOT_DOCKER_SkipRecoverNonDocker)
+{
+ slave::Flags flags = CreateSlaveFlags();
+
+ MockDocker* mockDocker = new MockDocker(tests::flags.docker);
+ Shared<Docker> docker(mockDocker);
+
+ Fetcher fetcher;
+
+ MockDockerContainerizer dockerContainerizer(flags, &fetcher, docker);
+
+ ContainerID containerId;
+ containerId.set_value("c1");
+ ContainerID reapedContainerId;
+ reapedContainerId.set_value("c2");
+
+ ExecutorID executorId;
+ executorId.set_value(UUID::random().toString());
+
+ ExecutorInfo executorInfo;
+ executorInfo.mutable_container()->set_type(ContainerInfo::MESOS);
+
+ ExecutorState executorState;
+ executorState.info = executorInfo;
+ executorState.latest = containerId;
+
+ RunState runState;
+ runState.id = containerId;
+ executorState.runs.put(containerId, runState);
+
+ FrameworkState frameworkState;
+ frameworkState.executors.put(executorId, executorState);
+
+ SlaveState slaveState;
+ FrameworkID frameworkId;
+ frameworkId.set_value(UUID::random().toString());
+ slaveState.frameworks.put(frameworkId, frameworkState);
+
+ Future<Nothing> recover = dockerContainerizer.recover(slaveState);
+ AWAIT_READY(recover);
+
+ Future<hashset<ContainerID>> containers = dockerContainerizer.containers();
+ AWAIT_READY(containers);
+
+ // A MesosContainerizer task shouldn't be recovered by
+ // DockerContainerizer.
+ EXPECT_EQ(0, containers.get().size());
+}
+
+
TEST_F(DockerContainerizerTest, ROOT_DOCKER_Logs)
{
Try<PID<Master> > master = StartMaster();