You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by tn...@apache.org on 2015/04/24 02:08:38 UTC

mesos git commit: Fixed recover tasks only by the intiated containerizer.

Repository: mesos
Updated Branches:
  refs/heads/master 5f1b9253c -> 1a69b5565


Fixed recover tasks only by the intiated containerizer.

Review: https://reviews.apache.org/r/33257


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1a69b556
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1a69b556
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1a69b556

Branch: refs/heads/master
Commit: 1a69b55652e867fee80ffb224ee3096b9e6c5390
Parents: 5f1b925
Author: Timothy Chen <tn...@apache.org>
Authored: Wed Apr 15 15:18:35 2015 -0700
Committer: Timothy Chen <tn...@apache.org>
Committed: Thu Apr 23 17:08:31 2015 -0700

----------------------------------------------------------------------
 src/slave/containerizer/docker.cpp              | 53 ++++++++++++++++++--
 src/slave/containerizer/docker.hpp              |  4 ++
 src/slave/containerizer/mesos/containerizer.cpp | 12 +++++
 src/slave/slave.cpp                             |  9 +++-
 src/tests/containerizer_tests.cpp               | 51 +++++++++++++++++++
 src/tests/docker_containerizer_tests.cpp        | 52 +++++++++++++++++++
 6 files changed, 175 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index f9fb078..f9fc89a 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -421,6 +421,34 @@ Future<Nothing> DockerContainerizerProcess::recover(
     const Option<SlaveState>& state)
 {
   LOG(INFO) << "Recovering Docker containers";
+  // Get the list of all Docker containers (running and exited) in
+  // order to remove any orphans and reconcile checkpointed executors.
+  // TODO(tnachen): Remove this when we expect users to have already
+  // upgraded to 0.23.
+  return docker->ps(true, DOCKER_NAME_PREFIX)
+    .then(defer(self(), &Self::_recover, state, lambda::_1));
+}
+
+Future<Nothing> DockerContainerizerProcess::_recover(
+    const Option<SlaveState>& state,
+    const list<Docker::Container>& _containers)
+{
+  // Although the slave checkpoints executor pids, before 0.23
+  // docker containers without custom executors didn't record the
+  // container type in the executor info, therefore the Docker
+  // containerizer doesn't know if it should recover that container
+  // as it could be launched from another containerizer. The
+  // workaround is to reconcile running Docker containers and see
+  // if we can find an known container that matches the
+  // checkpointed container id.
+  // TODO(tnachen): Remove this explicit reconciliation 0.24.
+  hashset<ContainerID> existingContainers;
+  foreach (const Docker::Container& container, _containers) {
+    Option<ContainerID> id = parse(container);
+    if (id.isSome()) {
+      existingContainers.insert(id.get());
+    }
+  }
 
   if (state.isSome()) {
     // Collection of pids that we've started reaping in order to
@@ -467,6 +495,24 @@ Future<Nothing> DockerContainerizerProcess::recover(
           continue;
         }
 
+        const ExecutorInfo executorInfo = executor.info.get();
+        if (executorInfo.has_container() &&
+            executorInfo.container().type() != ContainerInfo::DOCKER) {
+          LOG(INFO) << "Skipping recovery of executor '" << executor.id
+                    << "' of framework " << framework.id
+                    << " because it was not launched from docker containerizer";
+          continue;
+        }
+
+        if (!executorInfo.has_container() &&
+            !existingContainers.contains(containerId)) {
+          LOG(INFO) << "Skipping recovery of executor '" << executor.id
+                    << "' of framework " << framework.id
+                    << " because its executor is not marked as docker "
+                    << "and the docker container doesn't exist";
+          continue;
+        }
+
         LOG(INFO) << "Recovering container '" << containerId
                   << "' for executor '" << executor.id
                   << "' of framework " << framework.id;
@@ -501,14 +547,11 @@ Future<Nothing> DockerContainerizerProcess::recover(
     }
   }
 
-  // Get the list of all Docker containers (running and exited) in
-  // order to remove any orphans.
-  return docker->ps(true, DOCKER_NAME_PREFIX)
-    .then(defer(self(), &Self::_recover, lambda::_1));
+  return __recover(_containers);
 }
 
 
-Future<Nothing> DockerContainerizerProcess::_recover(
+Future<Nothing> DockerContainerizerProcess::__recover(
     const list<Docker::Container>& _containers)
 {
   foreach (const Docker::Container& container, _containers) {

http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/slave/containerizer/docker.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.hpp b/src/slave/containerizer/docker.hpp
index 0d5289c..b25ec55 100644
--- a/src/slave/containerizer/docker.hpp
+++ b/src/slave/containerizer/docker.hpp
@@ -178,6 +178,10 @@ private:
       pid_t pid);
 
   process::Future<Nothing> _recover(
+      const Option<state::SlaveState>& state,
+      const std::list<Docker::Container>& containers);
+
+  process::Future<Nothing> __recover(
       const std::list<Docker::Container>& containers);
 
   process::Future<Nothing> _launch(

http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 0bd26ea..ea3b499 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -354,6 +354,18 @@ Future<Nothing> MesosContainerizerProcess::recover(
           continue;
         }
 
+        // Note that MesosContainerizer will also recover executors
+        // launched by the DockerContainerizer as before 0.23 the
+        // slave doesn't checkpoint container information.
+        const ExecutorInfo& executorInfo = executor.info.get();
+        if (executorInfo.has_container() &&
+            executorInfo.container().type() != ContainerInfo::MESOS) {
+          LOG(INFO) << "Skipping recovery of executor '" << executor.id
+                    << "' of framework " << framework.id
+                    << " because it was not launched from mesos containerizer";
+          continue;
+        }
+
         LOG(INFO) << "Recovering container '" << containerId
                   << "' for executor '" << executor.id
                   << "' of framework " << framework.id;

http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 60345ec..f68a005 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -2953,9 +2953,16 @@ ExecutorInfo Slave::getExecutorInfo(
 
     // Command executors share the same id as the task.
     executor.mutable_executor_id()->set_value(task.task_id().value());
-
     executor.mutable_framework_id()->CopyFrom(frameworkId);
 
+    if (task.has_container() &&
+        task.container().type() != ContainerInfo::MESOS) {
+      // Store the container info in the executor info so it will
+      // be checkpointed. This allows the correct containerizer to
+      // recover this task on restart.
+      executor.mutable_container()->CopyFrom(task.container());
+    }
+
     // Prepare an executor name which includes information on the
     // command being launched.
     string name = "(Task: " + task.task_id().value() + ") ";

http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/tests/containerizer_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer_tests.cpp b/src/tests/containerizer_tests.cpp
index d100ab9..394b16f 100644
--- a/src/tests/containerizer_tests.cpp
+++ b/src/tests/containerizer_tests.cpp
@@ -45,6 +45,7 @@
 #include "tests/utils.hpp"
 
 using namespace mesos::internal::slave;
+using namespace mesos::internal::slave::state;
 
 using namespace mesos::slave;
 
@@ -634,6 +635,56 @@ TEST_F(MesosContainerizerDestroyTest, LauncherDestroyFailure)
       metrics.values["containerizer/mesos/container_destroy_errors"]);
 }
 
+
+class MesosContainerizerRecoverTest : public MesosTest {};
+
+
+// This test checks that MesosContainerizer doesn't recover executors
+// that were started by another containerizer (e.g: Docker).
+TEST_F(MesosContainerizerRecoverTest, SkipRecoverNonMesosContainers)
+{
+  slave::Flags flags;
+  Fetcher fetcher;
+
+  Try<MesosContainerizer*> containerizer =
+    MesosContainerizer::create(flags, true, &fetcher);
+  ASSERT_SOME(containerizer);
+
+  ExecutorID executorId;
+  executorId.set_value(UUID::random().toString());
+
+  ContainerID containerId;
+  containerId.set_value(UUID::random().toString());
+
+  ExecutorInfo executorInfo;
+  executorInfo.mutable_container()->set_type(ContainerInfo::DOCKER);
+
+  ExecutorState executorState;
+  executorState.info = executorInfo;
+  executorState.latest = containerId;
+
+  RunState runState;
+  runState.id = containerId;
+  executorState.runs.put(containerId, runState);
+
+  FrameworkState frameworkState;
+  frameworkState.executors.put(executorId, executorState);
+
+  SlaveState slaveState;
+  FrameworkID frameworkId;
+  frameworkId.set_value(UUID::random().toString());
+  slaveState.frameworks.put(frameworkId, frameworkState);
+
+  Future<Nothing> recover = containerizer.get()->recover(slaveState);
+  AWAIT_READY(recover);
+
+  Future<hashset<ContainerID>> containers = containerizer.get()->containers();
+  AWAIT_READY(containers);
+  EXPECT_EQ(0, containers.get().size());
+
+  delete containerizer.get();
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/1a69b556/src/tests/docker_containerizer_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/docker_containerizer_tests.cpp b/src/tests/docker_containerizer_tests.cpp
index b119a17..033fd26 100644
--- a/src/tests/docker_containerizer_tests.cpp
+++ b/src/tests/docker_containerizer_tests.cpp
@@ -1278,6 +1278,58 @@ TEST_F(DockerContainerizerTest, DISABLED_ROOT_DOCKER_Recover)
 }
 
 
+// This test checks the docker containerizer doesn't recover executors
+// that were started by another containerizer (e.g: mesos).
+TEST_F(DockerContainerizerTest, ROOT_DOCKER_SkipRecoverNonDocker)
+{
+  slave::Flags flags = CreateSlaveFlags();
+
+  MockDocker* mockDocker = new MockDocker(tests::flags.docker);
+  Shared<Docker> docker(mockDocker);
+
+  Fetcher fetcher;
+
+  MockDockerContainerizer dockerContainerizer(flags, &fetcher, docker);
+
+  ContainerID containerId;
+  containerId.set_value("c1");
+  ContainerID reapedContainerId;
+  reapedContainerId.set_value("c2");
+
+  ExecutorID executorId;
+  executorId.set_value(UUID::random().toString());
+
+  ExecutorInfo executorInfo;
+  executorInfo.mutable_container()->set_type(ContainerInfo::MESOS);
+
+  ExecutorState executorState;
+  executorState.info = executorInfo;
+  executorState.latest = containerId;
+
+  RunState runState;
+  runState.id = containerId;
+  executorState.runs.put(containerId, runState);
+
+  FrameworkState frameworkState;
+  frameworkState.executors.put(executorId, executorState);
+
+  SlaveState slaveState;
+  FrameworkID frameworkId;
+  frameworkId.set_value(UUID::random().toString());
+  slaveState.frameworks.put(frameworkId, frameworkState);
+
+  Future<Nothing> recover = dockerContainerizer.recover(slaveState);
+  AWAIT_READY(recover);
+
+  Future<hashset<ContainerID>> containers = dockerContainerizer.containers();
+  AWAIT_READY(containers);
+
+  // A MesosContainerizer task shouldn't be recovered by
+  // DockerContainerizer.
+  EXPECT_EQ(0, containers.get().size());
+}
+
+
 TEST_F(DockerContainerizerTest, ROOT_DOCKER_Logs)
 {
   Try<PID<Master> > master = StartMaster();