You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2016/10/01 00:14:54 UTC

[4/4] mesos git commit: Updated 'destroy()' to checkpoint termination state of nested container.

Updated 'destroy()' to checkpoint termination state of nested container.

Previously, when a nested container was being destroyed, it's runtime
directory was being deleted (just the same as a top-level container).
However, this meant that calling 'wait()' on a previously terminated
nested container would return 'None()' since its status had already
been reaped. The problem with this, however, is that this will cause
an entire pod to be terminated since it thinks that the container it
is calling wait on cannot be found.

To fix this, we leave the runtime directory of nested containers
around until their top-level containers are destroyed. Additionally,
we checkpiont the entire termination state of the nested container
into its runtime directory, so that subsequent calls to 'wait()' can
retrieve the full termination state for the lifetime of the top-level
container.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/79838a29
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/79838a29
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/79838a29

Branch: refs/heads/master
Commit: 79838a290ee4efa6c548603b5142be26f77002b3
Parents: b71103a
Author: Kevin Klues <kl...@gmail.com>
Authored: Fri Sep 30 16:38:59 2016 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Fri Sep 30 17:05:28 2016 -0700

----------------------------------------------------------------------
 src/Makefile.am                                 |   1 +
 src/slave/containerizer/mesos/containerizer.cpp |  78 +++++++++-
 src/slave/containerizer/mesos/paths.cpp         |  31 ++++
 src/slave/containerizer/mesos/paths.hpp         |   9 ++
 .../containerizer/nested_container_tests.cpp    | 144 +++++++++++++++++++
 5 files changed, 257 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/79838a29/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index 63559e2..c897d86 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2163,6 +2163,7 @@ mesos_tests_SOURCES =						\
   tests/containerizer/memory_test_helper.cpp			\
   tests/containerizer/mesos_containerizer_tests.cpp		\
   tests/containerizer/mesos_containerizer_paths_tests.cpp	\
+  tests/containerizer/nested_container_tests.cpp		\
   tests/containerizer/provisioner_appc_tests.cpp		\
   tests/containerizer/provisioner_backend_tests.cpp		\
   tests/containerizer/provisioner_docker_tests.cpp		\

http://git-wip-us.apache.org/repos/asf/mesos/blob/79838a29/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index bd27bd8..fc53b4e 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -704,6 +704,19 @@ Future<Nothing> MesosContainerizerProcess::recover(
       continue;
     }
 
+    // Nested containers may have already been destroyed, but we leave
+    // their runtime directories around for the lifetime of their
+    // top-level container. If they have already been destroyed, we
+    // checkpoint their termination state, so the existence of this
+    // checkpointed information means we can safely ignore them here.
+    const string terminationPath = path::join(
+        containerizer::paths::getRuntimePath(flags.runtime_dir, containerId),
+        containerizer::paths::TERMINATION_FILE);
+
+    if (os::exists(terminationPath)) {
+      continue;
+    }
+
     // Attempt to read the pid from the container runtime directory.
     Result<pid_t> pid =
       containerizer::paths::getContainerPid(flags.runtime_dir, containerId);
@@ -1684,8 +1697,28 @@ Future<Option<ContainerTermination>> MesosContainerizerProcess::wait(
     const ContainerID& containerId)
 {
   if (!containers_.contains(containerId)) {
-    // See the comments in destroy() for race conditions
-    // which lead to "unknown containers".
+    // If a container does not exist in our `container_` hashmap, it
+    // may be a nested container with checkpointed termination
+    // state. Attempt to return as such.
+    if (containerId.has_parent()) {
+      Result<ContainerTermination> termination =
+        containerizer::paths::getContainerTermination(
+            flags.runtime_dir,
+            containerId);
+
+      if (termination.isError()) {
+        return Failure("Failed to get container termination state:"
+                       " " + termination.error());
+      }
+
+      if (termination.isSome()) {
+        return termination.get();
+      }
+    }
+
+    // For all other cases return `None()`. See the comments in
+    // `destroy()` for race conditions which lead to "unknown
+    // containers".
     return None();
   }
 
@@ -2151,13 +2184,46 @@ void MesosContainerizerProcess::______destroy(
     termination.set_message(strings::join("; ", messages));
   }
 
-  // Remove the runtime path for the container. Note that it is likely
-  // that the runtime path does not exist (e.g., legacy container). We
-  // should ignore the removal if that's the case.
+  // Now that we are done destroying the container we need to cleanup
+  // it's runtime directory. There are two cases to consider:
+  //
+  // (1) We are a nested container:
+  //     In this case we should defer deletion of the runtime directory
+  //     until the top-level container is destroyed. Instead, we
+  //     checkpoint a file with the termination state indicating that
+  //     the container has already been destroyed. This allows
+  //     subsequent calls to `wait()` to succeed with the proper
+  //     termination state until the top-level container is destroyed.
+  //     It also prevents subsequent `destroy()` calls from attempting
+  //     to cleanup the container a second time.
+  //
+  // (2) We are a top-level container:
+  //     We should simply remove the runtime directory. Since we build
+  //     the runtime directories of nested containers hierarchically,
+  //     removing the top-level runtime directory will automatically
+  //     cleanup all nested container runtime directories as well.
+  //
+  // NOTE: The runtime directory will not exist for legacy containers,
+  // so we need to make sure it actually exists before attempting to
+  // remove it.
   const string runtimePath =
     containerizer::paths::getRuntimePath(flags.runtime_dir, containerId);
 
-  if (os::exists(runtimePath)) {
+  if (containerId.has_parent()) {
+    const string terminationPath =
+      path::join(runtimePath, containerizer::paths::TERMINATION_FILE);
+
+    LOG(INFO) << "Checkpointing termination state to nested container's"
+              << " runtime directory '" << terminationPath <<  "'";
+
+    Try<Nothing> checkpointed =
+      slave::state::checkpoint(terminationPath, termination);
+
+    if (checkpointed.isError()) {
+      LOG(ERROR) << "Failed to checkpoint nested container's termination state"
+                 << " to '" << terminationPath << "': " << checkpointed.error();
+    }
+  } else if (os::exists(runtimePath)) {
     Try<Nothing> rmdir = os::rmdir(runtimePath);
     if (rmdir.isError()) {
       LOG(WARNING) << "Failed to remove the runtime directory"

http://git-wip-us.apache.org/repos/asf/mesos/blob/79838a29/src/slave/containerizer/mesos/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.cpp b/src/slave/containerizer/mesos/paths.cpp
index 9648692..361307f 100644
--- a/src/slave/containerizer/mesos/paths.cpp
+++ b/src/slave/containerizer/mesos/paths.cpp
@@ -17,9 +17,12 @@
 #include <stout/lambda.hpp>
 #include <stout/os.hpp>
 #include <stout/path.hpp>
+#include <stout/protobuf.hpp>
 
 #include "slave/containerizer/mesos/paths.hpp"
 
+using mesos::slave::ContainerTermination;
+
 using std::list;
 using std::string;
 using std::vector;
@@ -131,6 +134,34 @@ Result<int> getContainerStatus(
 }
 
 
+Result<ContainerTermination> getContainerTermination(
+    const string& runtimeDir,
+    const ContainerID& containerId)
+{
+  const string path = path::join(
+      getRuntimePath(runtimeDir, containerId),
+      TERMINATION_FILE);
+
+  if (!os::exists(path)) {
+    // This is possible because we don't atomically create the
+    // directory and write the 'TERMINATION' file and thus we might
+    // terminate/restart after we've created the directory but
+    // before we've written the file.
+    return None();
+  }
+
+  const Result<ContainerTermination>& termination =
+    ::protobuf::read<ContainerTermination>(path);
+
+  if (termination.isError()) {
+    return Error("Failed to read termination state of container:"
+                 " " + termination.error());
+  }
+
+  return termination;
+}
+
+
 Try<vector<ContainerID>> getContainerIds(const string& runtimeDir)
 {
   lambda::function<Try<vector<ContainerID>>(const Option<ContainerID>&)> helper;

http://git-wip-us.apache.org/repos/asf/mesos/blob/79838a29/src/slave/containerizer/mesos/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.hpp b/src/slave/containerizer/mesos/paths.hpp
index 349ee00..883cc26 100644
--- a/src/slave/containerizer/mesos/paths.hpp
+++ b/src/slave/containerizer/mesos/paths.hpp
@@ -27,6 +27,8 @@
 
 #include <mesos/mesos.hpp>
 
+#include <mesos/slave/containerizer.hpp>
+
 namespace mesos {
 namespace internal {
 namespace slave {
@@ -35,6 +37,7 @@ namespace paths {
 
 constexpr char PID_FILE[] = "pid";
 constexpr char STATUS_FILE[] = "status";
+constexpr char TERMINATION_FILE[] = "termination";
 constexpr char CONTAINER_DIRECTORY[] = "containers";
 
 
@@ -90,6 +93,12 @@ Result<int> getContainerStatus(
     const ContainerID& containerId);
 
 
+// The helper method to read the container termination state.
+Result<mesos::slave::ContainerTermination> getContainerTermination(
+    const std::string& runtimeDir,
+    const ContainerID& containerId);
+
+
 // The helper method to list all container IDs (including nested
 // containers) from the container runtime directory. The order of
 // returned vector is a result of pre-ordering walk (i.e., parent

http://git-wip-us.apache.org/repos/asf/mesos/blob/79838a29/src/tests/containerizer/nested_container_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/nested_container_tests.cpp b/src/tests/containerizer/nested_container_tests.cpp
new file mode 100644
index 0000000..8430823
--- /dev/null
+++ b/src/tests/containerizer/nested_container_tests.cpp
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string>
+
+#include <stout/gtest.hpp>
+#include <stout/none.hpp>
+#include <stout/option.hpp>
+#include <stout/try.hpp>
+
+#include <process/future.hpp>
+#include <process/gtest.hpp>
+
+#include "slave/containerizer/mesos/containerizer.hpp"
+
+#include "tests/environment.hpp"
+#include "tests/mesos.hpp"
+
+using mesos::internal::slave::Fetcher;
+using mesos::internal::slave::MesosContainerizer;
+using mesos::internal::slave::MesosContainerizerProcess;
+
+using mesos::internal::slave::state::SlaveState;
+
+using mesos::slave::ContainerTermination;
+
+using process::Future;
+using process::Owned;
+
+using std::string;
+
+namespace mesos {
+namespace internal {
+namespace tests {
+
+class NestedContainerTest : public ContainerizerTest<MesosContainerizer> {};
+
+
+TEST_F(NestedContainerTest, ROOT_CGROUPS_WaitAfterDestroy)
+{
+  slave::Flags flags = CreateSlaveFlags();
+  flags.launcher = "linux";
+  flags.isolation = "cgroups/cpu,filesystem/linux,namespaces/pid";
+
+  Fetcher fetcher;
+
+  Try<MesosContainerizer*> create = MesosContainerizer::create(
+      flags,
+      true,
+      &fetcher);
+
+  ASSERT_SOME(create);
+
+  Owned<MesosContainerizer> containerizer(create.get());
+
+  SlaveID slaveId = SlaveID();
+
+  // Launch a top-level container.
+  ContainerID containerId;
+  containerId.set_value(UUID::random().toString());
+
+  ExecutorInfo executor = CREATE_EXECUTOR_INFO("executor", "sleep 1000");
+  executor.mutable_resources()->CopyFrom(Resources::parse("cpus:1").get());
+
+  Try<string> directory = environment->mkdtemp();
+  ASSERT_SOME(directory);
+
+  Future<bool> launch = containerizer->launch(
+      containerId,
+      None(),
+      executor,
+      directory.get(),
+      None(),
+      slaveId,
+      map<string, string>(),
+      true);
+
+  AWAIT_ASSERT_TRUE(launch);
+
+  // Launch a nested container.
+  ContainerID nestedContainerId;
+  nestedContainerId.mutable_parent()->CopyFrom(containerId);
+  nestedContainerId.set_value(UUID::random().toString());
+
+  launch = containerizer->launch(
+      nestedContainerId,
+      CREATE_COMMAND_INFO("exit 42"),
+      None(),
+      None(),
+      slaveId);
+
+  AWAIT_ASSERT_TRUE(launch);
+
+  // Wait once (which does a destroy),
+  // then wait again on the nested container.
+  Future<Option<ContainerTermination>> nestedWait =
+    containerizer->wait(nestedContainerId);
+
+  AWAIT_READY(nestedWait);
+  ASSERT_SOME(nestedWait.get());
+  ASSERT_TRUE(nestedWait.get()->has_status());
+  EXPECT_WEXITSTATUS_EQ(42, nestedWait.get()->status());
+
+  nestedWait = containerizer->wait(nestedContainerId);
+
+  AWAIT_READY(nestedWait);
+  ASSERT_SOME(nestedWait.get());
+  ASSERT_TRUE(nestedWait.get()->has_status());
+  EXPECT_WEXITSTATUS_EQ(42, nestedWait.get()->status());
+
+  // Destroy the top-level container.
+  Future<Option<ContainerTermination>> wait =
+    containerizer->wait(containerId);
+
+  AWAIT_READY(containerizer->destroy(containerId));
+
+  AWAIT_READY(wait);
+  ASSERT_SOME(wait.get());
+  ASSERT_TRUE(wait.get()->has_status());
+  EXPECT_WTERMSIG_EQ(SIGKILL, wait.get()->status());
+
+  // Wait on nested container again.
+  nestedWait = containerizer->wait(nestedContainerId);
+
+  AWAIT_READY(nestedWait);
+  ASSERT_NONE(nestedWait.get());
+}
+
+} // namespace tests {
+} // namespace internal {
+} // namespace mesos {