You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2017/05/11 10:16:45 UTC

[04/11] mesos git commit: Checkpointed and recovered ContainerLaunchInfo for non-orphans.

Checkpointed and recovered ContainerLaunchInfo for non-orphans.

In the Mesos containerizer, cache each container's launch info and
recover it for non-orphan containers. This is a prerequisite for
persisting some container characteristics across agent failover.

Review: https://reviews.apache.org/r/58847


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/37f212e6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/37f212e6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/37f212e6

Branch: refs/heads/master
Commit: 37f212e608c4f0d0580daefae5583916c570d080
Parents: 7a670aa
Author: Alexander Rukletsov <al...@apache.org>
Authored: Fri Apr 28 16:32:32 2017 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu May 11 12:15:55 2017 +0200

----------------------------------------------------------------------
 src/slave/containerizer/mesos/containerizer.cpp | 42 +++++++++++++++++++-
 src/slave/containerizer/mesos/containerizer.hpp | 11 +++++
 src/slave/containerizer/mesos/paths.cpp         | 38 ++++++++++++++++++
 src/slave/containerizer/mesos/paths.hpp         | 14 +++++++
 4 files changed, 103 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/37f212e6/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index dd3a070..3ff0b0d 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -909,6 +909,26 @@ Future<Nothing> MesosContainerizerProcess::__recover(
     const list<ContainerState>& recovered,
     const hashset<ContainerID>& orphans)
 {
+  // Recover containers' launch information.
+  foreach (const ContainerState& run, recovered) {
+    const ContainerID& containerId = run.container_id();
+
+    // Attempt to read container's launch information.
+    Result<ContainerLaunchInfo> containerLaunchInfo =
+      containerizer::paths::getContainerLaunchInfo(
+          flags.runtime_dir, containerId);
+
+    if (containerLaunchInfo.isError()) {
+      return Failure(
+          "Failed to recover launch information of container '" +
+          stringify(containerId) + "': " + containerLaunchInfo.error());
+    }
+
+    if (containerLaunchInfo.isSome()) {
+      containers_[containerId]->launchInfo = containerLaunchInfo.get();
+    }
+  }
+
   foreach (const ContainerState& run, recovered) {
     const ContainerID& containerId = run.container_id();
 
@@ -1498,6 +1518,25 @@ Future<bool> MesosContainerizerProcess::_launch(
     launchInfo.set_user("root");
   }
 
+  // Store container's launch information for future access.
+  container->launchInfo = launchInfo;
+
+  // Checkpoint container's launch information.
+  const string launchInfoPath =
+    containerizer::paths::getContainerLaunchInfoPath(
+        flags.runtime_dir, containerId);
+
+  Try<Nothing> checkpointed = slave::state::checkpoint(
+      launchInfoPath, launchInfo);
+
+  if (checkpointed.isError()) {
+    LOG(ERROR) << "Failed to checkpoint container's launch information to '"
+               << launchInfoPath << "': " << checkpointed.error();
+
+    return Failure("Could not checkpoint container's launch information: " +
+                   checkpointed.error());
+  }
+
   // Use a pipe to block the child until it's been isolated.
   // The `pipes` array is captured later in a lambda.
   Try<std::array<int_fd, 2>> pipes_ = os::pipe();
@@ -1657,8 +1696,7 @@ Future<bool> MesosContainerizerProcess::_launch(
       containerizer::paths::getRuntimePath(flags.runtime_dir, containerId),
       containerizer::paths::PID_FILE);
 
-  Try<Nothing> checkpointed =
-    slave::state::checkpoint(pidPath, stringify(pid));
+  checkpointed = slave::state::checkpoint(pidPath, stringify(pid));
 
   if (checkpointed.isError()) {
     return Failure("Failed to checkpoint the container pid to"

http://git-wip-us.apache.org/repos/asf/mesos/blob/37f212e6/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index 29a99f3..7c8d1a0 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -358,6 +358,17 @@ private:
     // is only used during the launch of a container.
     mesos::slave::ContainerConfig config;
 
+    // Container's information at the moment it was launched. For example,
+    // used to bootstrap the launch information of future child DEBUG
+    // containers. Checkpointed and restored on recovery. Optional because
+    // it is not set for orphan containers.
+    //
+    // NOTE: Some of these data may change during the container lifetime,
+    // e.g., the working directory. Such changes are not captured here,
+    // which might be problematic, e.g., for DEBUG containers relying on
+    // some data in the parent's working directory.
+    Option<mesos::slave::ContainerLaunchInfo> launchInfo;
+
     State state;
 
     // Used when `status` needs to be collected from isolators

http://git-wip-us.apache.org/repos/asf/mesos/blob/37f212e6/src/slave/containerizer/mesos/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.cpp b/src/slave/containerizer/mesos/paths.cpp
index ed4bbd2..0c61c20 100644
--- a/src/slave/containerizer/mesos/paths.cpp
+++ b/src/slave/containerizer/mesos/paths.cpp
@@ -25,6 +25,7 @@
 namespace unix = process::network::unix;
 #endif // __WINDOWS__
 
+using mesos::slave::ContainerLaunchInfo;
 using mesos::slave::ContainerTermination;
 
 using std::list;
@@ -343,6 +344,43 @@ Try<vector<ContainerID>> getContainerIds(const string& runtimeDir)
 }
 
 
+string getContainerLaunchInfoPath(
+    const string& runtimeDir,
+    const ContainerID& containerId)
+{
+  return path::join(
+      getRuntimePath(runtimeDir, containerId),
+      CONTAINER_LAUNCH_INFO_FILE);
+}
+
+
+Result<ContainerLaunchInfo> getContainerLaunchInfo(
+    const string& runtimeDir,
+    const ContainerID& containerId)
+{
+  const string path = getContainerLaunchInfoPath(
+      runtimeDir, containerId);
+
+  if (!os::exists(path)) {
+    // This is possible because we don't atomically create the
+    // directory and write the 'CONTAINER_LAUNCH_INFO_FILE' file
+    // and thus we might terminate/restart after we've created
+    // the directory but before we've written the file.
+    return None();
+  }
+
+  const Result<ContainerLaunchInfo>& containerLaunchInfo =
+    ::protobuf::read<ContainerLaunchInfo>(path);
+
+  if (containerLaunchInfo.isError()) {
+    return Error(
+        "Failed to read ContainerLaunchInfo: " + containerLaunchInfo.error());
+  }
+
+  return containerLaunchInfo;
+}
+
+
 string getSandboxPath(
     const string& rootSandboxPath,
     const ContainerID& containerId)

http://git-wip-us.apache.org/repos/asf/mesos/blob/37f212e6/src/slave/containerizer/mesos/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.hpp b/src/slave/containerizer/mesos/paths.hpp
index d85fd34..12ae7c7 100644
--- a/src/slave/containerizer/mesos/paths.hpp
+++ b/src/slave/containerizer/mesos/paths.hpp
@@ -44,6 +44,7 @@ constexpr char SOCKET_FILE[] = "socket";
 constexpr char FORCE_DESTROY_ON_RECOVERY_FILE[] = "force_destroy_on_recovery";
 constexpr char IO_SWITCHBOARD_DIRECTORY[] = "io_switchboard";
 constexpr char CONTAINER_DIRECTORY[] = "containers";
+constexpr char CONTAINER_LAUNCH_INFO_FILE[] = "launch_info";
 
 
 enum Mode
@@ -157,6 +158,19 @@ Try<std::vector<ContainerID>> getContainerIds(
     const std::string& runtimeDir);
 
 
+// The helper method to get the container launch information path.
+std::string getContainerLaunchInfoPath(
+    const std::string& runtimeDir,
+    const ContainerID& containerId);
+
+
+// The helper method to get the container launch information
+// at the moment it was launched.
+Result<mesos::slave::ContainerLaunchInfo> getContainerLaunchInfo(
+    const std::string& runtimeDir,
+    const ContainerID& containerId);
+
+
 // The helper method to get the sandbox path.
 std::string getSandboxPath(
     const std::string& rootSandboxPath,