You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2017/05/11 10:16:45 UTC
[04/11] mesos git commit: Checkpointed and recovered
ContainerLaunchInfo for non-orphans.
Checkpointed and recovered ContainerLaunchInfo for non-orphans.
In the Mesos containerizer, cache each container's launch info and
recover it for non-orphan containers. This is a prerequisite for
persisting some container characteristics across agent failover.
Review: https://reviews.apache.org/r/58847
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/37f212e6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/37f212e6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/37f212e6
Branch: refs/heads/master
Commit: 37f212e608c4f0d0580daefae5583916c570d080
Parents: 7a670aa
Author: Alexander Rukletsov <al...@apache.org>
Authored: Fri Apr 28 16:32:32 2017 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu May 11 12:15:55 2017 +0200
----------------------------------------------------------------------
src/slave/containerizer/mesos/containerizer.cpp | 42 +++++++++++++++++++-
src/slave/containerizer/mesos/containerizer.hpp | 11 +++++
src/slave/containerizer/mesos/paths.cpp | 38 ++++++++++++++++++
src/slave/containerizer/mesos/paths.hpp | 14 +++++++
4 files changed, 103 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/37f212e6/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index dd3a070..3ff0b0d 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -909,6 +909,26 @@ Future<Nothing> MesosContainerizerProcess::__recover(
const list<ContainerState>& recovered,
const hashset<ContainerID>& orphans)
{
+ // Recover containers' launch information.
+ foreach (const ContainerState& run, recovered) {
+ const ContainerID& containerId = run.container_id();
+
+ // Attempt to read container's launch information.
+ Result<ContainerLaunchInfo> containerLaunchInfo =
+ containerizer::paths::getContainerLaunchInfo(
+ flags.runtime_dir, containerId);
+
+ if (containerLaunchInfo.isError()) {
+ return Failure(
+ "Failed to recover launch information of container '" +
+ stringify(containerId) + "': " + containerLaunchInfo.error());
+ }
+
+ if (containerLaunchInfo.isSome()) {
+ containers_[containerId]->launchInfo = containerLaunchInfo.get();
+ }
+ }
+
foreach (const ContainerState& run, recovered) {
const ContainerID& containerId = run.container_id();
@@ -1498,6 +1518,25 @@ Future<bool> MesosContainerizerProcess::_launch(
launchInfo.set_user("root");
}
+ // Store container's launch information for future access.
+ container->launchInfo = launchInfo;
+
+ // Checkpoint container's launch information.
+ const string launchInfoPath =
+ containerizer::paths::getContainerLaunchInfoPath(
+ flags.runtime_dir, containerId);
+
+ Try<Nothing> checkpointed = slave::state::checkpoint(
+ launchInfoPath, launchInfo);
+
+ if (checkpointed.isError()) {
+ LOG(ERROR) << "Failed to checkpoint container's launch information to '"
+ << launchInfoPath << "': " << checkpointed.error();
+
+ return Failure("Could not checkpoint container's launch information: " +
+ checkpointed.error());
+ }
+
// Use a pipe to block the child until it's been isolated.
// The `pipes` array is captured later in a lambda.
Try<std::array<int_fd, 2>> pipes_ = os::pipe();
@@ -1657,8 +1696,7 @@ Future<bool> MesosContainerizerProcess::_launch(
containerizer::paths::getRuntimePath(flags.runtime_dir, containerId),
containerizer::paths::PID_FILE);
- Try<Nothing> checkpointed =
- slave::state::checkpoint(pidPath, stringify(pid));
+ checkpointed = slave::state::checkpoint(pidPath, stringify(pid));
if (checkpointed.isError()) {
return Failure("Failed to checkpoint the container pid to"
http://git-wip-us.apache.org/repos/asf/mesos/blob/37f212e6/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index 29a99f3..7c8d1a0 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -358,6 +358,17 @@ private:
// is only used during the launch of a container.
mesos::slave::ContainerConfig config;
+ // Container's information at the moment it was launched. For example,
+ // used to bootstrap the launch information of future child DEBUG
+ // containers. Checkpointed and restored on recovery. Optional because
+ // it is not set for orphan containers.
+ //
+ // NOTE: Some of this data may change during the container's lifetime,
+ // e.g., the working directory. Such changes are not captured here,
+ // which might be problematic, e.g., for DEBUG containers relying on
+ // some data in the parent's working directory.
+ Option<mesos::slave::ContainerLaunchInfo> launchInfo;
+
State state;
// Used when `status` needs to be collected from isolators
http://git-wip-us.apache.org/repos/asf/mesos/blob/37f212e6/src/slave/containerizer/mesos/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.cpp b/src/slave/containerizer/mesos/paths.cpp
index ed4bbd2..0c61c20 100644
--- a/src/slave/containerizer/mesos/paths.cpp
+++ b/src/slave/containerizer/mesos/paths.cpp
@@ -25,6 +25,7 @@
namespace unix = process::network::unix;
#endif // __WINDOWS__
+using mesos::slave::ContainerLaunchInfo;
using mesos::slave::ContainerTermination;
using std::list;
@@ -343,6 +344,43 @@ Try<vector<ContainerID>> getContainerIds(const string& runtimeDir)
}
+string getContainerLaunchInfoPath(
+ const string& runtimeDir,
+ const ContainerID& containerId)
+{
+ return path::join(
+ getRuntimePath(runtimeDir, containerId),
+ CONTAINER_LAUNCH_INFO_FILE);
+}
+
+
+Result<ContainerLaunchInfo> getContainerLaunchInfo(
+ const string& runtimeDir,
+ const ContainerID& containerId)
+{
+ const string path = getContainerLaunchInfoPath(
+ runtimeDir, containerId);
+
+ if (!os::exists(path)) {
+ // This is possible because we don't atomically create the
+ // directory and write the 'CONTAINER_LAUNCH_INFO_FILE' file
+ // and thus we might terminate/restart after we've created
+ // the directory but before we've written the file.
+ return None();
+ }
+
+ const Result<ContainerLaunchInfo>& containerLaunchInfo =
+ ::protobuf::read<ContainerLaunchInfo>(path);
+
+ if (containerLaunchInfo.isError()) {
+ return Error(
+ "Failed to read ContainerLaunchInfo: " + containerLaunchInfo.error());
+ }
+
+ return containerLaunchInfo;
+}
+
+
string getSandboxPath(
const string& rootSandboxPath,
const ContainerID& containerId)
http://git-wip-us.apache.org/repos/asf/mesos/blob/37f212e6/src/slave/containerizer/mesos/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.hpp b/src/slave/containerizer/mesos/paths.hpp
index d85fd34..12ae7c7 100644
--- a/src/slave/containerizer/mesos/paths.hpp
+++ b/src/slave/containerizer/mesos/paths.hpp
@@ -44,6 +44,7 @@ constexpr char SOCKET_FILE[] = "socket";
constexpr char FORCE_DESTROY_ON_RECOVERY_FILE[] = "force_destroy_on_recovery";
constexpr char IO_SWITCHBOARD_DIRECTORY[] = "io_switchboard";
constexpr char CONTAINER_DIRECTORY[] = "containers";
+constexpr char CONTAINER_LAUNCH_INFO_FILE[] = "launch_info";
enum Mode
@@ -157,6 +158,19 @@ Try<std::vector<ContainerID>> getContainerIds(
const std::string& runtimeDir);
+// The helper method to get the container launch information path.
+std::string getContainerLaunchInfoPath(
+ const std::string& runtimeDir,
+ const ContainerID& containerId);
+
+
+// The helper method to get the container launch information
+// at the moment it was launched.
+Result<mesos::slave::ContainerLaunchInfo> getContainerLaunchInfo(
+ const std::string& runtimeDir,
+ const ContainerID& containerId);
+
+
// The helper method to get the sandbox path.
std::string getSandboxPath(
const std::string& rootSandboxPath,