You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by gi...@apache.org on 2017/11/20 20:31:24 UTC
[1/5] mesos git commit: Backfilled required fields in TaskInfo in
MesosContainerizer* tests.
Repository: mesos
Updated Branches:
refs/heads/master b433262a8 -> bdb604a9d
Backfilled required fields in TaskInfo in MesosContainerizer* tests.
These tests were using a `TaskInfo` field which is missing required
protobuf fields. After the previous patch begins to checkpoint
`ContainerConfig`, the incomplete `TaskInfo` cannot be checkpointed,
thus caused these tests to fail.
This patch backfills these fields to make these tests pass.
Review: https://reviews.apache.org/r/55335/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1cbe680a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1cbe680a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1cbe680a
Branch: refs/heads/master
Commit: 1cbe680a398bfc094fb4879b495654c5654b6454
Parents: 03a2a4d
Author: Zhitao Li <zh...@gmail.com>
Authored: Fri Nov 17 16:36:26 2017 -0800
Committer: Gilbert Song <so...@gmail.com>
Committed: Mon Nov 20 12:28:11 2017 -0800
----------------------------------------------------------------------
.../containerizer/mesos_containerizer_tests.cpp | 43 +++++++++++---------
1 file changed, 24 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/1cbe680a/src/tests/containerizer/mesos_containerizer_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/mesos_containerizer_tests.cpp b/src/tests/containerizer/mesos_containerizer_tests.cpp
index ce67def..98adcfc 100644
--- a/src/tests/containerizer/mesos_containerizer_tests.cpp
+++ b/src/tests/containerizer/mesos_containerizer_tests.cpp
@@ -757,9 +757,10 @@ TEST_F(MesosContainerizerDestroyTest, DestroyWhileFetching)
ContainerID containerId;
containerId.set_value(UUID::random().toString());
- TaskInfo taskInfo;
- CommandInfo commandInfo;
- taskInfo.mutable_command()->MergeFrom(commandInfo);
+ SlaveID slaveId = SlaveID();
+ slaveId.set_value("slave_id");
+
+ TaskInfo taskInfo = createTask(slaveId, Resources(), CommandInfo());
containerizer->launch(
containerId,
@@ -825,9 +826,10 @@ TEST_F(MesosContainerizerDestroyTest, DestroyWhilePreparing)
ContainerID containerId;
containerId.set_value(UUID::random().toString());
- TaskInfo taskInfo;
- CommandInfo commandInfo;
- taskInfo.mutable_command()->MergeFrom(commandInfo);
+ SlaveID slaveId = SlaveID();
+ slaveId.set_value("slave_id");
+
+ TaskInfo taskInfo = createTask(slaveId, Resources(), CommandInfo());
containerizer->launch(
containerId,
@@ -850,7 +852,7 @@ TEST_F(MesosContainerizerDestroyTest, DestroyWhilePreparing)
// Need to help the compiler to disambiguate between overloads.
ContainerLaunchInfo launchInfo;
- launchInfo.add_pre_exec_commands()->CopyFrom(commandInfo);
+ launchInfo.add_pre_exec_commands()->CopyFrom(taskInfo.command());
Option<ContainerLaunchInfo> option = launchInfo;
promise.set(option);
@@ -958,9 +960,11 @@ TEST_F(MesosContainerizerProvisionerTest, ProvisionFailed)
containerInfo.set_type(ContainerInfo::MESOS);
containerInfo.mutable_mesos()->CopyFrom(mesosInfo);
- TaskInfo taskInfo;
- CommandInfo commandInfo;
- taskInfo.mutable_command()->MergeFrom(commandInfo);
+ SlaveID slaveId = SlaveID();
+ slaveId.set_value("slave_id");
+
+ TaskInfo taskInfo = createTask(slaveId, Resources(), CommandInfo());
+
taskInfo.mutable_container()->CopyFrom(containerInfo);
ExecutorInfo executorInfo = createExecutorInfo("executor", "exit 0");
@@ -1042,9 +1046,10 @@ TEST_F(MesosContainerizerProvisionerTest, DestroyWhileProvisioning)
containerInfo.set_type(ContainerInfo::MESOS);
containerInfo.mutable_mesos()->CopyFrom(mesosInfo);
- TaskInfo taskInfo;
- CommandInfo commandInfo;
- taskInfo.mutable_command()->MergeFrom(commandInfo);
+ SlaveID slaveId = SlaveID();
+ slaveId.set_value("slave_id");
+
+ TaskInfo taskInfo = createTask(slaveId, Resources(), CommandInfo());
taskInfo.mutable_container()->CopyFrom(containerInfo);
ExecutorInfo executorInfo = createExecutorInfo("executor", "exit 0");
@@ -1133,9 +1138,9 @@ TEST_F(MesosContainerizerProvisionerTest, IsolatorCleanupBeforePrepare)
containerInfo.set_type(ContainerInfo::MESOS);
containerInfo.mutable_mesos()->CopyFrom(mesosInfo);
- TaskInfo taskInfo;
- CommandInfo commandInfo;
- taskInfo.mutable_command()->MergeFrom(commandInfo);
+ SlaveID slaveId = SlaveID();
+ slaveId.set_value("slave_id");
+ TaskInfo taskInfo = createTask(slaveId, Resources(), CommandInfo());
taskInfo.mutable_container()->CopyFrom(containerInfo);
ExecutorInfo executorInfo = createExecutorInfo("executor", "exit 0");
@@ -1209,9 +1214,9 @@ TEST_F(MesosContainerizerDestroyTest, LauncherDestroyFailure)
ContainerID containerId;
containerId.set_value(UUID::random().toString());
- TaskInfo taskInfo;
- CommandInfo commandInfo;
- taskInfo.mutable_command()->MergeFrom(commandInfo);
+ SlaveID slaveId = SlaveID();
+ slaveId.set_value("slave_id");
+ TaskInfo taskInfo = createTask(slaveId, Resources(), CommandInfo());
// Destroy the container using the SubprocessLauncher but return a failed
// future to the containerizer.
[4/5] mesos git commit: Added checkpoint and recover capability for
layers in provisioner.
Posted by gi...@apache.org.
Added checkpoint and recover capability for layers in provisioner.
Review: https://reviews.apache.org/r/62997/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/e273efe6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/e273efe6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/e273efe6
Branch: refs/heads/master
Commit: e273efe6976434858edb85bbcf367a02e963a467
Parents: 3aa7147
Author: Zhitao Li <zh...@gmail.com>
Authored: Fri Nov 17 16:36:29 2017 -0800
Committer: Gilbert Song <so...@gmail.com>
Committed: Mon Nov 20 12:29:39 2017 -0800
----------------------------------------------------------------------
include/mesos/slave/containerizer.proto | 10 +++++
.../containerizer/mesos/provisioner/paths.cpp | 10 +++++
.../containerizer/mesos/provisioner/paths.hpp | 11 +++++
.../mesos/provisioner/provisioner.cpp | 43 ++++++++++++++++++++
.../mesos/provisioner/provisioner.hpp | 4 ++
5 files changed, 78 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/e273efe6/include/mesos/slave/containerizer.proto
----------------------------------------------------------------------
diff --git a/include/mesos/slave/containerizer.proto b/include/mesos/slave/containerizer.proto
index 689acfc..0f1e1da 100644
--- a/include/mesos/slave/containerizer.proto
+++ b/include/mesos/slave/containerizer.proto
@@ -255,3 +255,13 @@ message ContainerTermination {
// multiple roles (e.g. `"mem(*):1;mem(role):2"`).
repeated Resource limited_resources = 6;
}
+
+
+/**
+ * Information about all layers used to provision a container. This is used
+ * to checkpoint and recover in provisioner.
+ */
+message ContainerLayers {
+ // Paths of all layers used to provision the container.
+ repeated string paths = 1;
+}
http://git-wip-us.apache.org/repos/asf/mesos/blob/e273efe6/src/slave/containerizer/mesos/provisioner/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/paths.cpp b/src/slave/containerizer/mesos/provisioner/paths.cpp
index 268dbeb..80486c5 100644
--- a/src/slave/containerizer/mesos/provisioner/paths.cpp
+++ b/src/slave/containerizer/mesos/provisioner/paths.cpp
@@ -86,6 +86,16 @@ string getContainerDir(
}
+string getLayersFilePath(
+ const string& provisionerDir,
+ const ContainerID& containerId)
+{
+ return path::join(
+ getContainerDir(provisionerDir, containerId),
+ LAYERS_FILE);
+}
+
+
string getContainerRootfsDir(
const string& provisionerDir,
const ContainerID& containerId,
http://git-wip-us.apache.org/repos/asf/mesos/blob/e273efe6/src/slave/containerizer/mesos/provisioner/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/paths.hpp b/src/slave/containerizer/mesos/provisioner/paths.hpp
index 466f5ed..07f5bcf 100644
--- a/src/slave/containerizer/mesos/provisioner/paths.hpp
+++ b/src/slave/containerizer/mesos/provisioner/paths.hpp
@@ -36,12 +36,14 @@ namespace paths {
// |-- provisioner
// |-- containers
// |-- <container_id>
+// |-- layers (paths to all layers to provision)
// |-- backends
// |-- <backend> (copy, bind, etc.)
// |-- rootfses
// |-- <rootfs_id> (the rootfs)
// |-- containers (nested containers)
// |-- <container_id>
+// |-- layers (paths to all layers to provision)
// |-- backends
// |-- <backend> (copy, bind, etc.)
// |-- rootfses
@@ -52,11 +54,20 @@ namespace paths {
// is a UUID.
+constexpr char LAYERS_FILE[] = "layers";
+
+
+// TODO(gilbert): rename this to `getContainerPath` for consistency.
std::string getContainerDir(
const std::string& provisionerDir,
const ContainerID& containerId);
+std::string getLayersFilePath(
+ const std::string& provisionerDir,
+ const ContainerID& containerId);
+
+
std::string getContainerRootfsDir(
const std::string& provisionerDir,
const ContainerID& containerId,
http://git-wip-us.apache.org/repos/asf/mesos/blob/e273efe6/src/slave/containerizer/mesos/provisioner/provisioner.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.cpp b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
index 450a3b3..1723588 100644
--- a/src/slave/containerizer/mesos/provisioner/provisioner.cpp
+++ b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
@@ -41,6 +41,7 @@
#endif
#include "slave/paths.hpp"
+#include "slave/state.hpp"
#include "slave/containerizer/mesos/provisioner/constants.hpp"
#include "slave/containerizer/mesos/provisioner/backend.hpp"
@@ -61,6 +62,7 @@ using mesos::internal::slave::BIND_BACKEND;
using mesos::internal::slave::COPY_BACKEND;
using mesos::internal::slave::OVERLAY_BACKEND;
+using mesos::slave::ContainerLayers;
using mesos::slave::ContainerState;
namespace mesos {
@@ -366,6 +368,31 @@ Future<Nothing> ProvisionerProcess::recover(
info->rootfses.put(backend, rootfses.get()[backend]);
}
+ Result<ContainerLayers> layers = None();
+ const string path = provisioner::paths::getLayersFilePath(
+ rootDir, containerId);
+
+ if (!os::exists(path)) {
+ // This is possible if we recovered a container provisioned before we
+ // started to checkpoint `ContainerLayers`.
+ VLOG(1) << "Layers path '" << path << "' is missing for container' "
+ << containerId << "'";
+ } else {
+ layers = ::protobuf::read<ContainerLayers>(path);
+ }
+
+ if (layers.isError()) {
+ return Failure(
+ "Failed to recover layers for container '" + stringify(containerId) +
+ "': " + layers.error());
+ } else if (layers.isSome()) {
+ info->layers = vector<string>();
+ std::copy(
+ layers->paths().begin(),
+ layers->paths().end(),
+ std::back_inserter(info->layers.get()));
+ }
+
infos.put(containerId, info);
if (knownContainerIds.contains(containerId)) {
@@ -478,6 +505,22 @@ Future<ProvisionInfo> ProvisionerProcess::_provision(
rootfs,
backendDir)
.then([=]() -> Future<ProvisionInfo> {
+ const string path =
+ provisioner::paths::getLayersFilePath(rootDir, containerId);
+
+ ContainerLayers containerLayers;
+
+ foreach(const string& layer, imageInfo.layers) {
+ containerLayers.add_paths(layer);
+ }
+
+ Try<Nothing> checkpoint = slave::state::checkpoint(path, containerLayers);
+ if (checkpoint.isError()) {
+ return Failure(
+ "Failed to checkpoint layers to '" + path + "': " +
+ checkpoint.error());
+ }
+
return ProvisionInfo{
rootfs, imageInfo.dockerManifest, imageInfo.appcManifest};
});
http://git-wip-us.apache.org/repos/asf/mesos/blob/e273efe6/src/slave/containerizer/mesos/provisioner/provisioner.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.hpp b/src/slave/containerizer/mesos/provisioner/provisioner.hpp
index 7cba54c..b87144c 100644
--- a/src/slave/containerizer/mesos/provisioner/provisioner.hpp
+++ b/src/slave/containerizer/mesos/provisioner/provisioner.hpp
@@ -167,6 +167,10 @@ private:
// Mappings: backend -> {rootfsId, ...}
hashmap<std::string, hashset<std::string>> rootfses;
+ // TODO(zhitao): Remove Option after the deprecation cycle
+ // started in 1.5.
+ Option<std::vector<std::string>> layers;
+
process::Promise<bool> termination;
// The container status in provisioner.
[3/5] mesos git commit: Added tests for recovering ContainerConfig.
Posted by gi...@apache.org.
Added tests for recovering ContainerConfig.
Review: https://reviews.apache.org/r/59687/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3aa71475
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3aa71475
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3aa71475
Branch: refs/heads/master
Commit: 3aa71475efaf2866862aa7d0fc2875185f5c62fa
Parents: 1cbe680
Author: Zhitao Li <zh...@gmail.com>
Authored: Fri Nov 17 16:36:28 2017 -0800
Committer: Gilbert Song <so...@gmail.com>
Committed: Mon Nov 20 12:29:32 2017 -0800
----------------------------------------------------------------------
.../nested_mesos_containerizer_tests.cpp | 170 ++++++++++++++++++-
1 file changed, 168 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/3aa71475/src/tests/containerizer/nested_mesos_containerizer_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/nested_mesos_containerizer_tests.cpp b/src/tests/containerizer/nested_mesos_containerizer_tests.cpp
index d9b1173..bb2abbc 100644
--- a/src/tests/containerizer/nested_mesos_containerizer_tests.cpp
+++ b/src/tests/containerizer/nested_mesos_containerizer_tests.cpp
@@ -51,9 +51,11 @@ using mesos::internal::slave::Containerizer;
using mesos::internal::slave::Fetcher;
using mesos::internal::slave::MesosContainerizer;
+using mesos::internal::slave::containerizer::paths::getContainerConfig;
using mesos::internal::slave::containerizer::paths::getRuntimePath;
using mesos::internal::slave::containerizer::paths::getSandboxPath;
using mesos::internal::slave::containerizer::paths::buildPath;
+using mesos::internal::slave::containerizer::paths::CONTAINER_CONFIG_FILE;
using mesos::internal::slave::containerizer::paths::JOIN;
using mesos::internal::slave::containerizer::paths::PREFIX;
using mesos::internal::slave::containerizer::paths::SUFFIX;
@@ -1634,6 +1636,16 @@ TEST_F(NestedMesosContainerizerTest, ROOT_CGROUPS_RecoverNested)
AWAIT_READY(status);
ASSERT_TRUE(status->has_executor_pid());
+ const Result<mesos::slave::ContainerConfig> containerConfig =
+ getContainerConfig(flags.runtime_dir, containerId);
+
+ EXPECT_SOME(containerConfig);
+
+ const Result<mesos::slave::ContainerConfig> nestedConfig =
+ getContainerConfig(flags.runtime_dir, nestedContainerId);
+
+ EXPECT_SOME(nestedConfig);
+
pid_t nestedPid = status->executor_pid();
// Force a delete on the containerizer before we create the new one.
@@ -1695,6 +1707,155 @@ TEST_F(NestedMesosContainerizerTest, ROOT_CGROUPS_RecoverNested)
}
+TEST_F(NestedMesosContainerizerTest, ROOT_CGROUPS_RecoverNestedWithoutConfig)
+{
+ slave::Flags flags = CreateSlaveFlags();
+ flags.launcher = "linux";
+ flags.isolation = "cgroups/cpu,filesystem/linux,namespaces/pid";
+
+ Fetcher fetcher(flags);
+
+ Try<MesosContainerizer*> create = MesosContainerizer::create(
+ flags,
+ false,
+ &fetcher);
+
+ ASSERT_SOME(create);
+
+ Owned<MesosContainerizer> containerizer(create.get());
+
+ SlaveState state;
+ state.id = SlaveID();
+
+ AWAIT_READY(containerizer->recover(state));
+
+ ContainerID containerId;
+ containerId.set_value(UUID::random().toString());
+
+ ExecutorInfo executor = createExecutorInfo(
+ "executor",
+ "sleep 1000",
+ "cpus:1");
+
+ Try<string> directory = environment->mkdtemp();
+ ASSERT_SOME(directory);
+
+ Future<Containerizer::LaunchResult> launch = containerizer->launch(
+ containerId,
+ createContainerConfig(None(), executor, directory.get()),
+ map<string, string>(),
+ slave::paths::getForkedPidPath(
+ slave::paths::getMetaRootDir(flags.work_dir),
+ state.id,
+ executor.framework_id(),
+ executor.executor_id(),
+ containerId));
+
+ AWAIT_ASSERT_EQ(Containerizer::LaunchResult::SUCCESS, launch);
+
+ Future<ContainerStatus> status = containerizer->status(containerId);
+ AWAIT_READY(status);
+ ASSERT_TRUE(status->has_executor_pid());
+
+ pid_t pid = status->executor_pid();
+
+ // Now launch nested container.
+ ContainerID nestedContainerId;
+ nestedContainerId.mutable_parent()->CopyFrom(containerId);
+ nestedContainerId.set_value(UUID::random().toString());
+
+ launch = containerizer->launch(
+ nestedContainerId,
+ createContainerConfig(createCommandInfo("sleep 1000")),
+ map<string, string>(),
+ None());
+
+ AWAIT_ASSERT_EQ(Containerizer::LaunchResult::SUCCESS, launch);
+
+ status = containerizer->status(nestedContainerId);
+ AWAIT_READY(status);
+ ASSERT_TRUE(status->has_executor_pid());
+
+ const Result<mesos::slave::ContainerConfig> containerConfig =
+ getContainerConfig(flags.runtime_dir, containerId);
+
+ EXPECT_SOME(containerConfig);
+
+ const Result<mesos::slave::ContainerConfig> nestedConfig =
+ getContainerConfig(flags.runtime_dir, nestedContainerId);
+
+ EXPECT_SOME(nestedConfig);
+
+ pid_t nestedPid = status->executor_pid();
+
+ // Force a delete on the containerizer before we create the new one.
+ containerizer.reset();
+
+ // Remove the checkpointed nested container config to simulate the case
+ // when we upgrade.
+ string nestedConfigPath = path::join(
+ getRuntimePath(flags.runtime_dir, containerId),
+ CONTAINER_CONFIG_FILE);
+
+ CHECK(os::exists(nestedConfigPath));
+ CHECK_SOME(os::rm(nestedConfigPath));
+
+ create = MesosContainerizer::create(
+ flags,
+ false,
+ &fetcher);
+
+ ASSERT_SOME(create);
+
+ containerizer.reset(create.get());
+
+ Try<SlaveState> slaveState = createSlaveState(
+ containerId,
+ pid,
+ executor,
+ state.id,
+ flags.work_dir);
+
+ ASSERT_SOME(slaveState);
+
+ state = slaveState.get();
+ AWAIT_READY(containerizer->recover(state));
+
+ status = containerizer->status(containerId);
+ AWAIT_READY(status);
+ ASSERT_TRUE(status->has_executor_pid());
+ EXPECT_EQ(pid, status->executor_pid());
+
+ status = containerizer->status(nestedContainerId);
+ AWAIT_READY(status);
+ ASSERT_TRUE(status->has_executor_pid());
+ EXPECT_EQ(nestedPid, status->executor_pid());
+
+ Future<Option<ContainerTermination>> nestedWait = containerizer->wait(
+ nestedContainerId);
+
+ containerizer->destroy(nestedContainerId);
+
+ AWAIT_READY(nestedWait);
+ ASSERT_SOME(nestedWait.get());
+
+ // We expect a wait status of SIGKILL on the nested container.
+ // Since the kernel will destroy these via a SIGKILL, we expect
+ // a SIGKILL here.
+ ASSERT_TRUE(nestedWait.get()->has_status());
+ EXPECT_WTERMSIG_EQ(SIGKILL, nestedWait.get()->status());
+
+ Future<Option<ContainerTermination>> wait = containerizer->wait(containerId);
+
+ containerizer->destroy(containerId);
+
+ AWAIT_READY(wait);
+ ASSERT_SOME(wait.get());
+ ASSERT_TRUE(wait.get()->has_status());
+ EXPECT_WTERMSIG_EQ(SIGKILL, wait.get()->status());
+}
+
+
TEST_F(NestedMesosContainerizerTest, ROOT_CGROUPS_RecoverLauncherOrphans)
{
slave::Flags flags = CreateSlaveFlags();
@@ -2477,7 +2638,7 @@ TEST_F(NestedMesosContainerizerTest, ROOT_CGROUPS_Remove)
ASSERT_TRUE(wait.get()->has_status());
EXPECT_WEXITSTATUS_EQ(0, wait.get()->status());
- // The runtime and sandbox directories must exist.
+ // The runtime, config and sandbox directories must exist.
const string runtimePath =
getRuntimePath(flags.runtime_dir, nestedContainerId);
ASSERT_TRUE(os::exists(runtimePath));
@@ -2485,11 +2646,16 @@ TEST_F(NestedMesosContainerizerTest, ROOT_CGROUPS_Remove)
const string sandboxPath = getSandboxPath(directory.get(), nestedContainerId);
ASSERT_TRUE(os::exists(sandboxPath));
+ const Result<mesos::slave::ContainerConfig> containerConfig =
+ getContainerConfig(flags.runtime_dir, nestedContainerId);
+
+ ASSERT_SOME(containerConfig);
+
// Now remove the nested container.
Future<Nothing> remove = containerizer->remove(nestedContainerId);
AWAIT_READY(remove);
- // We now expect the runtime and sandbox directories NOT to exist.
+ // We now expect the runtime, config and sandbox directories NOT to exist.
EXPECT_FALSE(os::exists(runtimePath));
EXPECT_FALSE(os::exists(sandboxPath));
[5/5] mesos git commit: Implemented pruneImages with a mark and sweep
in docker store.
Posted by gi...@apache.org.
Implemented pruneImages with a mark and sweep in docker store.
This includes the following changes:
- add a `pruneImages()` function on the chain of relevant classes;
- implement prune in docker store;
- fix mock interface to keep existing tests pass.
Review: https://reviews.apache.org/r/56721/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/bdb604a9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/bdb604a9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/bdb604a9
Branch: refs/heads/master
Commit: bdb604a9dc29ab7bc4b9398cf4c1a2bd8b6061c4
Parents: e273efe
Author: Zhitao Li <zh...@gmail.com>
Authored: Fri Nov 17 16:36:31 2017 -0800
Committer: Gilbert Song <so...@gmail.com>
Committed: Mon Nov 20 12:29:53 2017 -0800
----------------------------------------------------------------------
src/slave/containerizer/composing.cpp | 21 +++
src/slave/containerizer/composing.hpp | 2 +
src/slave/containerizer/containerizer.hpp | 3 +
src/slave/containerizer/docker.cpp | 7 +
src/slave/containerizer/docker.hpp | 2 +
src/slave/containerizer/mesos/containerizer.cpp | 39 ++++
src/slave/containerizer/mesos/containerizer.hpp | 4 +
.../provisioner/docker/metadata_manager.cpp | 50 ++++-
.../provisioner/docker/metadata_manager.hpp | 14 ++
.../mesos/provisioner/docker/paths.cpp | 25 +++
.../mesos/provisioner/docker/paths.hpp | 15 ++
.../mesos/provisioner/docker/store.cpp | 151 ++++++++++++++-
.../mesos/provisioner/docker/store.hpp | 5 +
.../mesos/provisioner/provisioner.cpp | 184 ++++++++++++++-----
.../mesos/provisioner/provisioner.hpp | 24 +++
.../containerizer/mesos/provisioner/store.cpp | 10 +
.../containerizer/mesos/provisioner/store.hpp | 18 ++
src/tests/containerizer.cpp | 17 ++
src/tests/containerizer.hpp | 7 +
src/tests/containerizer/mock_containerizer.hpp | 2 +
20 files changed, 547 insertions(+), 53 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/composing.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/composing.cpp b/src/slave/containerizer/composing.cpp
index 64919ef..9a9e11b 100644
--- a/src/slave/containerizer/composing.cpp
+++ b/src/slave/containerizer/composing.cpp
@@ -95,6 +95,8 @@ public:
Future<Nothing> remove(const ContainerID& containerId);
+ Future<Nothing> pruneImages();
+
private:
// Continuations.
Future<Nothing> _recover();
@@ -257,6 +259,12 @@ Future<Nothing> ComposingContainerizer::remove(const ContainerID& containerId)
}
+Future<Nothing> ComposingContainerizer::pruneImages()
+{
+ return dispatch(process, &ComposingContainerizerProcess::pruneImages);
+}
+
+
ComposingContainerizerProcess::~ComposingContainerizerProcess()
{
foreach (Containerizer* containerizer, containerizers_) {
@@ -687,6 +695,19 @@ Future<Nothing> ComposingContainerizerProcess::remove(
return containers_[rootContainerId]->containerizer->remove(containerId);
}
+
+Future<Nothing> ComposingContainerizerProcess::pruneImages()
+{
+ list<Future<Nothing>> futures;
+
+ foreach (Containerizer* containerizer, containerizers_) {
+ futures.push_back(containerizer->pruneImages());
+ }
+
+ return collect(futures)
+ .then([]() { return Nothing(); });
+}
+
} // namespace slave {
} // namespace internal {
} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/composing.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/composing.hpp b/src/slave/containerizer/composing.hpp
index c2689cf..3d00609 100644
--- a/src/slave/containerizer/composing.hpp
+++ b/src/slave/containerizer/composing.hpp
@@ -86,6 +86,8 @@ public:
virtual process::Future<Nothing> remove(const ContainerID& containerId);
+ virtual process::Future<Nothing> pruneImages();
+
private:
ComposingContainerizerProcess* process;
};
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/containerizer.hpp b/src/slave/containerizer/containerizer.hpp
index 2027bd9..7a0c6fc 100644
--- a/src/slave/containerizer/containerizer.hpp
+++ b/src/slave/containerizer/containerizer.hpp
@@ -165,6 +165,9 @@ public:
{
return process::Failure("Unsupported");
}
+
+ // Prune unused images from supported image stores.
+ virtual process::Future<Nothing> pruneImages() = 0;
};
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index 63432a9..9918d83 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -876,6 +876,13 @@ Future<hashset<ContainerID>> DockerContainerizer::containers()
}
+Future<Nothing> DockerContainerizer::pruneImages()
+{
+ VLOG(1) << "DockerContainerizer does not support pruneImages";
+ return Nothing();
+}
+
+
Future<Nothing> DockerContainerizerProcess::recover(
const Option<SlaveState>& state)
{
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/docker.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.hpp b/src/slave/containerizer/docker.hpp
index 105c068..9df9849 100644
--- a/src/slave/containerizer/docker.hpp
+++ b/src/slave/containerizer/docker.hpp
@@ -109,6 +109,8 @@ public:
virtual process::Future<hashset<ContainerID>> containers();
+ virtual process::Future<Nothing> pruneImages();
+
private:
process::Owned<DockerContainerizerProcess> process;
};
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index bf71db1..7f3b86d 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -656,6 +656,12 @@ Future<Nothing> MesosContainerizer::remove(const ContainerID& containerId)
}
+Future<Nothing> MesosContainerizer::pruneImages()
+{
+ return dispatch(process.get(), &MesosContainerizerProcess::pruneImages);
+}
+
+
Future<Nothing> MesosContainerizerProcess::recover(
const Option<state::SlaveState>& state)
{
@@ -2818,6 +2824,39 @@ Future<hashset<ContainerID>> MesosContainerizerProcess::containers()
}
+Future<Nothing> MesosContainerizerProcess::pruneImages()
+{
+ vector<Image> excludedImages;
+ excludedImages.reserve(containers_.size());
+
+ foreachpair (
+ const ContainerID& containerId,
+ const Owned<Container>& container,
+ containers_) {
+ // Checkpointing ContainerConfig is introduced recently. Legacy containers
+ // do not have the information of which image is used. Image pruning is
+ // disabled.
+ if (container->config.isNone()) {
+ return Failure(
+ "Container " + stringify(containerId) +
+ " does not have ContainerConfig "
+ "checkpointed. Image pruning is disabled");
+ }
+
+ const ContainerConfig& containerConfig = container->config.get();
+ if (containerConfig.has_container_info() &&
+ containerConfig.container_info().mesos().has_image()) {
+ excludedImages.push_back(
+ containerConfig.container_info().mesos().image());
+ }
+ }
+
+ // TODO(zhitao): use std::unique to deduplicate `excludedImages`.
+
+ return provisioner->pruneImages(excludedImages);
+}
+
+
MesosContainerizerProcess::Metrics::Metrics()
: container_destroy_errors(
"containerizer/mesos/container_destroy_errors")
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index e859b5b..e2739e0 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -111,6 +111,8 @@ public:
virtual process::Future<Nothing> remove(const ContainerID& containerId);
+ virtual process::Future<Nothing> pruneImages();
+
private:
explicit MesosContainerizer(
const process::Owned<MesosContainerizerProcess>& process);
@@ -181,6 +183,8 @@ public:
virtual process::Future<hashset<ContainerID>> containers();
+ virtual process::Future<Nothing> pruneImages();
+
private:
enum State
{
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
index d86afd2..1ab66c1 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
@@ -67,7 +67,8 @@ public:
const spec::ImageReference& reference,
bool cached);
- // TODO(chenlily): Implement removal of unreferenced images.
+ Future<hashset<string>> prune(
+ const vector<spec::ImageReference>& excludedImages);
private:
// Write out metadata manager state to persistent store.
@@ -134,6 +135,16 @@ Future<Option<Image>> MetadataManager::get(
}
+Future<hashset<string>> MetadataManager::prune(
+ const vector<spec::ImageReference>& excludedImages)
+{
+ return dispatch(
+ process.get(),
+ &MetadataManagerProcess::prune,
+ excludedImages);
+}
+
+
Future<Image> MetadataManagerProcess::put(
const spec::ImageReference& reference,
const vector<string>& layerIds)
@@ -180,6 +191,43 @@ Future<Option<Image>> MetadataManagerProcess::get(
}
+Future<hashset<string>> MetadataManagerProcess::prune(
+ const vector<spec::ImageReference>& excludedImages)
+{
+ hashmap<string, Image> retainedImages;
+ hashset<string> retainedLayers;
+
+ foreach (const spec::ImageReference& reference, excludedImages) {
+ const string imageName = stringify(reference);
+ Option<Image> image = storedImages.get(imageName);
+
+ if (image.isNone()) {
+ // This is possible if docker store was cleaned
+ // in a recovery after the container using this image was
+ // launched.
+ VLOG(1) << "Excluded docker image '" << imageName
+ << "' is not cached in metadata manager.";
+ continue;
+ }
+
+ retainedImages[imageName] = image.get();
+
+ foreach (const string& layerId, image->layer_ids()) {
+ retainedLayers.insert(layerId);
+ }
+ }
+
+ storedImages = std::move(retainedImages);
+
+ Try<Nothing> status = persist();
+ if (status.isError()) {
+ return Failure("Failed to save state of Docker images: " + status.error());
+ }
+
+ return retainedLayers;
+}
+
+
Try<Nothing> MetadataManagerProcess::persist()
{
Images images;
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
index 954da16..cfafd44 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
@@ -21,6 +21,7 @@
#include <string>
#include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
#include <stout/json.hpp>
#include <stout/option.hpp>
#include <stout/protobuf.hpp>
@@ -92,6 +93,19 @@ public:
const ::docker::spec::ImageReference& reference,
bool cached);
+ /**
+ * Prune images from the metadata manager by comparing
+ * existing images with active images in use. This function will
+ * remove all images not used anymore, and return the list of
+ * layers which are still referenced. The caller should
+ * ensure such layers are kept in best effort.
+ *
+ * @param excludedImages all images to exclude from pruning.
+ * @return a list of all layers still refered.
+ */
+ process::Future<hashset<std::string>> prune(
+ const std::vector<::docker::spec::ImageReference>& excludedImages);
+
private:
explicit MetadataManager(process::Owned<MetadataManagerProcess> process);
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/paths.cpp b/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
index cd684b3..f692552 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
@@ -18,8 +18,12 @@
#include "slave/containerizer/mesos/provisioner/docker/paths.hpp"
+#include <process/clock.hpp>
+
+#include <stout/os.hpp>
#include <stout/path.hpp>
+using std::list;
using std::string;
namespace mesos {
@@ -46,6 +50,13 @@ string getImageLayerPath(const string& storeDir, const string& layerId)
}
+Try<list<string>> listLayers(const string& storeDir)
+{
+ const string layersDir = path::join(storeDir, "layers");
+ return os::ls(layersDir);
+}
+
+
string getImageLayerManifestPath(const string& layerPath)
{
return path::join(layerPath, "json");
@@ -100,6 +111,20 @@ string getStoredImagesPath(const string& storeDir)
return path::join(storeDir, "storedImages");
}
+
+string getGcDir(const string& storeDir)
+{
+ return path::join(storeDir, "gc");
+}
+
+
+string getGcLayerPath(const string& storeDir, const string& layerId)
+{
+ return path::join(
+ getGcDir(storeDir),
+ layerId + "." + stringify(process::Clock::now().duration().ns()));
+}
+
} // namespace paths {
} // namespace docker {
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/paths.hpp b/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
index 232c027..0cd7f31 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
@@ -17,8 +17,11 @@
#ifndef __PROVISIONER_DOCKER_PATHS_HPP__
#define __PROVISIONER_DOCKER_PATHS_HPP__
+#include <list>
#include <string>
+#include <stout/try.hpp>
+
namespace mesos {
namespace internal {
namespace slave {
@@ -40,6 +43,7 @@ namespace paths {
* |-- json(manifest)
* |-- VERSION
* |--storedImages (file holding on cached images)
+ * |--gc (dir holding marked layers to be sweeped)
*/
// TODO(gilbert): Clean up any unused method after refactoring.
@@ -90,6 +94,17 @@ std::string getImageArchiveTarPath(
std::string getStoredImagesPath(const std::string& storeDir);
+
+std::string getGcDir(const std::string& storeDir);
+
+
+std::string getGcLayerPath(
+ const std::string& storeDir,
+ const std::string& layerId);
+
+
+Try<std::list<std::string>> listLayers(const std::string& storeDir);
+
} // namespace paths {
} // namespace docker {
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/store.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/store.cpp b/src/slave/containerizer/mesos/provisioner/docker/store.cpp
index f357710..d64e6eb 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/store.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/store.cpp
@@ -24,13 +24,16 @@
#include <mesos/secret/resolver.hpp>
#include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
#include <stout/json.hpp>
#include <stout/os.hpp>
#include <process/collect.hpp>
#include <process/defer.hpp>
#include <process/dispatch.hpp>
+#include <process/executor.hpp>
#include <process/id.hpp>
+#include <process/metrics/counter.hpp>
#include "slave/containerizer/mesos/provisioner/constants.hpp"
#include "slave/containerizer/mesos/provisioner/utils.hpp"
@@ -75,7 +78,9 @@ public:
: ProcessBase(process::ID::generate("docker-provisioner-store")),
flags(_flags),
metadataManager(_metadataManager),
- puller(_puller) {}
+ puller(_puller)
+ {
+ }
~StoreProcess() {}
@@ -85,6 +90,10 @@ public:
const mesos::Image& image,
const string& backend);
+ Future<Nothing> prune(
+ const std::vector<mesos::Image>& excludeImages,
+ const hashset<string>& activeLayerPaths);
+
private:
Future<Image> _get(
const spec::ImageReference& reference,
@@ -106,11 +115,18 @@ private:
const string& layerId,
const string& backend);
+ Future<Nothing> _prune(
+ const hashset<string>& activeLayerPaths,
+ const hashset<string>& retainedImageLayers);
+
const Flags flags;
Owned<MetadataManager> metadataManager;
Owned<Puller> puller;
hashmap<string, Owned<Promise<Image>>> pulling;
+
+ // For executing path removals in a separated actor.
+ process::Executor executor;
};
@@ -164,6 +180,12 @@ Try<Owned<slave::Store>> Store::create(
mkdir.error());
}
+ mkdir = os::mkdir(paths::getGcDir(flags.docker_store_dir));
+ if (mkdir.isError()) {
+ return Error("Failed to create Docker store gc directory: " +
+ mkdir.error());
+ }
+
Try<Owned<MetadataManager>> metadataManager = MetadataManager::create(flags);
if (metadataManager.isError()) {
return Error(metadataManager.error());
@@ -203,6 +225,15 @@ Future<ImageInfo> Store::get(
}
+Future<Nothing> Store::prune(
+ const vector<mesos::Image>& excludedImages,
+ const hashset<string>& activeLayerPaths)
+{
+ return dispatch(
+ process.get(), &StoreProcess::prune, excludedImages, activeLayerPaths);
+}
+
+
Future<Nothing> StoreProcess::recover()
{
return metadataManager->recover();
@@ -447,6 +478,124 @@ Future<Nothing> StoreProcess::moveLayer(
return Nothing();
}
+
+Future<Nothing> StoreProcess::prune(
+ const vector<mesos::Image>& excludedImages,
+ const hashset<string>& activeLayerPaths)
+{
+ // All existing pulling should have finished.
+ if (!pulling.empty()) {
+ return Failure("Cannot prune and pull at the same time");
+ }
+
+ vector<spec::ImageReference> imageReferences;
+ imageReferences.reserve(excludedImages.size());
+
+ foreach (const mesos::Image& image, excludedImages) {
+ Try<spec::ImageReference> reference =
+ spec::parseImageReference(image.docker().name());
+
+ if (reference.isError()) {
+ return Failure(
+ "Failed to parse docker image '" + image.docker().name() +
+ "': " + reference.error());
+ }
+
+ imageReferences.push_back(reference.get());
+ }
+
+ return metadataManager->prune(imageReferences)
+ .then(defer(self(), &Self::_prune, activeLayerPaths, lambda::_1));
+}
+
+
+Future<Nothing> StoreProcess::_prune(
+ const hashset<string>& activeLayerRootfses,
+ const hashset<string>& retainedLayerIds)
+{
+ Try<list<string>> allLayers = paths::listLayers(flags.docker_store_dir);
+ if (allLayers.isError()) {
+ return Failure("Failed to find all layer paths: " + allLayers.error());
+ }
+
+ // Paths in provisioner are layer rootfs. Normalize them to layer
+ // path.
+ hashset<string> activeLayerPaths;
+
+ foreach (const string& rootfsPath, activeLayerRootfses) {
+ activeLayerPaths.insert(Path(rootfsPath).dirname());
+ }
+
+ foreach (const string& layerId, allLayers.get()) {
+ if (retainedLayerIds.contains(layerId)) {
+ VLOG(1) << "Layer '" << layerId << "' is retained by image store cache";
+ continue;
+ }
+
+ const string layerPath =
+ paths::getImageLayerPath(flags.docker_store_dir, layerId);
+
+ if (activeLayerPaths.contains(layerPath)) {
+ VLOG(1) << "Layer '" << layerId << "' is retained by active container";
+ continue;
+ }
+
+ const string target =
+ paths::getGcLayerPath(flags.docker_store_dir, layerId);
+
+ if (os::exists(target)) {
+ return Failure("Marking phase target '" + target + "' already exists");
+ }
+
+ VLOG(1) << "Marking layer '" << layerId << "' to gc by renaming '"
+ << layerPath << "' to '" << target << "'";
+
+ Try<Nothing> rename = os::rename(layerPath, target);
+ if (rename.isError()) {
+ return Failure(
+ "Failed to move layer from '" + layerPath +
+ "' to '" + target + "': " + rename.error());
+ }
+ }
+
+ const string gcDir = paths::getGcDir(flags.docker_store_dir);
+ auto rmdirs = [gcDir]() {
+ Try<list<string>> targets = os::ls(gcDir);
+ if (targets.isError()) {
+ LOG(WARNING) << "Error when listing gcDir '" << gcDir
+ << "': " << targets.error();
+ return Nothing();
+ }
+
+ foreach (const string& target, targets.get()) {
+ const string path = path::join(gcDir, target);
+ // Run the removal operation with 'continueOnError = false'.
+ // A possible situation is that we incorrectly marked a layer
+ // which is still used by certain layer based backends (aufs, overlay).
+ // In such a case, we proceed with a warning and try to free up as much
+ // disk spaces as possible.
+ LOG(INFO) << "Deleting path '" << path << "'";
+ Try<Nothing> rmdir = os::rmdir(path, true, true, false);
+
+ if (rmdir.isError()) {
+ LOG(WARNING) << "Failed to delete '" << path << "': "
+ << rmdir.error();
+ } else {
+ LOG(INFO) << "Deleted '" << path << "'";
+ }
+ }
+
+ return Nothing();
+ };
+
+ // NOTE: All `rmdirs` calls are dispatched to one executor so that:
+ // 1. They do not block other dispatches;
+ // 2. They do not occupy all worker threads.
+ executor.execute(rmdirs);
+
+ return Nothing();
+}
+
} // namespace docker {
} // namespace slave {
} // namespace internal {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/store.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/store.hpp b/src/slave/containerizer/mesos/provisioner/docker/store.hpp
index 1cf6866..a420fa0 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/store.hpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/store.hpp
@@ -21,6 +21,7 @@
#include <process/owned.hpp>
+#include <stout/hashset.hpp>
#include <stout/try.hpp>
#include "slave/flags.hpp"
@@ -58,6 +59,10 @@ public:
const mesos::Image& image,
const std::string& backend);
+ virtual process::Future<Nothing> prune(
+ const std::vector<mesos::Image>& excludeImages,
+ const hashset<std::string>& activeLayerPaths);
+
private:
explicit Store(process::Owned<StoreProcess> process);
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/provisioner.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.cpp b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
index 1723588..d19dc1a 100644
--- a/src/slave/containerizer/mesos/provisioner/provisioner.cpp
+++ b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
@@ -32,6 +32,7 @@
#include <stout/foreach.hpp>
#include <stout/hashmap.hpp>
#include <stout/hashset.hpp>
+#include <stout/lambda.hpp>
#include <stout/os.hpp>
#include <stout/stringify.hpp>
#include <stout/uuid.hpp>
@@ -56,6 +57,7 @@ using std::vector;
using process::Failure;
using process::Future;
using process::Owned;
+using process::ReadWriteLock;
using mesos::internal::slave::AUFS_BACKEND;
using mesos::internal::slave::BIND_BACKEND;
@@ -312,6 +314,16 @@ Future<bool> Provisioner::destroy(const ContainerID& containerId) const
}
+Future<Nothing> Provisioner::pruneImages(
+ const vector<Image>& excludedImages) const
+{
+ return dispatch(
+ CHECK_NOTNULL(process.get()),
+ &ProvisionerProcess::pruneImages,
+ excludedImages);
+}
+
+
ProvisionerProcess::ProvisionerProcess(
const string& _rootDir,
const string& _defaultBackend,
@@ -450,20 +462,28 @@ Future<ProvisionInfo> ProvisionerProcess::provision(
const ContainerID& containerId,
const Image& image)
{
- if (!stores.contains(image.type())) {
- return Failure(
- "Unsupported container image type: " +
- stringify(image.type()));
- }
+ // `destroy` and `provision` can happen concurrently, but `pruneImages`
+ // is exclusive.
+ return rwLock.read_lock()
+ .then(defer(self(), [this, containerId, image]() -> Future<ProvisionInfo> {
+ if (!stores.contains(image.type())) {
+ return Failure(
+ "Unsupported container image type: " + stringify(image.type()));
+ }
- // Get and then provision image layers from the store.
- return stores.get(image.type()).get()->get(image, defaultBackend)
- .then(defer(self(),
- &Self::_provision,
- containerId,
- image,
- defaultBackend,
- lambda::_1));
+ // Get and then provision image layers from the store.
+ return stores.get(image.type()).get()->get(image, defaultBackend)
+ .then(defer(
+ self(),
+ &Self::_provision,
+ containerId,
+ image,
+ defaultBackend,
+ lambda::_1));
+ }))
+ .onAny(defer(self(), [this](const Future<ProvisionInfo>&) {
+ rwLock.read_unlock();
+ }));
}
@@ -510,7 +530,7 @@ Future<ProvisionInfo> ProvisionerProcess::_provision(
ContainerLayers containerLayers;
- foreach(const string& layer, imageInfo.layers) {
+ foreach (const string& layer, imageInfo.layers) {
containerLayers.add_paths(layer);
}
@@ -529,46 +549,55 @@ Future<ProvisionInfo> ProvisionerProcess::_provision(
Future<bool> ProvisionerProcess::destroy(const ContainerID& containerId)
{
- if (!infos.contains(containerId)) {
- VLOG(1) << "Ignoring destroy request for unknown container " << containerId;
-
- return false;
- }
-
- if (infos[containerId]->destroying) {
- return infos[containerId]->termination.future();
- }
+ // `destroy` and `provision` can happen concurrently, but `pruneImages`
+ // is exclusive.
+ return rwLock.read_lock()
+ .then(defer(self(), [this, containerId]() -> Future<bool> {
+ if (!infos.contains(containerId)) {
+ VLOG(1) << "Ignoring destroy request for unknown container "
+ << containerId;
+
+ return false;
+ }
- infos[containerId]->destroying = true;
+ if (infos[containerId]->destroying) {
+ return infos[containerId]->termination.future();
+ }
- // Provisioner destroy can be invoked from:
- // 1. Provisioner `recover` to destroy all unknown orphans.
- // 2. Containerizer `recover` to destroy known orphans.
- // 3. Containerizer `destroy` on one specific container.
- //
- // NOTE: For (2) and (3), we expect the container being destroyed
- // has no any child contain remain running. However, for case (1),
- // if the container runtime directory does not survive after the
- // machine reboots and the provisioner directory under the agent
- // work dir still exists, all containers will be regarded as
- // unknown containers and will be destroyed. In this case, a parent
- // container may be destroyed before its child containers are
- // cleaned up. So we have to make `destroy()` recursively for
- // this particular case.
- //
- // TODO(gilbert): Move provisioner directory to the container
- // runtime directory after a deprecation cycle to avoid
- // making `provisioner::destroy()` being recursive.
- list<Future<bool>> destroys;
-
- foreachkey (const ContainerID& entry, infos) {
- if (entry.has_parent() && entry.parent() == containerId) {
- destroys.push_back(destroy(entry));
- }
- }
+ infos[containerId]->destroying = true;
+
+ // Provisioner destroy can be invoked from:
+ // 1. Provisioner `recover` to destroy all unknown orphans.
+ // 2. Containerizer `recover` to destroy known orphans.
+ // 3. Containerizer `destroy` on one specific container.
+ //
+ // NOTE: For (2) and (3), we expect the container being destroyed
+ // has no any child contain remain running. However, for case (1),
+ // if the container runtime directory does not survive after the
+ // machine reboots and the provisioner directory under the agent
+ // work dir still exists, all containers will be regarded as
+ // unknown containers and will be destroyed. In this case, a parent
+ // container may be destroyed before its child containers are
+ // cleaned up. So we have to make `destroy()` recursively for
+ // this particular case.
+ //
+ // TODO(gilbert): Move provisioner directory to the container
+ // runtime directory after a deprecation cycle to avoid
+ // making `provisioner::destroy()` being recursive.
+ list<Future<bool>> destroys;
+
+ foreachkey (const ContainerID& entry, infos) {
+ if (entry.has_parent() && entry.parent() == containerId) {
+ destroys.push_back(destroy(entry));
+ }
+ }
- return await(destroys)
- .then(defer(self(), &Self::_destroy, containerId, lambda::_1));
+ return await(destroys)
+ .then(defer(self(), &Self::_destroy, containerId, lambda::_1));
+ }))
+ .onAny(defer(self(), [this](const Future<bool>&) {
+ rwLock.read_unlock();
+ }));
}
@@ -661,6 +690,59 @@ Future<bool> ProvisionerProcess::__destroy(const ContainerID& containerId)
}
+Future<Nothing> ProvisionerProcess::pruneImages(
+ const vector<Image>& excludedImages)
+{
+ // `destroy` and `provision` can happen concurrently, but `pruneImages`
+ // is exclusive.
+ return rwLock.write_lock()
+ .then(defer(self(), [this, excludedImages]() -> Future<Nothing> {
+ hashset<string> activeLayerPaths;
+
+ foreachpair (
+ const ContainerID& containerId, const Owned<Info>& info, infos) {
+ if (info->layers.isNone()) {
+ // There are several possibilities if layer information missing:
+ // - legacy containers provisioned before layer checkpointing:
+ // they should already be excluded by the containerizer;
+ // - the agent crashed after `backend::provision()` finished but
+ // before checkpointing the `layers`. In such a case, the rootfs
+ // should not be used by any running containers yet so it is safe
+ // to skip those layers;
+ // - checkpointed layer files were manually deleted: we do not expect
+ // this to be allowd, but log it for information purpose.
+ VLOG(1) << "Container " << containerId
+ << " has no checkpointed layers";
+
+ continue;
+ }
+
+ activeLayerPaths.insert(info->layers->begin(), info->layers->end());
+ }
+
+ list<Future<Nothing>> futures;
+
+ foreachpair (
+ const Image::Type& type, const Owned<Store>& store, stores) {
+ vector<Image> images;
+ foreach (const Image& image, excludedImages) {
+ if (image.type() == type) {
+ images.push_back(image);
+ }
+ }
+
+ futures.push_back(store.get()->prune(images, activeLayerPaths));
+ }
+
+ return collect(futures)
+ .then([]() { return Nothing(); });
+ }))
+ .onAny(defer(self(), [this](const Future<Nothing>&) {
+ rwLock.write_unlock();
+ }));
+}
+
+
ProvisionerProcess::Metrics::Metrics()
: remove_container_errors(
"containerizer/mesos/provisioner/remove_container_errors")
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/provisioner.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.hpp b/src/slave/containerizer/mesos/provisioner/provisioner.hpp
index b87144c..88d7019 100644
--- a/src/slave/containerizer/mesos/provisioner/provisioner.hpp
+++ b/src/slave/containerizer/mesos/provisioner/provisioner.hpp
@@ -34,6 +34,7 @@
#include <process/future.hpp>
#include <process/owned.hpp>
+#include <process/rwlock.hpp>
#include <process/metrics/counter.hpp>
#include <process/metrics/metrics.hpp>
@@ -102,6 +103,12 @@ public:
// provisioned root filesystem for the given container.
virtual process::Future<bool> destroy(const ContainerID& containerId) const;
+ // Prune images in different stores. Image references in excludedImages
+ // will be passed to stores and retained in a best effort fashion.
+ // All layer paths used by active containers will not be pruned.
+ virtual process::Future<Nothing> pruneImages(
+ const std::vector<Image>& excludedImages) const;
+
protected:
Provisioner() {} // For creating mock object.
@@ -132,6 +139,9 @@ public:
process::Future<bool> destroy(const ContainerID& containerId);
+ process::Future<Nothing> pruneImages(
+ const std::vector<Image>& excludedImages);
+
private:
process::Future<ProvisionInfo> _provision(
const ContainerID& containerId,
@@ -186,6 +196,20 @@ private:
process::metrics::Counter remove_container_errors;
} metrics;
+
+ // This `ReadWriteLock` instance is used to protect the critical
+ // section, which includes store directory and provision directory.
+ // Because `provision` and `destroy` are scoped by `containerId`,
+ // they are not expected to touch the same critical section
+ // simultaneously, so any `provision` and `destroy` can happen concurrently.
+ // This is guaranteed by Mesos containerizer, e.g., a `destroy` will always
+ // wait for a container's `provision` to finish, then do the cleanup.
+ //
+ // On the other hand, `pruneImages` needs to know all active layers from all
+ // containers, therefore it must be exclusive to other `provision`, `destroy`
+ // and `pruneImages` so that we do not prune image layers which is used by an
+ // active `provision` or `destroy`.
+ process::ReadWriteLock rwLock;
};
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/store.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/store.cpp b/src/slave/containerizer/mesos/provisioner/store.cpp
index cc5cc81..11fce0e 100644
--- a/src/slave/containerizer/mesos/provisioner/store.cpp
+++ b/src/slave/containerizer/mesos/provisioner/store.cpp
@@ -15,6 +15,7 @@
// limitations under the License.
#include <string>
+#include <vector>
#include <mesos/type_utils.hpp>
@@ -31,6 +32,7 @@
#include "slave/containerizer/mesos/provisioner/docker/store.hpp"
using std::string;
+using std::vector;
using process::Owned;
@@ -86,6 +88,14 @@ Try<hashmap<Image::Type, Owned<Store>>> Store::create(
return stores;
}
+
+process::Future<Nothing> Store::prune(
+ const vector<Image>& excludeImages,
+ const hashset<string>& activeLayerPaths)
+{
+ return Nothing();
+}
+
} // namespace slave {
} // namespace internal {
} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/store.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/store.hpp b/src/slave/containerizer/mesos/provisioner/store.hpp
index 01ab83d..a4ae00e 100644
--- a/src/slave/containerizer/mesos/provisioner/store.hpp
+++ b/src/slave/containerizer/mesos/provisioner/store.hpp
@@ -31,6 +31,7 @@
#include <process/future.hpp>
#include <process/owned.hpp>
+#include <stout/hashset.hpp>
#include <stout/try.hpp>
#include "slave/flags.hpp"
@@ -87,6 +88,23 @@ public:
virtual process::Future<ImageInfo> get(
const Image& image,
const std::string& backend) = 0;
+
+ // Prune unused images from the given store. This is called within
+ // an exclusive lock from `provisioner`, which means any other
+ // image provision or prune are blocked until the future is satsified,
+ // so an implementation should minimize the blocking time.
+ //
+ // Any image specified in `excludedImages` should not be pruned if
+ // it is already cached previously.
+ //
+ // On top of this, all layer paths used to provisioner all active
+ // containers are also passed in `activeLayerPaths`, and these layers
+ // should also be retained. Because in certain store (e.g, docker store)
+ // the cache is not source of truth, and we need to not only keep the
+ // excluded images, but also maintain the cache.
+ virtual process::Future<Nothing> prune(
+ const std::vector<Image>& excludedImages,
+ const hashset<std::string>& activeLayerPaths);
};
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer.cpp b/src/tests/containerizer.cpp
index c6f1ec0..665bd5d 100644
--- a/src/tests/containerizer.cpp
+++ b/src/tests/containerizer.cpp
@@ -31,6 +31,7 @@ using process::http::Connection;
using std::map;
using std::shared_ptr;
using std::string;
+using std::vector;
using testing::_;
using testing::Invoke;
@@ -313,6 +314,11 @@ public:
return containers_.keys();
}
+ Future<Nothing> pruneImages()
+ {
+ return Nothing();
+ }
+
private:
struct ContainerData
{
@@ -437,6 +443,9 @@ void TestContainerizer::setup()
EXPECT_CALL(*this, kill(_, _))
.WillRepeatedly(Invoke(this, &TestContainerizer::_kill));
+
+ EXPECT_CALL(*this, pruneImages())
+ .WillRepeatedly(Invoke(this, &TestContainerizer::_pruneImages));
}
@@ -568,6 +577,14 @@ Future<hashset<ContainerID>> TestContainerizer::containers()
&TestContainerizerProcess::containers);
}
+
+Future<Nothing> TestContainerizer::_pruneImages()
+{
+ return process::dispatch(
+ process.get(),
+ &TestContainerizerProcess::pruneImages);
+}
+
} // namespace tests {
} // namespace internal {
} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer.hpp b/src/tests/containerizer.hpp
index c98913f..3d162fe 100644
--- a/src/tests/containerizer.hpp
+++ b/src/tests/containerizer.hpp
@@ -122,6 +122,10 @@ public:
kill,
process::Future<bool>(const ContainerID&, int));
+ MOCK_METHOD0(
+ pruneImages,
+ process::Future<Nothing>());
+
// Additional destroy method for testing because we won't know the
// ContainerID created for each container.
process::Future<bool> destroy(
@@ -163,10 +167,13 @@ private:
process::Future<bool> _destroy(
const ContainerID& containerId);
+
process::Future<bool> _kill(
const ContainerID& containerId,
int status);
+ process::Future<Nothing> _pruneImages();
+
process::Owned<TestContainerizerProcess> process;
};
http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer/mock_containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/mock_containerizer.hpp b/src/tests/containerizer/mock_containerizer.hpp
index 5befccc..bbfa768 100644
--- a/src/tests/containerizer/mock_containerizer.hpp
+++ b/src/tests/containerizer/mock_containerizer.hpp
@@ -81,6 +81,8 @@ public:
MOCK_METHOD0(
containers,
process::Future<hashset<ContainerID>>());
+
+ MOCK_METHOD0(pruneImages, process::Future<Nothing>());
};
} // namespace tests {
[2/5] mesos git commit: Checkpoint and recover `ContainerConfig` in
Mesos containerizer.
Posted by gi...@apache.org.
Checkpoint and recover `ContainerConfig` in Mesos containerizer.
This patch includes the following change:
- Checkpointed `ContainerConfig` used to launch a container;
- Added helper function to read checkpointed `ContainerConfig`;
- Recovered `ContainerConfig`.
- Make `ContainerConfig` Option in `Container` struct to indicate
whether recovered.
Review: https://reviews.apache.org/r/55334/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/03a2a4df
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/03a2a4df
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/03a2a4df
Branch: refs/heads/master
Commit: 03a2a4dfa47b1d47c5eb23e81f5ef8213e46d545
Parents: b433262
Author: Zhitao Li <zh...@gmail.com>
Authored: Fri Nov 17 16:36:24 2017 -0800
Committer: Gilbert Song <so...@gmail.com>
Committed: Mon Nov 20 12:28:11 2017 -0800
----------------------------------------------------------------------
src/slave/containerizer/mesos/containerizer.cpp | 144 +++++++++++++------
src/slave/containerizer/mesos/containerizer.hpp | 9 +-
src/slave/containerizer/mesos/paths.cpp | 37 ++++-
src/slave/containerizer/mesos/paths.hpp | 8 ++
4 files changed, 152 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/03a2a4df/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index db5f044..bf71db1 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -759,6 +759,24 @@ Future<Nothing> MesosContainerizerProcess::recover(
container->state = RUNNING;
container->pid = state.pid();
container->directory = state.directory();
+
+ // Attempt to read the launch config of the container.
+ Result<ContainerConfig> config =
+ containerizer::paths::getContainerConfig(flags.runtime_dir, containerId);
+
+ if (config.isError()) {
+ return Failure(
+ "Failed to get config for container " + stringify(containerId) +
+ ": " + config.error());
+ }
+
+ if (config.isSome()) {
+ container->config = config.get();
+ } else {
+ VLOG(1) << "No config is recovered for container " << containerId
+ << ", this means image pruning will be disabled.";
+ }
+
containers_[containerId] = container;
}
@@ -812,6 +830,14 @@ Future<Nothing> MesosContainerizerProcess::recover(
return Failure("Failed to get container pid: " + pid.error());
}
+ // Attempt to read the launch config of the container.
+ Result<ContainerConfig> config =
+ containerizer::paths::getContainerConfig(flags.runtime_dir, containerId);
+
+ if (config.isError()) {
+ return Failure("Failed to get container config: " + config.error());
+ }
+
// Determine the sandbox if this is a nested or standalone container.
const bool isStandaloneContainer =
containerizer::paths::isStandaloneContainer(
@@ -848,6 +874,15 @@ Future<Nothing> MesosContainerizerProcess::recover(
container->status = Future<Option<int>>(None());
}
+ if (config.isSome()) {
+ container->config = ContainerConfig();
+ container->config->CopyFrom(config.get());
+ } else {
+ VLOG(1) << "No checkpointed config recovered for container "
+ << containerId << ", this means image pruning will "
+ << "be disabled.";
+ }
+
containers_[containerId] = container;
// TODO(klueska): The final check in the if statement makes sure
@@ -989,8 +1024,8 @@ Future<Nothing> MesosContainerizerProcess::__recover(
if (containerLaunchInfo.isError()) {
return Failure(
- "Failed to recover launch information of container '" +
- stringify(containerId) + "': " + containerLaunchInfo.error());
+ "Failed to recover launch information of container " +
+ stringify(containerId) + ": " + containerLaunchInfo.error());
}
if (containerLaunchInfo.isSome()) {
@@ -1140,9 +1175,14 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::launch(
// TODO(jieyu): This is currently best effort. After the agent fails
// over, 'executor_info' won't be set in root parent container's
// 'config'. Consider populating 'executor_info' in recover path.
- if (containers_[rootContainerId]->config.has_executor_info()) {
- containerConfig.mutable_executor_info()->CopyFrom(
- containers_[rootContainerId]->config.executor_info());
+ if (containers_[rootContainerId]->config.isSome()) {
+ if (containers_[rootContainerId]->config->has_executor_info()) {
+ containerConfig.mutable_executor_info()->CopyFrom(
+ containers_[rootContainerId]->config->executor_info());
+ }
+ } else {
+ LOG(WARNING) << "Cannot determine executor_info for root container '"
+ << rootContainerId << "' which has no config recovered.";
}
}
@@ -1279,11 +1319,10 @@ Future<Nothing> MesosContainerizerProcess::prepare(
}
CHECK_EQ(container->state, PROVISIONING);
-
- transition(containerId, PREPARING);
+ CHECK_SOME(container->config);
if (provisionInfo.isSome()) {
- container->config.set_rootfs(provisionInfo->rootfs);
+ container->config->set_rootfs(provisionInfo->rootfs);
if (provisionInfo->dockerManifest.isSome() &&
provisionInfo->appcManifest.isSome()) {
@@ -1291,18 +1330,37 @@ Future<Nothing> MesosContainerizerProcess::prepare(
}
if (provisionInfo->dockerManifest.isSome()) {
- ContainerConfig::Docker* docker = container->config.mutable_docker();
+ ContainerConfig::Docker* docker = container->config->mutable_docker();
docker->mutable_manifest()->CopyFrom(provisionInfo->dockerManifest.get());
}
if (provisionInfo->appcManifest.isSome()) {
- ContainerConfig::Appc* appc = container->config.mutable_appc();
+ ContainerConfig::Appc* appc = container->config->mutable_appc();
appc->mutable_manifest()->CopyFrom(provisionInfo->appcManifest.get());
}
}
// Captured for lambdas below.
- ContainerConfig containerConfig = container->config;
+ ContainerConfig containerConfig = container->config.get();
+
+ // Checkpoint the `ContainerConfig` which includes all information to launch a
+ // container. Critical information (e.g., `ContainerInfo`) can be used for
+ // tracking container image usage.
+ const string configPath = path::join(
+ containerizer::paths::getRuntimePath(flags.runtime_dir, containerId),
+ containerizer::paths::CONTAINER_CONFIG_FILE);
+
+ Try<Nothing> configCheckpointed =
+ slave::state::checkpoint(configPath, containerConfig);
+
+ if (configCheckpointed.isError()) {
+ return Failure("Failed to checkpoint the container config to '" +
+ configPath + "': " + configCheckpointed.error());
+ }
+
+ VLOG(1) << "Checkpointed ContainerConfig at '" << configPath << "'";
+
+ transition(containerId, PREPARING);
// We prepare the isolators sequentially according to their ordering
// to permit basic dependency specification, e.g., preparing a
@@ -1350,14 +1408,16 @@ Future<Nothing> MesosContainerizerProcess::fetch(
transition(containerId, FETCHING);
- const string directory = container->config.directory();
+ CHECK_SOME(container->config);
+
+ const string directory = container->config->directory();
return fetcher->fetch(
containerId,
- container->config.command_info(),
+ container->config->command_info(),
directory,
- container->config.has_user()
- ? container->config.user()
+ container->config->has_user()
+ ? container->config->user()
: Option<string>::none())
.then([=]() -> Future<Nothing> {
if (HookManager::hooksAvailable()) {
@@ -1387,6 +1447,7 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
CHECK(containerIO.isSome());
CHECK_EQ(container->state, PREPARING);
CHECK_READY(container->launchInfos);
+ CHECK_SOME(container->config);
ContainerLaunchInfo launchInfo;
@@ -1461,11 +1522,11 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
// Determine the launch command for the container.
if (!launchInfo.has_command()) {
- launchInfo.mutable_command()->CopyFrom(container->config.command_info());
+ launchInfo.mutable_command()->CopyFrom(container->config->command_info());
} else {
// For command tasks, merge the launch commands with the executor
// launch command.
- if (container->config.has_task_info()) {
+ if (container->config->has_task_info()) {
// Isolators are not supposed to set any other fields in the
// command except the arguments for the command executor.
CHECK(launchInfo.command().uris().empty())
@@ -1482,7 +1543,7 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
// NOTE: The ordering here is important because we want the
// command executor arguments to be in front of the arguments
// set by isolators. See details in MESOS-7909.
- CommandInfo launchCommand = container->config.command_info();
+ CommandInfo launchCommand = container->config->command_info();
launchCommand.MergeFrom(launchInfo.command());
launchInfo.mutable_command()->CopyFrom(launchCommand);
}
@@ -1492,7 +1553,7 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
// flag to the launch command of the command executor.
// TODO(tillt): Remove this once we no longer support the old style
// command task (i.e., that uses mesos-execute).
- if (container->config.has_task_info() && launchInfo.has_task_environment()) {
+ if (container->config->has_task_info() && launchInfo.has_task_environment()) {
hashmap<string, string> commandTaskEnvironment;
foreach (const Environment::Variable& variable,
@@ -1526,10 +1587,10 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
// TODO(jieyu): Remove this once we no longer support the old style
// command task (i.e., that uses mesos-execute).
// TODO(jieyu): Consider move this to filesystem isolator.
- if (container->config.has_task_info() &&
- container->config.has_rootfs()) {
+ if (container->config->has_task_info() &&
+ container->config->has_rootfs()) {
launchInfo.mutable_command()->add_arguments(
- "--rootfs=" + container->config.rootfs());
+ "--rootfs=" + container->config->rootfs());
}
// TODO(jieyu): 'uris', 'environment' and 'user' in the launch
@@ -1561,8 +1622,8 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
Environment containerEnvironment;
// Inherit environment from the parent container for DEBUG containers.
- if (container->config.has_container_class() &&
- container->config.container_class() == ContainerClass::DEBUG) {
+ if (container->config->has_container_class() &&
+ container->config->container_class() == ContainerClass::DEBUG) {
// DEBUG containers must have a parent.
CHECK(containerId.has_parent());
if (containers_[containerId.parent()]->launchInfo.isSome()) {
@@ -1579,8 +1640,8 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
}
// DEBUG containers inherit MESOS_SANDBOX from their parent.
- if (!container->config.has_container_class() ||
- container->config.container_class() != ContainerClass::DEBUG) {
+ if (!container->config->has_container_class() ||
+ container->config->container_class() != ContainerClass::DEBUG) {
// TODO(jieyu): Consider moving this to filesystem isolator.
//
// NOTE: For the command executor case, although it uses the host
@@ -1589,9 +1650,9 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
// itself does not use this environment variable.
Environment::Variable* variable = containerEnvironment.add_variables();
variable->set_name("MESOS_SANDBOX");
- variable->set_value(container->config.has_rootfs()
+ variable->set_value(container->config->has_rootfs()
? flags.sandbox_directory
- : container->config.directory());
+ : container->config->directory());
}
// `launchInfo.environment` contains the environment returned by
@@ -1603,9 +1664,9 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
// Include user specified environment.
// Skip over any secrets as they should have been resolved by the
// environment_secret isolator.
- if (container->config.command_info().has_environment()) {
+ if (container->config->command_info().has_environment()) {
foreach (const Environment::Variable& variable,
- container->config.command_info().environment().variables()) {
+ container->config->command_info().environment().variables()) {
if (variable.type() != Environment::Variable::SECRET) {
containerEnvironment.add_variables()->CopyFrom(variable);
}
@@ -1620,8 +1681,9 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
// NOTE: Command task is a special case. Even if the container
// config has a root filesystem, the executor container still uses
// the host filesystem.
- if (!container->config.has_task_info() && container->config.has_rootfs()) {
- launchInfo.set_rootfs(container->config.rootfs());
+ if (!container->config->has_task_info() &&
+ container->config->has_rootfs()) {
+ launchInfo.set_rootfs(container->config->rootfs());
}
// For a non-DEBUG container, working directory is set to container sandbox,
@@ -1631,22 +1693,22 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
//
// TODO(alexr): Determining working directory is a convoluted process. We
// should either simplify the logic or extract it into a helper routine.
- if (container->config.has_container_class() &&
- container->config.container_class() == ContainerClass::DEBUG) {
+ if (container->config->has_container_class() &&
+ container->config->container_class() == ContainerClass::DEBUG) {
// DEBUG containers must have a parent.
CHECK(containerId.has_parent());
if (containers_[containerId.parent()]->launchInfo.isSome()) {
// TODO(alexr): Remove this once we no longer support executorless
// command tasks in favor of default executor.
- if (containers_[containerId.parent()]->config.has_task_info()) {
+ if (containers_[containerId.parent()]->config->has_task_info()) {
// For the command executor case, even if the task itself has a root
// filesystem, the executor container still uses the host filesystem,
// hence `ContainerLaunchInfo.working_directory`, which points to the
// executor working directory in the host filesystem, may be different
// from the task working directory when task defines an image. Fall back
// to the sandbox directory if task working directory is not present.
- if (containers_[containerId.parent()]->config.has_rootfs()) {
+ if (containers_[containerId.parent()]->config->has_rootfs()) {
// We can extract the task working directory from the flag being
// passed to the command executor.
foreach (
@@ -1699,18 +1761,18 @@ Future<Containerizer::LaunchResult> MesosContainerizerProcess::_launch(
<< "host filesystem";
}
- launchInfo.set_working_directory(container->config.directory());
+ launchInfo.set_working_directory(container->config->directory());
}
// Determine the user to launch the container as.
- if (container->config.has_user()) {
- launchInfo.set_user(container->config.user());
+ if (container->config->has_user()) {
+ launchInfo.set_user(container->config->user());
}
// TODO(gilbert): Remove this once we no longer support command
// task in favor of default executor.
- if (container->config.has_task_info() &&
- container->config.has_rootfs()) {
+ if (container->config->has_task_info() &&
+ container->config->has_rootfs()) {
// We need to set the executor user as root as it needs to
// perform chroot (even when switch_user is set to false).
launchInfo.set_user("root");
http://git-wip-us.apache.org/repos/asf/mesos/blob/03a2a4df/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index f5d5146..e859b5b 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -342,9 +342,12 @@ private:
// the ResourceStatistics limits in usage().
Resources resources;
- // The configuration for the container to be launched. This field
- // is only used during the launch of a container.
- mesos::slave::ContainerConfig config;
+ // The configuration for the container to be launched.
+ // This can only be None if the underlying container is launched
+ // before we checkpiont `ContainerConfig` in MESOS-6894.
+ // TODO(zhitao): Drop the `Option` part at the end of deprecation
+ // cycle.
+ Option<mesos::slave::ContainerConfig> config;
// Container's information at the moment it was launched. For example,
// used to bootstrap the launch information of future child DEBUG
http://git-wip-us.apache.org/repos/asf/mesos/blob/03a2a4df/src/slave/containerizer/mesos/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.cpp b/src/slave/containerizer/mesos/paths.cpp
index 23f1fee..8a188a9 100644
--- a/src/slave/containerizer/mesos/paths.cpp
+++ b/src/slave/containerizer/mesos/paths.cpp
@@ -19,12 +19,15 @@
#include <stout/path.hpp>
#include <stout/protobuf.hpp>
+#include "common/protobuf_utils.hpp"
+
#include "slave/containerizer/mesos/paths.hpp"
#ifndef __WINDOWS__
namespace unix = process::network::unix;
#endif // __WINDOWS__
+using mesos::slave::ContainerConfig;
using mesos::slave::ContainerLaunchInfo;
using mesos::slave::ContainerTermination;
@@ -276,8 +279,8 @@ Result<ContainerTermination> getContainerTermination(
::protobuf::read<ContainerTermination>(path);
if (termination.isError()) {
- return Error("Failed to read termination state of container:"
- " " + termination.error());
+ return Error("Failed to read termination state of container: " +
+ termination.error());
}
return termination;
@@ -304,6 +307,34 @@ bool isStandaloneContainer(
}
+Result<ContainerConfig> getContainerConfig(
+ const string& runtimeDir,
+ const ContainerID& containerId)
+{
+ const string path = path::join(
+ getRuntimePath(runtimeDir, containerId),
+ CONTAINER_CONFIG_FILE);
+
+ if (!os::exists(path)) {
+ // This is possible if we recovered a container launched before we
+ // started to checkpoint `ContainerConfig`.
+ VLOG(1) << "Config path '" << path << "' is missing for container' "
+ << containerId << "'";
+ return None();
+ }
+
+ const Result<ContainerConfig>& containerConfig =
+ ::protobuf::read<ContainerConfig>(path);
+
+ if (containerConfig.isError()) {
+ return Error("Failed to read launch config of container: " +
+ containerConfig.error());
+ }
+
+ return containerConfig;
+}
+
+
Try<vector<ContainerID>> getContainerIds(const string& runtimeDir)
{
lambda::function<Try<vector<ContainerID>>(const Option<ContainerID>&)> helper;
@@ -453,6 +484,8 @@ Try<ContainerID> parseSandboxPath(
return currentContainerId;
}
+
+
} // namespace paths {
} // namespace containerizer {
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/03a2a4df/src/slave/containerizer/mesos/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/paths.hpp b/src/slave/containerizer/mesos/paths.hpp
index 7b67ccf..65830a1 100644
--- a/src/slave/containerizer/mesos/paths.hpp
+++ b/src/slave/containerizer/mesos/paths.hpp
@@ -48,6 +48,7 @@ namespace paths {
// root ('--runtime_dir' flag)
// |-- containers
// |-- <container_id>
+// |-- config
// |-- containers
// | |-- <container_id>
// | | |-- <more nesting of containers>
@@ -65,6 +66,7 @@ namespace paths {
constexpr char PID_FILE[] = "pid";
+constexpr char CONTAINER_CONFIG_FILE[] = "config";
constexpr char STATUS_FILE[] = "status";
constexpr char TERMINATION_FILE[] = "termination";
constexpr char SOCKET_FILE[] = "socket";
@@ -182,6 +184,12 @@ bool isStandaloneContainer(
const ContainerID& containerId);
+// The helper method to read the launch config of the contaienr.
+Result<mesos::slave::ContainerConfig> getContainerConfig(
+ const std::string& runtimeDir,
+ const ContainerID& containerId);
+
+
// The helper method to list all container IDs (including nested
// containers) from the container runtime directory. The order of
// returned vector is a result of pre-ordering walk (i.e., parent