You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by qi...@apache.org on 2020/05/28 02:42:15 UTC
[mesos] branch master updated (5a04a16 -> b7c3da5)
This is an automated email from the ASF dual-hosted git repository.
qianzhang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git.
from 5a04a16 Added a test for os::memory.
new 2845330 Erased `Info` struct before unmouting volumes in Docker volume isolator.
new b7c3da5 Added a test `ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure`.
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../mesos/isolators/docker/volume/isolator.cpp | 12 +-
.../containerizer/docker_volume_isolator_tests.cpp | 188 +++++++++++++++++++++
2 files changed, 195 insertions(+), 5 deletions(-)
[mesos] 01/02: Erased `Info` struct before unmouting volumes in
Docker volume isolator.
Posted by qi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
qianzhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 2845330fbd78a80fb7e71c6101724655fa254392
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Fri May 15 10:23:51 2020 +0800
Erased `Info` struct before unmouting volumes in Docker volume isolator.
Currently when `DockerVolumeIsolatorProcess::cleanup()` is called, we will
unmount the volume first, and if the unmount operation fails we will NOT
erase the container's `Info` struct from `infos`. This is problematic
because the remaining `Info` in `infos` will cause the reference count of
the volume is greater than 0, but actually the volume is not being used by
any containers. That means we may never get a chance to unmount this volume
on this agent, furthermore if it is an EBS volume, it cannot be used by any
tasks launched on any other agents since a EBS volume can only be attached
to one node at a time. The only workaround would manually unmount the volume.
So in this patch `DockerVolumeIsolatorProcess::cleanup()` is updated to erase
container's `Info` struct before unmounting volumes.
Review: https://reviews.apache.org/r/72516
---
.../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
index c547696..2f2f624 100644
--- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
+++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
@@ -658,6 +658,13 @@ Future<Nothing> DockerVolumeIsolatorProcess::cleanup(
futures.push_back(this->unmount(volume.driver(), volume.name()));
}
+ // Erase the `Info` struct of this container before unmounting the volumes.
+ // This is to ensure the reference count of the volume will not be wrongly
+ // increased if unmounting volumes fail, otherwise next time when another
+ // container using the same volume is destroyed, we would NOT unmount the
+ // volume since its reference count would be larger than 1.
+ infos.erase(containerId);
+
return await(futures)
.then(defer(
PID<DockerVolumeIsolatorProcess>(this),
@@ -671,8 +678,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup(
const ContainerID& containerId,
const vector<Future<Nothing>>& futures)
{
- CHECK(infos.contains(containerId));
-
vector<string> messages;
foreach (const Future<Nothing>& future, futures) {
if (!future.isReady()) {
@@ -697,9 +702,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup(
LOG(INFO) << "Removed the checkpoint directory at '" << containerDir
<< "' for container " << containerId;
- // Remove all this container's docker volume information from infos.
- infos.erase(containerId);
-
return Nothing();
}
[mesos] 02/02: Added a test
`ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure`.
Posted by qi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
qianzhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit b7c3da5a28fb46b4517d52872aec504fff098967
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Sun May 17 23:30:38 2020 +0800
Added a test `ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure`.
Review: https://reviews.apache.org/r/72523
---
.../containerizer/docker_volume_isolator_tests.cpp | 188 +++++++++++++++++++++
1 file changed, 188 insertions(+)
diff --git a/src/tests/containerizer/docker_volume_isolator_tests.cpp b/src/tests/containerizer/docker_volume_isolator_tests.cpp
index 88d0dc7..ef58f7d 100644
--- a/src/tests/containerizer/docker_volume_isolator_tests.cpp
+++ b/src/tests/containerizer/docker_volume_isolator_tests.cpp
@@ -43,6 +43,8 @@
namespace slave = mesos::internal::slave;
+using process::Clock;
+using process::Failure;
using process::Future;
using process::Owned;
@@ -1675,6 +1677,192 @@ TEST_F(DockerVolumeIsolatorTest,
driver.join();
}
+
+// This test verifies that unmount operation can be still invoked for
+// a docker volume even the previous unmount operation for the same
+// docker volume failed. This is a regression test for MESOS-10126.
+TEST_F(DockerVolumeIsolatorTest, ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure)
+{
+ Clock::pause();
+
+ master::Flags masterFlags = CreateMasterFlags();
+
+ Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+ ASSERT_SOME(master);
+
+ slave::Flags flags = CreateSlaveFlags();
+ flags.docker_volume_checkpoint_dir = path::join(os::getcwd(), "checkpoint");
+
+ MockDockerVolumeDriverClient* mockClient = new MockDockerVolumeDriverClient;
+
+ Try<Owned<MesosContainerizer>> containerizer =
+ createContainerizer(flags, Owned<DriverClient>(mockClient));
+
+ ASSERT_SOME(containerizer);
+
+ Owned<MasterDetector> detector = master.get()->createDetector();
+
+ Try<Owned<cluster::Slave>> slave = StartSlave(
+ detector.get(),
+ containerizer->get(),
+ flags);
+
+ ASSERT_SOME(slave);
+
+ MockScheduler sched;
+
+ MesosSchedulerDriver driver(
+ &sched,
+ DEFAULT_FRAMEWORK_INFO,
+ master.get()->pid,
+ DEFAULT_CREDENTIAL);
+
+ EXPECT_CALL(sched, registered(&driver, _, _));
+
+ Future<vector<Offer>> offers1;
+ EXPECT_CALL(sched, resourceOffers(&driver, _))
+ .WillOnce(FutureArg<1>(&offers1));
+
+ driver.start();
+
+ Clock::advance(masterFlags.allocation_interval);
+
+ AWAIT_READY(offers1);
+ ASSERT_FALSE(offers1->empty());
+
+ const Offer& offer1 = offers1.get()[0];
+
+ // Create a docker volume with relative path.
+ const string volumeDriver = "driver";
+ const string volumeName = "name";
+ const string containerPath = "tmp/foo";
+
+ Volume volume = createDockerVolume(volumeDriver, volumeName, containerPath);
+
+ // Launch the first task with the docker volume.
+ TaskInfo task1 = createTask(
+ offer1.slave_id(),
+ offer1.resources(),
+ "test -f " + containerPath + "/file");
+
+ ContainerInfo containerInfo;
+ containerInfo.set_type(ContainerInfo::MESOS);
+ containerInfo.add_volumes()->CopyFrom(volume);
+
+ task1.mutable_container()->CopyFrom(containerInfo);
+
+ // Create mount point for the volume.
+ const string mountPoint = path::join(os::getcwd(), "volume");
+ ASSERT_SOME(os::mkdir(mountPoint));
+ ASSERT_SOME(os::touch(path::join(mountPoint, "file")));
+
+ Future<string> mountName1;
+ EXPECT_CALL(*mockClient, mount(volumeDriver, _, _))
+ .WillOnce(DoAll(FutureArg<1>(&mountName1),
+ Return(mountPoint)));
+
+ // Simulate an unmount failure.
+ Future<string> unmountName1;
+ EXPECT_CALL(*mockClient, unmount(volumeDriver, _))
+ .WillOnce(DoAll(FutureArg<1>(&unmountName1),
+ Return(Failure("Mock failure"))));
+
+ Future<TaskStatus> statusStarting1;
+ Future<TaskStatus> statusRunning1;
+ Future<TaskStatus> statusFinished1;
+
+ EXPECT_CALL(sched, statusUpdate(&driver, _))
+ .WillOnce(FutureArg<1>(&statusStarting1))
+ .WillOnce(FutureArg<1>(&statusRunning1))
+ .WillOnce(FutureArg<1>(&statusFinished1));
+
+ Future<vector<Offer>> offers2;
+ EXPECT_CALL(sched, resourceOffers(&driver, _))
+ .WillOnce(FutureArg<1>(&offers2))
+ .WillRepeatedly(Return());
+
+ driver.launchTasks(offer1.id(), {task1});
+
+ AWAIT_READY(statusStarting1);
+ EXPECT_EQ(TASK_STARTING, statusStarting1->state());
+
+ AWAIT_READY(statusRunning1);
+ EXPECT_EQ(TASK_RUNNING, statusRunning1->state());
+
+ // Make sure the docker volume mount parameters are same with the
+ // parameters in `containerInfo`.
+ AWAIT_EXPECT_EQ(volumeName, mountName1);
+
+ AWAIT_READY(statusFinished1);
+ EXPECT_EQ(TASK_FINISHED, statusFinished1->state());
+
+ Clock::resume();
+
+ // Make sure the docker volume unmount parameters are same with
+ // the parameters in `containerInfo`.
+ AWAIT_EXPECT_EQ(volumeName, unmountName1);
+
+ Clock::pause();
+ Clock::settle();
+ Clock::advance(masterFlags.allocation_interval);
+
+ AWAIT_READY(offers2);
+ ASSERT_FALSE(offers2->empty());
+
+ const Offer& offer2 = offers2.get()[0];
+
+ // Launch the second task with the same docker volume.
+ TaskInfo task2 = createTask(
+ offer2.slave_id(),
+ offer2.resources(),
+ "test -f " + containerPath + "/file");
+
+ task2.mutable_container()->CopyFrom(containerInfo);
+
+ Future<string> mountName2;
+ EXPECT_CALL(*mockClient, mount(volumeDriver, _, _))
+ .WillOnce(DoAll(FutureArg<1>(&mountName2),
+ Return(mountPoint)));
+
+ Future<string> unmountName2;
+ EXPECT_CALL(*mockClient, unmount(volumeDriver, _))
+ .WillOnce(DoAll(FutureArg<1>(&unmountName2),
+ Return(Nothing())));
+
+ Future<TaskStatus> statusStarting2;
+ Future<TaskStatus> statusRunning2;
+ Future<TaskStatus> statusFinished2;
+
+ EXPECT_CALL(sched, statusUpdate(&driver, _))
+ .WillOnce(FutureArg<1>(&statusStarting2))
+ .WillOnce(FutureArg<1>(&statusRunning2))
+ .WillOnce(FutureArg<1>(&statusFinished2));
+
+ driver.launchTasks(offer2.id(), {task2});
+
+ AWAIT_READY(statusStarting2);
+ EXPECT_EQ(TASK_STARTING, statusStarting2->state());
+
+ AWAIT_READY(statusRunning2);
+ EXPECT_EQ(TASK_RUNNING, statusRunning2->state());
+
+ // Make sure the docker volume mount parameters are same with the
+ // parameters in `containerInfo`.
+ AWAIT_EXPECT_EQ(volumeName, mountName2);
+
+ AWAIT_READY(statusFinished2);
+ EXPECT_EQ(TASK_FINISHED, statusFinished2->state());
+
+ Clock::resume();
+
+ // Check the unmount operation can still be invoked even the
+ // previous one failed.
+ AWAIT_EXPECT_EQ(volumeName, unmountName2);
+
+ driver.stop();
+ driver.join();
+}
+
} // namespace tests {
} // namespace internal {
} // namespace mesos {