You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by qi...@apache.org on 2020/05/28 02:42:15 UTC

[mesos] branch master updated (5a04a16 -> b7c3da5)

This is an automated email from the ASF dual-hosted git repository.

qianzhang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from 5a04a16  Added a test for os::memory.
     new 2845330  Erased `Info` struct before unmouting volumes in Docker volume isolator.
     new b7c3da5  Added a test `ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure`.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../mesos/isolators/docker/volume/isolator.cpp     |  12 +-
 .../containerizer/docker_volume_isolator_tests.cpp | 188 +++++++++++++++++++++
 2 files changed, 195 insertions(+), 5 deletions(-)


[mesos] 01/02: Erased `Info` struct before unmouting volumes in Docker volume isolator.

Posted by qi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

qianzhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 2845330fbd78a80fb7e71c6101724655fa254392
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Fri May 15 10:23:51 2020 +0800

    Erased `Info` struct before unmouting volumes in Docker volume isolator.
    
    Currently when `DockerVolumeIsolatorProcess::cleanup()` is called, we will
    unmount the volume first, and if the unmount operation fails we will NOT
    erase the container's `Info` struct from `infos`. This is problematic
    because the remaining `Info` in `infos` will cause the reference count of
    the volume is greater than 0, but actually the volume is not being used by
    any containers. That means we may never get a chance to unmount this volume
    on this agent, furthermore if it is an EBS volume, it cannot be used by any
    tasks launched on any other agents since a EBS volume can only be attached
    to one node at a time. The only workaround would manually unmount the volume.
    
    So in this patch `DockerVolumeIsolatorProcess::cleanup()` is updated to erase
    container's `Info` struct before unmounting volumes.
    
    Review: https://reviews.apache.org/r/72516
---
 .../containerizer/mesos/isolators/docker/volume/isolator.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
index c547696..2f2f624 100644
--- a/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
+++ b/src/slave/containerizer/mesos/isolators/docker/volume/isolator.cpp
@@ -658,6 +658,13 @@ Future<Nothing> DockerVolumeIsolatorProcess::cleanup(
     futures.push_back(this->unmount(volume.driver(), volume.name()));
   }
 
+  // Erase the `Info` struct of this container before unmounting the volumes.
+  // This is to ensure the reference count of the volume will not be wrongly
+  // increased if unmounting volumes fail, otherwise next time when another
+  // container using the same volume is destroyed, we would NOT unmount the
+  // volume since its reference count would be larger than 1.
+  infos.erase(containerId);
+
   return await(futures)
     .then(defer(
         PID<DockerVolumeIsolatorProcess>(this),
@@ -671,8 +678,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup(
     const ContainerID& containerId,
     const vector<Future<Nothing>>& futures)
 {
-  CHECK(infos.contains(containerId));
-
   vector<string> messages;
   foreach (const Future<Nothing>& future, futures) {
     if (!future.isReady()) {
@@ -697,9 +702,6 @@ Future<Nothing> DockerVolumeIsolatorProcess::_cleanup(
   LOG(INFO) << "Removed the checkpoint directory at '" << containerDir
             << "' for container " << containerId;
 
-  // Remove all this container's docker volume information from infos.
-  infos.erase(containerId);
-
   return Nothing();
 }
 


[mesos] 02/02: Added a test `ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure`.

Posted by qi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

qianzhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit b7c3da5a28fb46b4517d52872aec504fff098967
Author: Qian Zhang <zh...@gmail.com>
AuthorDate: Sun May 17 23:30:38 2020 +0800

    Added a test `ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure`.
    
    Review: https://reviews.apache.org/r/72523
---
 .../containerizer/docker_volume_isolator_tests.cpp | 188 +++++++++++++++++++++
 1 file changed, 188 insertions(+)

diff --git a/src/tests/containerizer/docker_volume_isolator_tests.cpp b/src/tests/containerizer/docker_volume_isolator_tests.cpp
index 88d0dc7..ef58f7d 100644
--- a/src/tests/containerizer/docker_volume_isolator_tests.cpp
+++ b/src/tests/containerizer/docker_volume_isolator_tests.cpp
@@ -43,6 +43,8 @@
 
 namespace slave = mesos::internal::slave;
 
+using process::Clock;
+using process::Failure;
 using process::Future;
 using process::Owned;
 
@@ -1675,6 +1677,192 @@ TEST_F(DockerVolumeIsolatorTest,
   driver.join();
 }
 
+
+// This test verifies that unmount operation can be still invoked for
+// a docker volume even the previous unmount operation for the same
+// docker volume failed. This is a regression test for MESOS-10126.
+TEST_F(DockerVolumeIsolatorTest, ROOT_CommandTaskNoRootfsWithUnmountVolumeFailure)
+{
+  Clock::pause();
+
+  master::Flags masterFlags = CreateMasterFlags();
+
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  slave::Flags flags = CreateSlaveFlags();
+  flags.docker_volume_checkpoint_dir = path::join(os::getcwd(), "checkpoint");
+
+  MockDockerVolumeDriverClient* mockClient = new MockDockerVolumeDriverClient;
+
+  Try<Owned<MesosContainerizer>> containerizer =
+    createContainerizer(flags, Owned<DriverClient>(mockClient));
+
+  ASSERT_SOME(containerizer);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(
+      detector.get(),
+      containerizer->get(),
+      flags);
+
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+
+  MesosSchedulerDriver driver(
+      &sched,
+      DEFAULT_FRAMEWORK_INFO,
+      master.get()->pid,
+      DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  Future<vector<Offer>> offers1;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&offers1));
+
+  driver.start();
+
+  Clock::advance(masterFlags.allocation_interval);
+
+  AWAIT_READY(offers1);
+  ASSERT_FALSE(offers1->empty());
+
+  const Offer& offer1 = offers1.get()[0];
+
+  // Create a docker volume with relative path.
+  const string volumeDriver = "driver";
+  const string volumeName = "name";
+  const string containerPath = "tmp/foo";
+
+  Volume volume = createDockerVolume(volumeDriver, volumeName, containerPath);
+
+  // Launch the first task with the docker volume.
+  TaskInfo task1 = createTask(
+      offer1.slave_id(),
+      offer1.resources(),
+      "test -f " + containerPath + "/file");
+
+  ContainerInfo containerInfo;
+  containerInfo.set_type(ContainerInfo::MESOS);
+  containerInfo.add_volumes()->CopyFrom(volume);
+
+  task1.mutable_container()->CopyFrom(containerInfo);
+
+  // Create mount point for the volume.
+  const string mountPoint = path::join(os::getcwd(), "volume");
+  ASSERT_SOME(os::mkdir(mountPoint));
+  ASSERT_SOME(os::touch(path::join(mountPoint, "file")));
+
+  Future<string> mountName1;
+  EXPECT_CALL(*mockClient, mount(volumeDriver, _, _))
+    .WillOnce(DoAll(FutureArg<1>(&mountName1),
+                    Return(mountPoint)));
+
+  // Simulate an unmount failure.
+  Future<string> unmountName1;
+  EXPECT_CALL(*mockClient, unmount(volumeDriver, _))
+    .WillOnce(DoAll(FutureArg<1>(&unmountName1),
+                    Return(Failure("Mock failure"))));
+
+  Future<TaskStatus> statusStarting1;
+  Future<TaskStatus> statusRunning1;
+  Future<TaskStatus> statusFinished1;
+
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&statusStarting1))
+    .WillOnce(FutureArg<1>(&statusRunning1))
+    .WillOnce(FutureArg<1>(&statusFinished1));
+
+  Future<vector<Offer>> offers2;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&offers2))
+    .WillRepeatedly(Return());
+
+  driver.launchTasks(offer1.id(), {task1});
+
+  AWAIT_READY(statusStarting1);
+  EXPECT_EQ(TASK_STARTING, statusStarting1->state());
+
+  AWAIT_READY(statusRunning1);
+  EXPECT_EQ(TASK_RUNNING, statusRunning1->state());
+
+  // Make sure the docker volume mount parameters are same with the
+  // parameters in `containerInfo`.
+  AWAIT_EXPECT_EQ(volumeName, mountName1);
+
+  AWAIT_READY(statusFinished1);
+  EXPECT_EQ(TASK_FINISHED, statusFinished1->state());
+
+  Clock::resume();
+
+  // Make sure the docker volume unmount parameters are same with
+  // the parameters in `containerInfo`.
+  AWAIT_EXPECT_EQ(volumeName, unmountName1);
+
+  Clock::pause();
+  Clock::settle();
+  Clock::advance(masterFlags.allocation_interval);
+
+  AWAIT_READY(offers2);
+  ASSERT_FALSE(offers2->empty());
+
+  const Offer& offer2 = offers2.get()[0];
+
+  // Launch the second task with the same docker volume.
+  TaskInfo task2 = createTask(
+      offer2.slave_id(),
+      offer2.resources(),
+      "test -f " + containerPath + "/file");
+
+  task2.mutable_container()->CopyFrom(containerInfo);
+
+  Future<string> mountName2;
+  EXPECT_CALL(*mockClient, mount(volumeDriver, _, _))
+    .WillOnce(DoAll(FutureArg<1>(&mountName2),
+                    Return(mountPoint)));
+
+  Future<string> unmountName2;
+  EXPECT_CALL(*mockClient, unmount(volumeDriver, _))
+    .WillOnce(DoAll(FutureArg<1>(&unmountName2),
+                    Return(Nothing())));
+
+  Future<TaskStatus> statusStarting2;
+  Future<TaskStatus> statusRunning2;
+  Future<TaskStatus> statusFinished2;
+
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&statusStarting2))
+    .WillOnce(FutureArg<1>(&statusRunning2))
+    .WillOnce(FutureArg<1>(&statusFinished2));
+
+  driver.launchTasks(offer2.id(), {task2});
+
+  AWAIT_READY(statusStarting2);
+  EXPECT_EQ(TASK_STARTING, statusStarting2->state());
+
+  AWAIT_READY(statusRunning2);
+  EXPECT_EQ(TASK_RUNNING, statusRunning2->state());
+
+  // Make sure the docker volume mount parameters are same with the
+  // parameters in `containerInfo`.
+  AWAIT_EXPECT_EQ(volumeName, mountName2);
+
+  AWAIT_READY(statusFinished2);
+  EXPECT_EQ(TASK_FINISHED, statusFinished2->state());
+
+  Clock::resume();
+
+  // Check the unmount operation can still be invoked even the
+  // previous one failed.
+  AWAIT_EXPECT_EQ(volumeName, unmountName2);
+
+  driver.stop();
+  driver.join();
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {