You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2016/10/14 06:23:16 UTC

mesos git commit: Allow tasks to set persistent volume as readonly or readwrite resource.

Repository: mesos
Updated Branches:
  refs/heads/master 3140aafea -> 9fc2901d2


Allow tasks to set persistent volume as readonly or readwrite resource.

Allow the task to specify the persistent volume access to be read-only
or read-write. Note that the persistent volume is always created as
read-write.

If the task is the first consumer of the shared persistent volume, then
set the ownership of the persistent volume to match that of the task.
Otherwise, launch the task but if the task is unable to read/write the
persistent volume, it may fail at that point of time.

Review: https://reviews.apache.org/r/45963/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9fc2901d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9fc2901d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9fc2901d

Branch: refs/heads/master
Commit: 9fc2901d23659ca0912ad42fb79828b1a8ab9691
Parents: 3140aaf
Author: Anindya Sinha <an...@apple.com>
Authored: Thu Oct 13 22:29:56 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Thu Oct 13 23:22:18 2016 -0700

----------------------------------------------------------------------
 src/master/validation.cpp                       |  7 +-
 src/slave/containerizer/docker.cpp              | 63 ++++++++-----
 .../mesos/isolators/filesystem/linux.cpp        | 70 +++++++++-----
 .../mesos/isolators/filesystem/posix.cpp        | 76 +++++++++------
 .../linux_filesystem_isolator_tests.cpp         | 98 ++++++++++++++++++++
 src/tests/master_validation_tests.cpp           | 21 +++--
 6 files changed, 246 insertions(+), 89 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/9fc2901d/src/master/validation.cpp
----------------------------------------------------------------------
diff --git a/src/master/validation.cpp b/src/master/validation.cpp
index e5da3c9..480a94b 100644
--- a/src/master/validation.cpp
+++ b/src/master/validation.cpp
@@ -421,9 +421,6 @@ Option<Error> validateDiskInfo(const RepeatedPtrField<Resource>& resources)
       if (!resource.disk().has_volume()) {
         return Error("Expecting 'volume' to be set for persistent volume");
       }
-      if (resource.disk().volume().mode() == Volume::RO) {
-        return Error("Read-only persistent volume not supported");
-      }
       if (resource.disk().volume().has_host_path()) {
         return Error("Expecting 'host_path' to be unset for persistent volume");
       }
@@ -500,6 +497,10 @@ Option<Error> validatePersistentVolume(
       return Error("Resource " + stringify(volume) + " does not have DiskInfo");
     } else if (!volume.disk().has_persistence()) {
       return Error("'persistence' is not set in DiskInfo");
+    } else if (!volume.disk().has_volume()) {
+      return Error("Expecting 'volume' to be set for persistent volume");
+    } else if (volume.disk().volume().mode() == Volume::RO) {
+      return Error("Read-only persistent volume not supported");
     }
   }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/9fc2901d/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp
index 9a1a871..8ec4c0a 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -466,24 +466,16 @@ Try<Nothing> DockerContainerizerProcess::updatePersistentVolumes(
     // more details please refer to MESOS-3483.
   }
 
-  // Set the ownership of the persistent volume to match that of the
-  // sandbox directory.
-  //
-  // NOTE: Currently, persistent volumes in Mesos are exclusive,
-  // meaning that if a persistent volume is used by one task or
-  // executor, it cannot be concurrently used by other task or
-  // executor. But if we allow multiple executors to use same
-  // persistent volume at the same time in the future, the ownership
-  // of the persistent volume may conflict here.
-  //
-  // TODO(haosdent): Consider letting the frameworks specify the
-  // user/group of the persistent volumes.
+  // Get user and group info for this task based on the sandbox directory.
   struct stat s;
   if (::stat(directory.c_str(), &s) < 0) {
     return Error("Failed to get ownership for '" + directory + "': " +
                  os::strerror(errno));
   }
 
+  const uid_t uid = s.st_uid;
+  const gid_t gid = s.st_gid;
+
   // Mount all new persistent volumes added.
   foreach (const Resource& resource, updated.persistentVolumes()) {
     // This is enforced by the master.
@@ -506,22 +498,36 @@ Try<Nothing> DockerContainerizerProcess::updatePersistentVolumes(
       continue;
     }
 
-    const string target = path::join(directory, containerPath);
+    bool isVolumeInUse = false;
 
-    LOG(INFO) << "Changing the ownership of the persistent volume at '"
-              << source << "' with uid " << s.st_uid
-              << " and gid " << s.st_gid;
+    foreachvalue (const Container* container, containers_) {
+      if (container->resources.contains(resource)) {
+        isVolumeInUse = true;
+        break;
+      }
+    }
 
-    Try<Nothing> chown = os::chown(s.st_uid, s.st_gid, source, false);
-    if (chown.isError()) {
-      return Error(
-          "Failed to change the ownership of the persistent volume at '" +
-          source + "' with uid " + stringify(s.st_uid) +
-          " and gid " + stringify(s.st_gid) + ": " + chown.error());
+    // Set the ownership of the persistent volume to match that of the sandbox
+    // directory if the volume is not already in use. If the volume is
+    // currently in use by other containers, tasks in this container may fail
+    // to read from or write to the persistent volume due to incompatible
+    // ownership and file system permissions.
+    if (!isVolumeInUse) {
+      LOG(INFO) << "Changing the ownership of the persistent volume at '"
+                << source << "' with uid " << uid << " and gid " << gid;
+
+      Try<Nothing> chown = os::chown(uid, gid, source, false);
+      if (chown.isError()) {
+        return Error(
+            "Failed to change the ownership of the persistent volume at '" +
+            source + "' with uid " + stringify(uid) +
+            " and gid " + stringify(gid) + ": " + chown.error());
+      }
     }
 
     // TODO(tnachen): We should check if the target already exists
     // when we support updating persistent mounts.
+    const string target = path::join(directory, containerPath);
 
     Try<Nothing> mkdir = os::mkdir(target);
     if (mkdir.isError()) {
@@ -533,12 +539,25 @@ Try<Nothing> DockerContainerizerProcess::updatePersistentVolumes(
               << "' for persistent volume " << resource
               << " of container " << containerId;
 
+    // Bind mount the persistent volume to the container.
     Try<Nothing> mount = fs::mount(source, target, None(), MS_BIND, nullptr);
     if (mount.isError()) {
       return Error(
           "Failed to mount persistent volume from '" +
           source + "' to '" + target + "': " + mount.error());
     }
+
+    // If the mount needs to be read-only, do a remount.
+    if (resource.disk().volume().mode() == Volume::RO) {
+      mount = fs::mount(
+          None(), target, None(), MS_BIND | MS_RDONLY | MS_REMOUNT, nullptr);
+
+      if (mount.isError()) {
+        return Error(
+            "Failed to remount persistent volume as read-only from '" +
+            source + "' to '" + target + "': " + mount.error());
+      }
+    }
   }
 #else
   if (!current.persistentVolumes().empty() ||

http://git-wip-us.apache.org/repos/asf/mesos/blob/9fc2901d/src/slave/containerizer/mesos/isolators/filesystem/linux.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/filesystem/linux.cpp b/src/slave/containerizer/mesos/isolators/filesystem/linux.cpp
index 8f62162..df16b8f 100644
--- a/src/slave/containerizer/mesos/isolators/filesystem/linux.cpp
+++ b/src/slave/containerizer/mesos/isolators/filesystem/linux.cpp
@@ -645,6 +645,16 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::update(
     }
   }
 
+  // Get user and group info for this task based on the task's sandbox.
+  struct stat s;
+  if (::stat(info->directory.c_str(), &s) < 0) {
+    return Failure("Failed to get ownership for '" + info->directory +
+                   "': " + os::strerror(errno));
+  }
+
+  const uid_t uid = s.st_uid;
+  const gid_t gid = s.st_gid;
+
   // We then mount new persistent volumes.
   foreach (const Resource& resource, resources.persistentVolumes()) {
     // This is enforced by the master.
@@ -667,34 +677,32 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::update(
     // Determine the source of the mount.
     string source = paths::getPersistentVolumePath(flags.work_dir, resource);
 
-    // Set the ownership of the persistent volume to match that of the
-    // sandbox directory.
-    //
-    // NOTE: Currently, persistent volumes in Mesos are exclusive,
-    // meaning that if a persistent volume is used by one task or
-    // executor, it cannot be concurrently used by other task or
-    // executor. But if we allow multiple executors to use same
-    // persistent volume at the same time in the future, the ownership
-    // of the persistent volume may conflict here.
-    //
-    // TODO(haosdent): Consider letting the frameworks specify the
-    // user/group of the persistent volumes.
-    struct stat s;
-    if (::stat(info->directory.c_str(), &s) < 0) {
-      return Failure("Failed to get ownership for '" + info->directory + "': " +
-                     os::strerror(errno));
+    bool isVolumeInUse = false;
+
+    foreachvalue (const Owned<Info>& info, infos) {
+      if (info->resources.contains(resource)) {
+        isVolumeInUse = true;
+        break;
+      }
     }
 
-    LOG(INFO) << "Changing the ownership of the persistent volume at '"
-              << source << "' with uid " << s.st_uid
-              << " and gid " << s.st_gid;
+    // Set the ownership of the persistent volume to match that of the sandbox
+    // directory if the volume is not already in use. If the volume is
+    // currently in use by other containers, tasks in this container may fail
+    // to read from or write to the persistent volume due to incompatible
+    // ownership and file system permissions.
+    if (!isVolumeInUse) {
+      LOG(INFO) << "Changing the ownership of the persistent volume at '"
+                << source << "' with uid " << uid << " and gid " << gid;
 
-    Try<Nothing> chown = os::chown(s.st_uid, s.st_gid, source, false);
-    if (chown.isError()) {
-      return Failure(
-          "Failed to change the ownership of the persistent volume at '" +
-          source + "' with uid " + stringify(s.st_uid) +
-          " and gid " + stringify(s.st_gid) + ": " + chown.error());
+      Try<Nothing> chown = os::chown(uid, gid, source, false);
+
+      if (chown.isError()) {
+        return Failure(
+            "Failed to change the ownership of the persistent volume at '" +
+            source + "' with uid " + stringify(uid) +
+            " and gid " + stringify(gid) + ": " + chown.error());
+      }
     }
 
     // Determine the target of the mount.
@@ -729,6 +737,18 @@ Future<Nothing> LinuxFilesystemIsolatorProcess::update(
             "Failed to mount persistent volume from '" +
             source + "' to '" + target + "': " + mount.error());
       }
+
+      // If the mount needs to be read-only, do a remount.
+      if (resource.disk().volume().mode() == Volume::RO) {
+        mount = fs::mount(
+            None(), target, None(), MS_BIND | MS_RDONLY | MS_REMOUNT, nullptr);
+
+        if (mount.isError()) {
+          return Failure(
+              "Failed to remount persistent volume as read-only from '" +
+              source + "' to '" + target + "': " + mount.error());
+        }
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/9fc2901d/src/slave/containerizer/mesos/isolators/filesystem/posix.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/filesystem/posix.cpp b/src/slave/containerizer/mesos/isolators/filesystem/posix.cpp
index af427c6..270d2aa 100644
--- a/src/slave/containerizer/mesos/isolators/filesystem/posix.cpp
+++ b/src/slave/containerizer/mesos/isolators/filesystem/posix.cpp
@@ -150,6 +150,16 @@ Future<Nothing> PosixFilesystemIsolatorProcess::update(
     }
   }
 
+  // Get user and group info for this task based on the task's sandbox.
+  struct stat s;
+  if (::stat(info->directory.c_str(), &s) < 0) {
+    return Failure("Failed to get ownership for '" + info->directory +
+                   "': " + os::strerror(errno));
+  }
+
+  const uid_t uid = s.st_uid;
+  const gid_t gid = s.st_gid;
+
   // We then link additional persistent volumes.
   foreach (const Resource& resource, resources.persistentVolumes()) {
     // This is enforced by the master.
@@ -171,41 +181,40 @@ Future<Nothing> PosixFilesystemIsolatorProcess::update(
 
     string original = paths::getPersistentVolumePath(flags.work_dir, resource);
 
-    // Set the ownership of the persistent volume to match that of the
-    // sandbox directory.
-    //
-    // NOTE: Currently, persistent volumes in Mesos are exclusive,
-    // meaning that if a persistent volume is used by one task or
-    // executor, it cannot be concurrently used by other task or
-    // executor. But if we allow multiple executors to use same
-    // persistent volume at the same time in the future, the ownership
-    // of the persistent volume may conflict here.
-    //
-    // TODO(haosdent): Consider letting the frameworks specify the
-    // user/group of the persistent volumes.
-    struct stat s;
-    if (::stat(info->directory.c_str(), &s) < 0) {
-      return Failure("Failed to get ownership for '" + info->directory + "': " +
-                     os::strerror(errno));
+    bool isVolumeInUse = false;
+
+    foreachvalue (const Owned<Info>& info, infos) {
+      if (info->resources.contains(resource)) {
+        isVolumeInUse = true;
+        break;
+      }
     }
 
-    // TODO(hausdorff): (MESOS-5461) Persistent volumes maintain the invariant
-    // that they are used by one task at a time. This is currently enforced by
-    // `os::chown`. Windows does not support `os::chown`, we will need to
-    // revisit this later.
+    // Set the ownership of the persistent volume to match that of the sandbox
+    // directory if the volume is not already in use. If the volume is
+    // currently in use by other containers, tasks in this container may fail
+    // to read from or write to the persistent volume due to incompatible
+    // ownership and file system permissions.
+    if (!isVolumeInUse) {
+      // TODO(hausdorff): (MESOS-5461) Persistent volumes maintain the invariant
+      // that they are used by one task at a time. This is currently enforced by
+      // `os::chown`. Windows does not support `os::chown`, we will need to
+      // revisit this later.
 #ifndef __WINDOWS__
-    LOG(INFO) << "Changing the ownership of the persistent volume at '"
-              << original << "' with uid " << s.st_uid
-              << " and gid " << s.st_gid;
+      LOG(INFO) << "Changing the ownership of the persistent volume at '"
+                << original << "' with uid " << uid << " and gid " << gid;
 
-    Try<Nothing> chown = os::chown(s.st_uid, s.st_gid, original, false);
-    if (chown.isError()) {
-      return Failure(
-          "Failed to change the ownership of the persistent volume at '" +
-          original + "' with uid " + stringify(s.st_uid) +
-          " and gid " + stringify(s.st_gid) + ": " + chown.error());
-    }
+      Try<Nothing> chown = os::chown(uid, gid, original, false);
+
+      if (chown.isError()) {
+        return Failure(
+            "Failed to change the ownership of the persistent volume at '" +
+            original + "' with uid " + stringify(uid) +
+            " and gid " + stringify(gid) + ": " + chown.error());
+      }
 #endif
+    }
+
     string link = path::join(info->directory, containerPath);
 
     if (os::exists(link)) {
@@ -242,6 +251,13 @@ Future<Nothing> PosixFilesystemIsolatorProcess::update(
                 << link << "' for persistent volume " << resource
                 << " of container " << containerId;
 
+      // NOTE: We cannot enforce read-only access given the symlink without
+      // changing the source so we just log a warning here.
+      if (resource.disk().volume().mode() == Volume::RO) {
+        LOG(WARNING) << "Allowing read-write access to read-only volume '"
+                     << original << "' of container " << containerId;
+      }
+
       Try<Nothing> symlink = ::fs::symlink(original, link);
       if (symlink.isError()) {
         return Failure(

http://git-wip-us.apache.org/repos/asf/mesos/blob/9fc2901d/src/tests/containerizer/linux_filesystem_isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/linux_filesystem_isolator_tests.cpp b/src/tests/containerizer/linux_filesystem_isolator_tests.cpp
index eb191a3..f17ed44 100644
--- a/src/tests/containerizer/linux_filesystem_isolator_tests.cpp
+++ b/src/tests/containerizer/linux_filesystem_isolator_tests.cpp
@@ -1424,6 +1424,104 @@ TEST_F(LinuxFilesystemIsolatorMesosTest,
   driver.join();
 }
 
+
+// Tests that the task fails when it attempts to write to a persistent volume
+// mounted as read-only. Note that although we use a shared persistent volume,
+// the behavior is same for non-shared persistent volume.
+TEST_F(LinuxFilesystemIsolatorMesosTest,
+       ROOT_WriteAccessSharedPersistentVolumeReadOnlyMode)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  string registry = path::join(sandbox.get(), "registry");
+  AWAIT_READY(DockerArchive::create(registry, "test_image"));
+
+  slave::Flags flags = CreateSlaveFlags();
+  flags.resources = "cpus:2;mem:128;disk(role1):128";
+  flags.isolation = "filesystem/linux,docker/runtime";
+  flags.docker_registry = registry;
+  flags.docker_store_dir = path::join(sandbox.get(), "store");
+  flags.image_providers = "docker";
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo.set_role("role1");
+
+  MesosSchedulerDriver driver(
+      &sched,
+      frameworkInfo,
+      master.get()->pid,
+      DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _))
+    .Times(1);
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers);
+  ASSERT_NE(0u, offers->size());
+
+  // We create a shared volume which shall be used by the task to
+  // write to that volume.
+  Resource volume = createPersistentVolume(
+      Megabytes(4),
+      "role1",
+      "id1",
+      "volume_path",
+      None(),
+      None(),
+      frameworkInfo.principal(),
+      true); // Shared volume.
+
+  // The task uses the shared volume as read-only.
+  Resource roVolume = volume;
+  roVolume.mutable_disk()->mutable_volume()->set_mode(Volume::RO);
+
+  Resources taskResources =
+    Resources::parse("cpus:1;mem:64;disk(role1):1").get() + roVolume;
+
+  TaskInfo task = createTask(
+      offers.get()[0].slave_id(),
+      taskResources,
+      "echo hello > volume_path/file");
+
+  // The task fails to write to the volume since the task's resources
+  // intends to use the volume as read-only.
+  Future<TaskStatus> statusRunning;
+  Future<TaskStatus> statusFailed;
+
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&statusRunning))
+    .WillOnce(FutureArg<1>(&statusFailed));
+
+  driver.acceptOffers(
+      {offers.get()[0].id()},
+      {CREATE(volume),
+       LAUNCH({task})});
+
+  AWAIT_READY(statusRunning);
+  EXPECT_EQ(task.task_id(), statusRunning->task_id());
+  EXPECT_EQ(TASK_RUNNING, statusRunning->state());
+
+  AWAIT_READY(statusFailed);
+  EXPECT_EQ(task.task_id(), statusFailed->task_id());
+  EXPECT_EQ(TASK_FAILED, statusFailed->state());
+
+  driver.stop();
+  driver.join();
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/9fc2901d/src/tests/master_validation_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_validation_tests.cpp b/src/tests/master_validation_tests.cpp
index 99e350e..0f8d33b 100644
--- a/src/tests/master_validation_tests.cpp
+++ b/src/tests/master_validation_tests.cpp
@@ -153,15 +153,6 @@ TEST_F(ResourceValidationTest, PersistentVolumeWithoutVolumeInfo)
 }
 
 
-TEST_F(ResourceValidationTest, ReadOnlyPersistentVolume)
-{
-  Resource volume = Resources::parse("disk", "128", "role1").get();
-  volume.mutable_disk()->CopyFrom(createDiskInfo("id1", "path1", Volume::RO));
-
-  EXPECT_SOME(resource::validate(CreateResources(volume)));
-}
-
-
 TEST_F(ResourceValidationTest, PersistentVolumeWithHostPath)
 {
   Resource volume = Resources::parse("disk", "128", "role1").get();
@@ -499,6 +490,18 @@ TEST_F(CreateOperationValidationTest, NonMatchingPrincipal)
 }
 
 
+TEST_F(CreateOperationValidationTest, ReadOnlyPersistentVolume)
+{
+  Resource volume = Resources::parse("disk", "128", "role1").get();
+  volume.mutable_disk()->CopyFrom(createDiskInfo("id1", "path1", Volume::RO));
+
+  Offer::Operation::Create create;
+  create.add_volumes()->CopyFrom(volume);
+
+  EXPECT_SOME(operation::validate(create, Resources(), None()));
+}
+
+
 // This test verifies that creating a persistent volume that is larger
 // than the offered disk resource results won't succeed.
 TEST_F(CreateOperationValidationTest, InsufficientDiskResource)