You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2016/04/09 01:52:05 UTC
[1/6] mesos git commit: Make tests::cluster::Slave more tolerant of
start failures.
Repository: mesos
Updated Branches:
refs/heads/master 6a04e4603 -> b900abff1
Make tests::cluster::Slave more tolerant of start failures.
If cluster::Slave::start() fails, make sure we don't crash in the
destructor.
Review: https://reviews.apache.org/r/45689/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/548da8ff
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/548da8ff
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/548da8ff
Branch: refs/heads/master
Commit: 548da8ff3597935c618b43a82bd432482e5e5fed
Parents: 6a04e46
Author: James Peach <jp...@apache.org>
Authored: Fri Apr 8 14:00:10 2016 -0700
Committer: Jiang Yan Xu <ya...@jxu.me>
Committed: Fri Apr 8 16:27:03 2016 -0700
----------------------------------------------------------------------
src/tests/cluster.cpp | 5 +++++
src/tests/cluster.hpp | 4 ++--
2 files changed, 7 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/548da8ff/src/tests/cluster.cpp
----------------------------------------------------------------------
diff --git a/src/tests/cluster.cpp b/src/tests/cluster.cpp
index 7e488d2..b4d6910 100644
--- a/src/tests/cluster.cpp
+++ b/src/tests/cluster.cpp
@@ -442,6 +442,11 @@ Slave::~Slave()
return;
}
+ // Startup didn't complete so don't try to do the full shutdown.
+ if (!containerizer) {
+ return;
+ }
+
// This extra closure is necessary in order to use `AWAIT` and `ASSERT_*`,
// as these macros require a void return type.
[this]() {
http://git-wip-us.apache.org/repos/asf/mesos/blob/548da8ff/src/tests/cluster.hpp
----------------------------------------------------------------------
diff --git a/src/tests/cluster.hpp b/src/tests/cluster.hpp
index 39ca15e..887342a 100644
--- a/src/tests/cluster.hpp
+++ b/src/tests/cluster.hpp
@@ -185,13 +185,13 @@ private:
bool cleanUpContainersInDestructor = true;
// Master detector that is not managed by this object.
- mesos::master::detector::MasterDetector* detector;
+ mesos::master::detector::MasterDetector* detector = nullptr;
// Containerizer that is either owned outside of this `Slave` object
// or by `ownedContainerizer`. We keep a copy of this pointer
// because the cleanup logic acts upon the containerizer (regardless
// of who created it).
- slave::Containerizer* containerizer;
+ slave::Containerizer* containerizer = nullptr;
// Dependencies that are created by the factory method.
process::Owned<slave::Containerizer> ownedContainerizer;
[3/6] mesos git commit: Add tests for XFS project quota utilities.
Posted by ya...@apache.org.
Add tests for XFS project quota utilities.
Review: https://reviews.apache.org/r/44947/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/04be1d03
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/04be1d03
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/04be1d03
Branch: refs/heads/master
Commit: 04be1d03ca71513cc966a17f87cd10611d959ac9
Parents: a0e96bd
Author: James Peach <jp...@apache.org>
Authored: Fri Apr 8 14:07:03 2016 -0700
Committer: Jiang Yan Xu <ya...@jxu.me>
Committed: Fri Apr 8 16:46:08 2016 -0700
----------------------------------------------------------------------
src/Makefile.am | 5 +
src/tests/containerizer/xfs_quota_tests.cpp | 337 +++++++++++++++++++++++
src/tests/environment.cpp | 61 ++--
3 files changed, 387 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/04be1d03/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index f235a6a..a16c2da 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1967,6 +1967,11 @@ mesos_tests_SOURCES = \
tests/containerizer/provisioner_backend_tests.cpp \
tests/containerizer/provisioner_docker_tests.cpp
+if ENABLE_XFS_DISK_ISOLATOR
+mesos_tests_SOURCES += \
+ tests/containerizer/xfs_quota_tests.cpp
+endif
+
mesos_tests_CPPFLAGS = $(MESOS_CPPFLAGS)
mesos_tests_CPPFLAGS += -DSOURCE_DIR=\"$(abs_top_srcdir)\"
mesos_tests_CPPFLAGS += -DBUILD_DIR=\"$(abs_top_builddir)\"
http://git-wip-us.apache.org/repos/asf/mesos/blob/04be1d03/src/tests/containerizer/xfs_quota_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/xfs_quota_tests.cpp b/src/tests/containerizer/xfs_quota_tests.cpp
new file mode 100644
index 0000000..8b0322b
--- /dev/null
+++ b/src/tests/containerizer/xfs_quota_tests.cpp
@@ -0,0 +1,337 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <linux/loop.h>
+
+#include <string>
+#include <vector>
+
+#include <gmock/gmock.h>
+
+#include <mesos/mesos.hpp>
+#include <mesos/resources.hpp>
+
+#include <process/gtest.hpp>
+#include <process/pid.hpp>
+
+#include <stout/fs.hpp>
+#include <stout/gtest.hpp>
+#include <stout/os.hpp>
+#include <stout/path.hpp>
+
+#include "linux/fs.hpp"
+
+#include "master/master.hpp"
+
+#include "slave/constants.hpp"
+#include "slave/flags.hpp"
+#include "slave/slave.hpp"
+
+#include "slave/containerizer/fetcher.hpp"
+#include "slave/containerizer/mesos/containerizer.hpp"
+#include "slave/containerizer/mesos/isolators/xfs/utils.hpp"
+
+#include "tests/environment.hpp"
+#include "tests/mesos.hpp"
+#include "tests/utils.hpp"
+
+using namespace mesos::internal::xfs;
+
+using namespace process;
+
+using std::string;
+using std::vector;
+
+using testing::_;
+using testing::Return;
+
+using mesos::internal::master::Master;
+
+using mesos::internal::slave::Fetcher;
+using mesos::internal::slave::MesosContainerizer;
+using mesos::internal::slave::Slave;
+
+namespace mesos {
+namespace internal {
+namespace tests {
+
+static QuotaInfo makeQuotaInfo(
+ Bytes limit,
+ Bytes used)
+{
+ return {limit, used};
+}
+
+
+class ROOT_XFS_QuotaTest : public MesosTest
+{
+public:
+ virtual void SetUp()
+ {
+ MesosTest::SetUp();
+
+ Try<string> base = environment->mkdtemp();
+ ASSERT_SOME(base) << "Failed to mkdtemp";
+
+ string devPath = path::join(base.get(), "device");
+ string mntPath = path::join(base.get(), "mnt");
+
+ ASSERT_SOME(os::mkdir(mntPath));
+ ASSERT_SOME(mkfile(devPath, Megabytes(40)));
+
+ // Get an unused loop device.
+ Try<string> loop = mkloop();
+ ASSERT_SOME(loop);
+
+ // Attach the loop to a backing file.
+ Try<Subprocess> losetup = subprocess(
+ "losetup " + loop.get() + " " + devPath,
+ Subprocess::PATH("/dev/null"));
+
+ ASSERT_SOME(losetup);
+ AWAIT_READY(losetup->status());
+ ASSERT_SOME_EQ(0, losetup->status().get());
+
+ loopDevice = loop.get();
+ ASSERT_SOME(loopDevice);
+
+ // Make an XFS filesystem (using the force flag). The defaults
+ // should be good enough for tests.
+ Try<Subprocess> mkfs = subprocess(
+ "mkfs.xfs -f " + loopDevice.get(),
+ Subprocess::PATH("/dev/null"));
+
+ ASSERT_SOME(mkfs);
+ AWAIT_READY(mkfs->status());
+ ASSERT_SOME_EQ(0, mkfs->status().get());
+
+ ASSERT_SOME(fs::mount(
+ loopDevice.get(),
+ mntPath,
+ "xfs",
+ 0, // Flags.
+ "prjquota"));
+ mountPoint = mntPath;
+
+ ASSERT_SOME(os::chdir(mountPoint.get()))
+ << "Failed to chdir into '" << mountPoint.get() << "'";
+ }
+
+ virtual void TearDown()
+ {
+ if (mountPoint.isSome()) {
+ fs::unmount(mountPoint.get(), MNT_FORCE | MNT_DETACH);
+ }
+
+ // Make a best effort to tear everything down. We don't make any assertions
+ // here because even if something goes wrong we still want to clean up as
+ // much as we can.
+ if (loopDevice.isSome()) {
+ Try<Subprocess> cmdProcess = subprocess(
+ "losetup -d " + loopDevice.get(),
+ Subprocess::PATH("/dev/null"));
+
+ if (cmdProcess.isSome()) {
+ cmdProcess->status().await(Seconds(15));
+ }
+ }
+
+ MesosTest::TearDown();
+ }
+
+ slave::Flags CreateSlaveFlags()
+ {
+ slave::Flags flags = MesosTest::CreateSlaveFlags();
+
+ // We only need an XFS-specific directory for the work directory. We
+ // don't mind that other flags refer to a different temp directory.
+ flags.work_dir = mountPoint.get();
+ return flags;
+ }
+
+ static Try<Nothing> mkfile(string path, Bytes size)
+ {
+ Try<int> fd = os::open(path, O_CREAT | O_RDWR | O_EXCL);
+
+ if (fd.isError()) {
+ return Error(fd.error());
+ }
+
+ // XFS supports posix_fallocate(3), and we depend on it actually
+ // allocating storage in the quota tests.
+ if (int error = ::posix_fallocate(fd.get(), 0, size.bytes())) {
+ os::close(fd.get());
+ return Error("posix_fallocate failed: " + os::strerror(error));
+ }
+
+ os::close(fd.get());
+ return Nothing();
+ }
+
+  // Asks the loop-control device for an unused loop device number and
+  // returns the corresponding device path ("/dev/loop" + number).
+  // Returns an Error if the control device cannot be opened or if the
+  // LOOP_CTL_GET_FREE ioctl fails.
+  static Try<string> mkloop()
+  {
+    Try<int> fd = os::open("/dev/loop-control", O_RDWR);
+
+    if (fd.isError()) {
+      return Error(fd.error());
+    }
+
+    // All failure cases here are reported in errno with a -1 return value.
+    int devno = ::ioctl(fd.get(), LOOP_CTL_GET_FREE);
+    if (devno == -1) {
+      // Build the error before closing the descriptor so that the errno
+      // reported is the one set by the ioctl.
+      ErrnoError error("ioctl(LOOP_CTL_GET_FREE) failed");
+      os::close(fd.get());
+      return error;
+    }
+
+    os::close(fd.get());
+
+    return string("/dev/loop") + stringify(devno);
+  }
+
+ Option<string> loopDevice; // The loop device we attached.
+ Option<string> mountPoint; // XFS filesystem mountpoint.
+};
+
+
+TEST_F(ROOT_XFS_QuotaTest, QuotaGetSet)
+{
+ prid_t projectId = 44;
+ string root = "project";
+ Bytes limit = Megabytes(44);
+
+ ASSERT_SOME(os::mkdir(root));
+
+ EXPECT_SOME(setProjectQuota(root, projectId, limit));
+
+ Result<QuotaInfo> info = getProjectQuota(root, projectId);
+ ASSERT_SOME(info);
+
+ EXPECT_EQ(limit, info.get().limit);
+ EXPECT_EQ(Bytes(0), info.get().used);
+
+ EXPECT_SOME(clearProjectQuota(root, projectId));
+}
+
+
+TEST_F(ROOT_XFS_QuotaTest, QuotaLimit)
+{
+ prid_t projectId = 55;
+ string root = "project";
+ Bytes limit = Megabytes(11);
+ Bytes used = Megabytes(10);
+
+ ASSERT_SOME(os::mkdir(root));
+
+ // Assign a project quota.
+ EXPECT_SOME(setProjectQuota(root, projectId, limit));
+
+ // Move the directory into the project.
+ EXPECT_SOME(setProjectId(root, projectId));
+
+ // Allocate some storage to this project.
+ EXPECT_SOME(mkfile(path::join(root, "file"), used));
+
+ // And verify the quota reflects what we used.
+ EXPECT_SOME_EQ(
+ makeQuotaInfo(limit, used),
+ getProjectQuota(root, projectId));
+
+ // We have 1MB of our quota left. Verify that we get a write
+ // error if we overflow that.
+ EXPECT_ERROR(mkfile(path::join(root, "file2"), Megabytes(2)));
+
+ EXPECT_SOME(clearProjectQuota(root, projectId));
+}
+
+
+// Verifies that setting or clearing a project ID is rejected for anything
+// that is not a directory: a symlink, a regular file and a missing path.
+TEST_F(ROOT_XFS_QuotaTest, ProjectIdErrors)
+{
+  // Setting project IDs should not work for non-directories.
+  //
+  // `fs::symlink` takes (original, link), so this creates a dangling link
+  // named "symlink" that points at the nonexistent path "nowhere". That is
+  // the path the following assertions operate on.
+  EXPECT_SOME(::fs::symlink("nowhere", "symlink"));
+  EXPECT_ERROR(setProjectId("symlink", 99));
+  EXPECT_ERROR(clearProjectId("symlink"));
+
+  EXPECT_SOME(mkfile("file", Bytes(1)));
+  EXPECT_ERROR(setProjectId("file", 99));
+  EXPECT_ERROR(clearProjectId("file"));
+
+  // Setting on a missing file should error.
+  EXPECT_ERROR(setProjectId("none", 99));
+  EXPECT_ERROR(clearProjectId("none"));
+}
+
+
+TEST_F(ROOT_XFS_QuotaTest, DirectoryTree)
+{
+ Bytes limit = Megabytes(100);
+ prid_t projectA = 200;
+ prid_t projectB = 400;
+ string rootA = "projectA";
+ string rootB = "projectB";
+
+ // Create rootA with 2MB of data.
+ ASSERT_SOME(os::mkdir(path::join(rootA, "depth1/depth2/depth3"), true));
+ EXPECT_SOME(mkfile(path::join(rootA, "depth1/file1"), Megabytes(1)));
+ EXPECT_SOME(mkfile(path::join(rootA, "depth1/depth2/file2"), Megabytes(1)));
+
+ // Create rootB with 1MB of data.
+ ASSERT_SOME(os::mkdir(rootB));
+ EXPECT_SOME(mkfile(path::join(rootB, "file1"), Megabytes(1)));
+
+ // Symlink from rootA into rootB. This should have no effect on the
+ // measured quota.
+ EXPECT_SOME(::fs::symlink(
+ path::join(rootB, "file1"), path::join(rootA, "depth1/file1.A")));
+ EXPECT_SOME(::fs::symlink(
+ path::join(rootB, "file1"), path::join(rootA, "depth1/depth2/file2.A")));
+ EXPECT_SOME(::fs::symlink(rootB,
+ path::join(rootA, "depth1/depth2/depth3.A")));
+
+ // Now we want to verify that assigning and removing project IDs is recursive
+ // and does not follow symlinks. For each directory, assign the project ID and
+ // verify the expected quota usage. Then verify the inverse.
+
+ EXPECT_SOME(setProjectId(rootA, projectA));
+ EXPECT_SOME(setProjectQuota(rootA, projectA, limit));
+
+ EXPECT_SOME_EQ(
+ makeQuotaInfo(limit, Megabytes(2)),
+ getProjectQuota(rootA, projectA));
+
+ EXPECT_SOME(setProjectId(rootB, projectB));
+ EXPECT_SOME(setProjectQuota(rootB, projectB, limit));
+
+ EXPECT_SOME_EQ(
+ makeQuotaInfo(limit, Megabytes(1)),
+ getProjectQuota(rootB, projectB));
+
+ EXPECT_SOME(clearProjectId(rootA));
+
+ EXPECT_SOME_EQ(
+ makeQuotaInfo(limit, Megabytes(0)),
+ getProjectQuota(rootA, projectA));
+
+ EXPECT_SOME(clearProjectId(rootB));
+
+ EXPECT_SOME_EQ(
+ makeQuotaInfo(limit, Megabytes(0)),
+ getProjectQuota(rootB, projectB));
+}
+
+} // namespace tests {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/04be1d03/src/tests/environment.cpp
----------------------------------------------------------------------
diff --git a/src/tests/environment.cpp b/src/tests/environment.cpp
index acadb5b..45ed8f2 100644
--- a/src/tests/environment.cpp
+++ b/src/tests/environment.cpp
@@ -484,38 +484,60 @@ private:
};
-class OverlayFSTestFilter : public TestFilter
+class SupportedFilesystemTestFilter : public TestFilter
{
public:
- OverlayFSTestFilter()
+ explicit SupportedFilesystemTestFilter(const string fsname)
{
#ifdef __linux__
- Try<bool> check = fs::overlay::supported();
+ Try<bool> check = (fsname == "overlayfs")
+ ? fs::overlay::supported()
+ : fs::supported(fsname);
+
if (check.isError()) {
- overlayfsError = check.error();
+ fsSupportError = check.error();
} else if (!check.get()) {
- overlayfsError = Error("Overlayfs is not supported on your systems");
+ fsSupportError = Error(fsname + " is not supported on your systems");
}
#else
- overlayfsError =
- Error("Overlayfs tests not supported on non-Linux systems");
-#endif // __linux__
- if (overlayfsError.isSome()) {
+ fsSupportError =
+ Error(fsname + " tests not supported on non-Linux systems");
+#endif
+
+ if (fsSupportError.isSome()) {
std::cerr
<< "-------------------------------------------------------------\n"
- << "We cannot run any overlayfs tests because:\n"
- << overlayfsError.get().message << "\n"
+ << "We cannot run any " << fsname << " tests because:\n"
+ << fsSupportError.get().message << "\n"
<< "-------------------------------------------------------------\n";
}
}
+ Option<Error> fsSupportError;
+};
+
+
+class OverlayFSFilter : public SupportedFilesystemTestFilter
+{
+public:
+ OverlayFSFilter() : SupportedFilesystemTestFilter("overlayfs") {}
+
bool disable(const ::testing::TestInfo* test) const
{
- return overlayfsError.isSome() && matches(test, "OVERLAYFS_");
+ return fsSupportError.isSome() && matches(test, "OVERLAYFS_");
}
+};
-private:
- Option<Error> overlayfsError;
+
+class XfsFilter : public SupportedFilesystemTestFilter
+{
+public:
+ XfsFilter() : SupportedFilesystemTestFilter("xfs") {}
+
+ bool disable(const ::testing::TestInfo* test) const
+ {
+ return fsSupportError.isSome() && matches(test, "XFS_");
+ }
};
@@ -727,11 +749,12 @@ Environment::Environment(const Flags& _flags) : flags(_flags)
filters.push_back(Owned<TestFilter>(new NetClsCgroupsFilter()));
filters.push_back(Owned<TestFilter>(new NetworkIsolatorTestFilter()));
filters.push_back(Owned<TestFilter>(new NvidiaGpuFilter()));
- filters.push_back(Owned<TestFilter>(new OverlayFSTestFilter()));
+ filters.push_back(Owned<TestFilter>(new OverlayFSFilter()));
filters.push_back(Owned<TestFilter>(new PerfCPUCyclesFilter()));
filters.push_back(Owned<TestFilter>(new PerfFilter()));
filters.push_back(Owned<TestFilter>(new RootFilter()));
filters.push_back(Owned<TestFilter>(new UnzipFilter()));
+ filters.push_back(Owned<TestFilter>(new XfsFilter()));
// Construct the filter string to handle system or platform specific tests.
::testing::UnitTest* unitTest = ::testing::UnitTest::GetInstance();
@@ -862,8 +885,14 @@ Try<string> Environment::TemporaryDirectoryEventListener::mkdtemp()
testName = strings::remove(testName, "DISABLED_", strings::PREFIX);
}
+ Option<string> tmpdir = os::getenv("TMPDIR");
+
+ if (tmpdir.isNone()) {
+ tmpdir = "/tmp";
+ }
+
const string& path =
- path::join("/tmp", strings::join("_", testCase, testName, "XXXXXX"));
+ path::join(tmpdir.get(), strings::join("_", testCase, testName, "XXXXXX"));
Try<string> mkdtemp = os::mkdtemp(path);
if (mkdtemp.isSome()) {
[5/6] mesos git commit: Add XFS disk isolator documentation.
Posted by ya...@apache.org.
Add XFS disk isolator documentation.
Review: https://reviews.apache.org/r/44950/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b900abff
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b900abff
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b900abff
Branch: refs/heads/master
Commit: b900abff1648ae397d9819322de95ad99737ce4d
Parents: 255710b
Author: James Peach <jp...@apache.org>
Authored: Fri Apr 8 14:56:12 2016 -0700
Committer: Jiang Yan Xu <ya...@jxu.me>
Committed: Fri Apr 8 16:46:08 2016 -0700
----------------------------------------------------------------------
CHANGELOG | 4 ++++
docs/configuration.md | 25 +++++++++++++++++++++++
docs/mesos-containerizer.md | 43 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 72 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/b900abff/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 4337490..1f0527e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -27,6 +27,10 @@ This release contains the following new features:
`mesos.scheduler`. `mesos.native` still exists, combining both modules for
backwards compatibility with existing code.
+ * [MESOS-4828] - **Experimental** support for a new `xfs/disk` isolator
+ has been added to isolate disk resources more efficiently. Please refer to
+ docs/mesos-containerizer.md for more details.
+
Deprecations:
* [MESOS-2281] - Deprecated the plain text format for credentials in favor of
the JSON format.
http://git-wip-us.apache.org/repos/asf/mesos/blob/b900abff/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 309a5a0..ba00ec5 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1725,6 +1725,31 @@ isolator. (default: false)
</tr>
</table>
+*XFS disk isolator flags available when configured with
+`--enable-xfs-disk-isolator`*
+
+<table class="table table-striped">
+ <thead>
+ <tr>
+ <th width="30%">
+ Flag
+ </th>
+ <th>
+ Explanation
+ </th>
+ </tr>
+ </thead>
+<tr>
+ <td>
+ --xfs_project_range=VALUE
+ </td>
+<td>
+The ranges of XFS project IDs that the isolator can use to track disk
+quotas for container sandbox directories. Valid project IDs range from
+1 to max(uint32). (default `[5000-10000]`)
+</td>
+</tr>
+</table>
## Libprocess Options
http://git-wip-us.apache.org/repos/asf/mesos/blob/b900abff/docs/mesos-containerizer.md
----------------------------------------------------------------------
diff --git a/docs/mesos-containerizer.md b/docs/mesos-containerizer.md
index 2fde743..6f40c57 100644
--- a/docs/mesos-containerizer.md
+++ b/docs/mesos-containerizer.md
@@ -85,6 +85,49 @@ The interval between two `du`s can be controlled by the slave flag
minute. The default interval is 15 seconds.
+### XFS Disk Isolator
+
+The XFS Disk isolator uses XFS project quotas to track the disk
+space used by each container sandbox and to enforce the corresponding
+disk space allocation. Write operations performed by tasks exceeding
+their disk allocation will fail with an `EDQUOT` error. The task
+will not be terminated by the containerizer.
+
+The XFS disk isolator is functionally similar to the Posix Disk isolator
+but avoids the cost of repeatedly running `du`. Though they will
+not interfere with each other, it is not recommended to use them together.
+
+To enable the XFS Disk isolator, append `xfs/disk` to the
+`--isolation` flag when starting the slave.
+
+The XFS Disk isolator requires the sandbox directory to be located
+on an XFS filesystem that is mounted with the `pquota` option. There
+is no need to configure
+[projects](http://man7.org/linux/man-pages/man5/projects.5.html)
+or [projid](http://man7.org/linux/man-pages/man5/projid.5.html)
+files. The range of project IDs given to the `--xfs_project_range`
+must not overlap any project IDs allocated for other uses.
+
+The XFS disk isolator does not natively support an accounting-only mode
+like that of the Posix Disk isolator. Quota enforcement can be disabled
+by mounting the filesystem with the `pqnoenforce` mount option.
+
+The [xfs_quota](http://man7.org/linux/man-pages/man8/xfs_quota.8.html)
+command can be used to show the current allocation of project IDs
+and quota. For example:
+
+ $ xfs_quota -x -c "report -a -n -L 5000 -U 1000"
+
+To show which project a file belongs to, use the
+[xfs_io](http://man7.org/linux/man-pages/man8/xfs_io.8.html) command
+to display the `fsxattr.projid` field. For example:
+
+ $ xfs_io -r -c stat /mnt/mesos/
+
+Note that the Posix Disk isolator flags `--enforce_container_disk_quota`
+and `--container_disk_watch_interval` do not apply to the XFS Disk
+isolator.
+
### Docker Runtime Isolator
The Docker Runtime isolator is used for supporting runtime
[6/6] mesos git commit: Add utility functions to manipulate XFS
project quotas.
Posted by ya...@apache.org.
Add utility functions to manipulate XFS project quotas.
Review: https://reviews.apache.org/r/44946/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a0e96bd2
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a0e96bd2
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a0e96bd2
Branch: refs/heads/master
Commit: a0e96bd22da7a39086600c3186fbad61c554e262
Parents: 0313707
Author: James Peach <jp...@apache.org>
Authored: Fri Apr 8 13:49:16 2016 -0700
Committer: Jiang Yan Xu <ya...@jxu.me>
Committed: Fri Apr 8 16:46:08 2016 -0700
----------------------------------------------------------------------
src/Makefile.am | 6 +
.../containerizer/mesos/isolators/xfs/utils.cpp | 384 +++++++++++++++++++
.../containerizer/mesos/isolators/xfs/utils.hpp | 81 ++++
3 files changed, 471 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/a0e96bd2/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index 4375b03..f235a6a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -896,6 +896,12 @@ MESOS_LINUX_FILES += \
slave/containerizer/mesos/provisioner/backends/bind.hpp \
slave/containerizer/mesos/provisioner/backends/overlay.hpp
+if ENABLE_XFS_DISK_ISOLATOR
+MESOS_LINUX_FILES += \
+ slave/containerizer/mesos/isolators/xfs/utils.cpp \
+ slave/containerizer/mesos/isolators/xfs/utils.hpp
+endif
+
MESOS_NETWORK_ISOLATOR_FILES = \
linux/routing/handle.cpp \
linux/routing/route.cpp \
http://git-wip-us.apache.org/repos/asf/mesos/blob/a0e96bd2/src/slave/containerizer/mesos/isolators/xfs/utils.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/xfs/utils.cpp b/src/slave/containerizer/mesos/isolators/xfs/utils.cpp
new file mode 100644
index 0000000..9285183
--- /dev/null
+++ b/src/slave/containerizer/mesos/isolators/xfs/utils.cpp
@@ -0,0 +1,384 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// The XFS API headers come from the xfsprogs package. xfsprogs versions
+// earlier than 4.5 contain various internal macros that conflict with
+// libstdc++.
+
+// If ENABLE_GETTEXT is not defined, then the XFS headers will define
+// textdomain() to a while(0) loop. When C++ standard headers try to
+// use textdomain(), compilation errors ensue.
+#define ENABLE_GETTEXT
+#include <xfs/xfs.h>
+#include <xfs/xqm.h>
+#undef ENABLE_GETTEXT
+
+// xfs/platform_defs-x86_64.h defines min() and max() macros which conflict
+// with various min() and max() function definitions.
+#undef min
+#undef max
+
+#include <fts.h>
+
+#include <blkid/blkid.h>
+#include <linux/quota.h>
+#include <sys/quota.h>
+
+#include <stout/check.hpp>
+#include <stout/error.hpp>
+#include <stout/numify.hpp>
+#include <stout/path.hpp>
+
+#include <stout/fs.hpp>
+#include <stout/os.hpp>
+
+#include "slave/containerizer/mesos/isolators/xfs/utils.hpp"
+
+using std::string;
+
+namespace mesos {
+namespace internal {
+namespace xfs {
+
+// The quota API defines space limits in terms of basic
+// blocks (512 bytes).
+static constexpr Bytes BASIC_BLOCK_SIZE = Bytes(512u);
+
+
+// Although XFS itself doesn't define any invalid project IDs,
+// we need a way to know whether or not a project ID was assigned
+// so we use 0 as our sentinel value.
+static constexpr prid_t NON_PROJECT_ID = 0u;
+
+
+static Error nonProjectError()
+{
+ return Error("Invalid project ID '0'");
+}
+
+
+// Opens `path` read-only without following symlinks. The caller passes
+// the stat result for the path so that directories can be opened with
+// O_DIRECTORY.
+static Try<int> openPath(
+    const string& path,
+    const struct stat& stat)
+{
+  const int commonFlags = O_NOFOLLOW | O_RDONLY | O_CLOEXEC;
+
+  // Directories require O_DIRECTORY.
+  if (S_ISDIR(stat.st_mode)) {
+    return os::open(path, commonFlags | O_DIRECTORY);
+  }
+
+  return os::open(path, commonFlags);
+}
+
+
+// Writes `attr` to the open file `fd` via XFS_IOC_FSSETXATTR. Returns an
+// errno-based error if the xfsctl call reports failure.
+static Try<Nothing> setAttributes(
+    int fd,
+    struct fsxattr& attr)
+{
+  const int result = ::xfsctl(nullptr, fd, XFS_IOC_FSSETXATTR, &attr);
+
+  if (result != -1) {
+    return Nothing();
+  }
+
+  return ErrnoError();
+}
+
+
+// Reads the extended attributes of the open file `fd` via
+// XFS_IOC_FSGETXATTR. Returns an errno-based error if the xfsctl call
+// reports failure.
+static Try<struct fsxattr> getAttributes(int fd)
+{
+  struct fsxattr xattrs;
+
+  const bool failed =
+    ::xfsctl(nullptr, fd, XFS_IOC_FSGETXATTR, &xattrs) == -1;
+
+  if (failed) {
+    return ErrnoError();
+  }
+
+  return xattrs;
+}
+
+
+// Return the path of the device backing the filesystem containing
+// the given path.
+// Looks up the device number of `path` with lstat() and converts it to
+// a device name using libblkid. Returns an error if either step fails.
+static Try<string> getDeviceForPath(const string& path)
+{
+  struct stat info;
+  const int rc = ::lstat(path.c_str(), &info);
+
+  if (rc == -1) {
+    return ErrnoError("Unable to access '" + path + "'");
+  }
+
+  char* rawName = blkid_devno_to_devname(info.st_dev);
+
+  if (rawName == nullptr) {
+    return ErrnoError("Unable to get device for '" + path + "'");
+  }
+
+  // Copy the name into a string before releasing the buffer that
+  // libblkid handed back to us.
+  string device(rawName);
+  free(rawName);
+
+  return device;
+}
+
+
+namespace internal {
+
+// Sets the block hard and soft limits of project `projectId` to `limit`
+// on the device backing `path`. The limit is converted to basic blocks,
+// so a limit below one basic block becomes 0; the public
+// clearProjectQuota() relies on that to remove a quota.
+//
+// Returns an error if the backing device cannot be determined or if the
+// quotactl call fails.
+static Try<Nothing> setProjectQuota(
+    const string& path,
+    prid_t projectId,
+    Bytes limit)
+{
+  Try<string> devname = getDeviceForPath(path);
+  if (devname.isError()) {
+    return Error(devname.error());
+  }
+
+  fs_disk_quota_t quota = {0};
+
+  quota.d_version = FS_DQUOT_VERSION;
+
+  // Specify that we are setting a project quota for this ID.
+  quota.d_id = projectId;
+  quota.d_flags = XFS_PROJ_QUOTA;
+
+  // Set both the hard and the soft limit to the same quota, just
+  // for consistency. Functionally all we need is the hard quota.
+  quota.d_fieldmask = FS_DQ_BSOFT | FS_DQ_BHARD;
+
+  quota.d_blk_hardlimit = limit.bytes() / BASIC_BLOCK_SIZE.bytes();
+  quota.d_blk_softlimit = limit.bytes() / BASIC_BLOCK_SIZE.bytes();
+
+  // quotactl takes the quota structure as an opaque address.
+  if (::quotactl(QCMD(Q_XSETQLIM, PRJQUOTA),
+                 devname.get().c_str(),
+                 projectId,
+                 reinterpret_cast<caddr_t>(&quota)) == -1) {
+    return ErrnoError("Failed to set quota for project ID " +
+                      stringify(projectId));
+  }
+
+  return Nothing();
+}
+
+
+// Assigns `projectId` to the single file or directory at `path`. The
+// project-inherit flag is set for a real project ID and cleared when
+// `projectId` is NON_PROJECT_ID. `stat` is the stat result for `path`
+// and is used to choose the open flags.
+static Try<Nothing> setProjectId(
+    const string& path,
+    const struct stat& stat,
+    prid_t projectId)
+{
+  Try<int> descriptor = openPath(path, stat);
+  if (descriptor.isError()) {
+    return Error("Failed to open '" + path + "': " + descriptor.error());
+  }
+
+  Try<struct fsxattr> xattrs = getAttributes(descriptor.get());
+  if (xattrs.isError()) {
+    os::close(descriptor.get());
+    return Error("Failed to get XFS attributes for '" + path + "': " +
+                 xattrs.error());
+  }
+
+  xattrs->fsx_projid = projectId;
+
+  const bool assigning = projectId != NON_PROJECT_ID;
+  if (assigning) {
+    xattrs->fsx_xflags |= XFS_XFLAG_PROJINHERIT;
+  } else {
+    xattrs->fsx_xflags &= ~XFS_XFLAG_PROJINHERIT;
+  }
+
+  // The descriptor is no longer needed once the attributes are written,
+  // whether or not the write succeeded.
+  Try<Nothing> written = setAttributes(descriptor.get(), xattrs.get());
+  os::close(descriptor.get());
+
+  if (written.isSome()) {
+    return Nothing();
+  }
+
+  return Error("Failed to set XFS attributes for '" + path + "': " +
+               written.error());
+}
+
+} // namespace internal {
+
+
+// Queries the project quota of `projectId` on the device backing `path`.
+//
+// Returns:
+//   - Error if `projectId` is the reserved value 0, if the backing
+//     device cannot be determined, or if the quotactl call fails.
+//   - None if both the hard limit and the block count are zero, which
+//     is treated as "no quota assigned".
+//   - Otherwise the hard limit and current usage, converted from basic
+//     blocks to bytes.
+Result<QuotaInfo> getProjectQuota(
+    const string& path,
+    prid_t projectId)
+{
+  if (projectId == NON_PROJECT_ID) {
+    return nonProjectError();
+  }
+
+  Try<string> devname = getDeviceForPath(path);
+  if (devname.isError()) {
+    return Error(devname.error());
+  }
+
+  fs_disk_quota_t quota = {0};
+
+  quota.d_version = FS_DQUOT_VERSION;
+  quota.d_id = projectId;
+  quota.d_flags = XFS_PROJ_QUOTA;
+
+  // In principle, we should issue a Q_XQUOTASYNC to get an accurate accounting.
+  // However, we don't want to affect performance by continually syncing the
+  // disks, so we accept that the quota information will be slightly out of
+  // date.
+
+  // quotactl takes the quota structure as an opaque address.
+  if (::quotactl(QCMD(Q_XGETQUOTA, PRJQUOTA),
+                 devname.get().c_str(),
+                 projectId,
+                 reinterpret_cast<caddr_t>(&quota)) == -1) {
+    return ErrnoError("Failed to get quota for project ID " +
+                      stringify(projectId));
+  }
+
+  // Zero quota means that no quota is assigned.
+  if (quota.d_blk_hardlimit == 0 && quota.d_bcount == 0) {
+    return None();
+  }
+
+  QuotaInfo info;
+  info.limit = BASIC_BLOCK_SIZE * quota.d_blk_hardlimit;
+  info.used = BASIC_BLOCK_SIZE * quota.d_bcount;
+
+  return info;
+}
+
+
+Try<Nothing> setProjectQuota(
+ const string& path,
+ prid_t projectId,
+ Bytes limit)
+{
+ if (projectId == NON_PROJECT_ID) {
+ return nonProjectError();
+ }
+
+ // A 0 limit deletes the quota record. Since the limit is expressed in
+ // basic blocks, that effectively means the limit must be >= 512 bytes.
+ if (limit < BASIC_BLOCK_SIZE) {
+ return Error("Quota limit must be >= " + stringify(BASIC_BLOCK_SIZE));
+ }
+
+ return internal::setProjectQuota(path, projectId, limit);
+}
+
+
+Try<Nothing> clearProjectQuota(
+ const string& path,
+ prid_t projectId)
+{
+ if (projectId == NON_PROJECT_ID) {
+ return nonProjectError();
+ }
+
+ return internal::setProjectQuota(path, projectId, Bytes(0));
+}
+
+
+// Reads the XFS project ID assigned to `directory`.
+//
+// Returns:
+//   - Error if the path cannot be stat'ed or opened, or if its XFS
+//     attributes cannot be read.
+//   - None if the project ID is the reserved value 0 (no project).
+//   - Otherwise the assigned project ID.
+Result<prid_t> getProjectId(
+    const string& directory)
+{
+  struct stat stat;
+
+  if (::lstat(directory.c_str(), &stat) == -1) {
+    return ErrnoError("Failed to access '" + directory + "'");
+  }
+
+  Try<int> fd = openPath(directory, stat);
+  if (fd.isError()) {
+    return Error("Failed to open '" + directory + "': " + fd.error());
+  }
+
+  // The descriptor is only needed to fetch the attributes, so close it
+  // before inspecting the result.
+  Try<struct fsxattr> attr = getAttributes(fd.get());
+  os::close(fd.get());
+
+  if (attr.isError()) {
+    return Error("Failed to get XFS attributes for '" + directory + "': " +
+                 attr.error());
+  }
+
+  if (attr->fsx_projid == NON_PROJECT_ID) {
+    return None();
+  }
+
+  return attr->fsx_projid;
+}
+
+
+// Applies `projectId` to `directory` and to every directory and regular
+// file beneath it. The walk uses fts(3) with FTS_PHYSICAL and FTS_XDEV;
+// only entries reported as FTS_D or FTS_F are modified, so symlinks and
+// other file types are left alone.
+//
+// Returns an error if `directory` is a symlink or not a directory, if
+// the traversal cannot be started or finished, or if updating any entry
+// fails. The traversal handle is released on every exit path.
+static Try<Nothing> setProjectIdRecursively(
+    const string& directory,
+    prid_t projectId)
+{
+  if (os::stat::islink(directory) || !os::stat::isdir(directory)) {
+    return Error(directory + " is not a directory");
+  }
+
+  char* directory_[] = {const_cast<char*>(directory.c_str()), nullptr};
+
+  FTS* tree = ::fts_open(
+      directory_, FTS_NOCHDIR | FTS_PHYSICAL | FTS_XDEV, nullptr);
+  if (tree == nullptr) {
+    return ErrnoError("Failed to open '" + directory + "'");
+  }
+
+  for (FTSENT *node = ::fts_read(tree);
+       node != nullptr; node = ::fts_read(tree)) {
+    if (node->fts_info == FTS_D || node->fts_info == FTS_F) {
+      Try<Nothing> status = internal::setProjectId(
+          node->fts_path, *node->fts_statp, projectId);
+      if (status.isError()) {
+        ::fts_close(tree);
+        return Error(status.error());
+      }
+    }
+  }
+
+  // fts_read() returns nullptr both at the end of the hierarchy (with
+  // errno set to 0) and on failure, so errno tells the two apart.
+  if (errno != 0) {
+    Error error = ErrnoError();
+    ::fts_close(tree);
+    return error;
+  }
+
+  // Release the traversal state on the success path as well.
+  if (::fts_close(tree) != 0) {
+    return ErrnoError("Failed to close '" + directory + "'");
+  }
+
+  return Nothing();
+}
+
+
+Try<Nothing> setProjectId(
+ const string& directory,
+ prid_t projectId)
+{
+ if (projectId == NON_PROJECT_ID) {
+ return nonProjectError();
+ }
+
+ return setProjectIdRecursively(directory, projectId);
+}
+
+
+Try<Nothing> clearProjectId(
+ const string& directory)
+{
+ return setProjectIdRecursively(directory, NON_PROJECT_ID);
+}
+
+
+// Checks that `projectRange` does not include the reserved project ID 0.
+// Returns an Error describing the problem, or None if the range is valid.
+Option<Error> validateProjectIds(const IntervalSet<prid_t>& projectRange)
+{
+  const bool containsReserved = projectRange.contains(NON_PROJECT_ID);
+
+  if (!containsReserved) {
+    return None();
+  }
+
+  return Error("XFS project ID range contains illegal " +
+               stringify(NON_PROJECT_ID) + " value");
+}
+
+} // namespace xfs {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/a0e96bd2/src/slave/containerizer/mesos/isolators/xfs/utils.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/xfs/utils.hpp b/src/slave/containerizer/mesos/isolators/xfs/utils.hpp
new file mode 100644
index 0000000..654dc73
--- /dev/null
+++ b/src/slave/containerizer/mesos/isolators/xfs/utils.hpp
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef __XFS_UTILS_HPP__
+#define __XFS_UTILS_HPP__
+
+#include <string>
+
+#include <stout/bytes.hpp>
+#include <stout/interval.hpp>
+#include <stout/nothing.hpp>
+#include <stout/try.hpp>
+
+#include <xfs/xfs_types.h>
+
+namespace mesos {
+namespace internal {
+namespace xfs {
+
+// Pair of byte counts describing the quota state of an XFS project, as
+// returned by getProjectQuota().
+struct QuotaInfo
+{
+ // The quota limit, in bytes.
+ Bytes limit;
+ // The space currently used, in bytes.
+ Bytes used;
+};
+
+
+// Two QuotaInfo values are equal when both their limit and their usage
+// match.
+inline bool operator==(const QuotaInfo& left, const QuotaInfo& right)
+{
+  if (!(left.limit == right.limit)) {
+    return false;
+  }
+
+  return left.used == right.used;
+}
+
+
+Option<Error> validateProjectIds(const IntervalSet<prid_t>& projectRange);
+
+
+Result<QuotaInfo> getProjectQuota(
+ const std::string& path,
+ prid_t projectId);
+
+
+Try<Nothing> setProjectQuota(
+ const std::string& path,
+ prid_t projectId,
+ Bytes limit);
+
+
+Try<Nothing> clearProjectQuota(
+ const std::string& path,
+ prid_t projectId);
+
+
+Result<prid_t> getProjectId(
+ const std::string& directory);
+
+
+Try<Nothing> setProjectId(
+ const std::string& directory,
+ prid_t projectId);
+
+
+Try<Nothing> clearProjectId(
+ const std::string& directory);
+
+} // namespace xfs {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __XFS_UTILS_HPP__
[2/6] mesos git commit: Add autoconf tests for XFS project quotas.
Posted by ya...@apache.org.
Add autoconf tests for XFS project quotas.
Review: https://reviews.apache.org/r/44945/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/03137072
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/03137072
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/03137072
Branch: refs/heads/master
Commit: 031370725d05866f98016dfdba8ebf5448067a22
Parents: 548da8f
Author: James Peach <jp...@apache.org>
Authored: Fri Apr 8 13:48:36 2016 -0700
Committer: Jiang Yan Xu <ya...@jxu.me>
Committed: Fri Apr 8 16:46:01 2016 -0700
----------------------------------------------------------------------
configure.ac | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 54 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/03137072/configure.ac
----------------------------------------------------------------------
diff --git a/configure.ac b/configure.ac
index c693b82..4392909 100644
--- a/configure.ac
+++ b/configure.ac
@@ -258,12 +258,19 @@ AC_ARG_ENABLE([tests-install],
[build and install tests and their helper tools
default: no]),
[], [enable_tests_install=no])
-
+# TODO(MESOS-4991): Since network-isolator is an optional feature, it should
+# be enabled with --enable-network-isolator.
AC_ARG_WITH([network-isolator],
AS_HELP_STRING([--with-network-isolator],
[builds the network isolator]),
[], [with_network_isolator=no])
+AC_ARG_ENABLE([xfs-disk-isolator],
+ AS_HELP_STRING([--enable-xfs-disk-isolator],
+ [builds the XFS disk isolator
+ default: no]),
+ [], [enable_xfs_disk_isolator=no])
+
AC_ARG_ENABLE([libevent],
AS_HELP_STRING([--enable-libevent],
[use libevent instead of libev default: no]),
@@ -938,6 +945,52 @@ AM_CONDITIONAL([WITH_NETWORK_ISOLATOR],
[test "x$with_network_isolator" = "xyes"])
+AC_MSG_CHECKING([whether to enable the XFS disk isolator])
+AS_IF([test "x$enable_xfs_disk_isolator" = "xyes"],
+ [AC_MSG_RESULT([yes])],
+ [AC_MSG_RESULT([no])])
+
+AS_IF([test "x$enable_xfs_disk_isolator" = "xyes"], [
+ # We only support XFS on Linux.
+ AS_IF([test "$OS_NAME" = "linux"],
+ [],
+ [AC_MSG_ERROR([no XFS support on $OS_NAME
+-------------------------------------------------------------------
+The XFS disk isolator is only supported on Linux.
+-------------------------------------------------------------------
+ ])])
+
+ # Check for build dependencies for the XFS disk isolator. We only
+ # enable this if all the needed headers and libraries are present.
+ AC_CHECK_HEADERS([xfs/xfs.h xfs/xqm.h linux/quota.h sys/quota.h],
+ [], [AC_MSG_ERROR([missing XFS quota headers
+-------------------------------------------------------------------
+Please install the Linux kernel headers and xfsprogs development
+packages for XFS disk isolator support.
+-------------------------------------------------------------------
+ ])])
+
+ AC_CHECK_HEADERS([blkid/blkid.h], [], [AC_MSG_ERROR([missing libblkid headers
+-------------------------------------------------------------------
+Please install the libblkid development package for XFS disk
+isolator support.
+-------------------------------------------------------------------
+ ])])
+
+ # Note that AC_SEARCH_LIBS causes libblkid to be added to each binary. In
+ # this case, that is what we want, since the dependency will be in libmesos.
+ AC_SEARCH_LIBS(blkid_devno_to_devname, blkid, [], [AC_MSG_ERROR([missing libblkid
+-------------------------------------------------------------------
+Please install the libblkid package for XFS disk isolator support.
+-------------------------------------------------------------------
+ ])])
+
+ AC_DEFINE([ENABLE_XFS_DISK_ISOLATOR])
+])
+
+AM_CONDITIONAL([ENABLE_XFS_DISK_ISOLATOR], [test "x$enable_xfs_disk_isolator" = "xyes"])
+
+
# Check if Nvidia GPU support is enabled, and if so, verify we can
# access the NVML header files and libs.
if test x"$enable_nvidia_gpu_support" = "xyes"; then
[4/6] mesos git commit: Add XFS disk isolator tests.
Posted by ya...@apache.org.
Add XFS disk isolator tests.
Review: https://reviews.apache.org/r/44949/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/255710b7
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/255710b7
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/255710b7
Branch: refs/heads/master
Commit: 255710b7c95e578c873e1317e3705a55e81b1f61
Parents: 04be1d0
Author: James Peach <jp...@apache.org>
Authored: Fri Apr 8 14:53:56 2016 -0700
Committer: Jiang Yan Xu <ya...@jxu.me>
Committed: Fri Apr 8 16:46:08 2016 -0700
----------------------------------------------------------------------
src/Makefile.am | 4 +-
src/slave/containerizer/mesos/containerizer.cpp | 7 +
.../containerizer/mesos/isolators/xfs/disk.cpp | 437 +++++++++++++++++++
.../containerizer/mesos/isolators/xfs/disk.hpp | 107 +++++
.../containerizer/mesos/isolators/xfs/utils.cpp | 6 +
.../containerizer/mesos/isolators/xfs/utils.hpp | 3 +
src/slave/flags.cpp | 7 +
src/slave/flags.hpp | 3 +
src/tests/containerizer/xfs_quota_tests.cpp | 425 +++++++++++++++++-
9 files changed, 997 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/255710b7/src/Makefile.am
----------------------------------------------------------------------
diff --git a/src/Makefile.am b/src/Makefile.am
index a16c2da..dc8f8e3 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -899,7 +899,9 @@ MESOS_LINUX_FILES += \
if ENABLE_XFS_DISK_ISOLATOR
MESOS_LINUX_FILES += \
slave/containerizer/mesos/isolators/xfs/utils.cpp \
- slave/containerizer/mesos/isolators/xfs/utils.hpp
+ slave/containerizer/mesos/isolators/xfs/utils.hpp \
+ slave/containerizer/mesos/isolators/xfs/disk.cpp \
+ slave/containerizer/mesos/isolators/xfs/disk.hpp
endif
MESOS_NETWORK_ISOLATOR_FILES = \
http://git-wip-us.apache.org/repos/asf/mesos/blob/255710b7/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index a5dd223..c25fa92 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -55,6 +55,10 @@
#include "slave/containerizer/mesos/isolators/posix/disk.hpp"
+#if ENABLE_XFS_DISK_ISOLATOR
+#include "slave/containerizer/mesos/isolators/xfs/disk.hpp"
+#endif
+
#ifdef __linux__
#include "slave/containerizer/mesos/isolators/cgroups/cpushare.hpp"
#include "slave/containerizer/mesos/isolators/cgroups/mem.hpp"
@@ -215,6 +219,9 @@ Try<MesosContainerizer*> MesosContainerizer::create(
{"posix/cpu", &PosixCpuIsolatorProcess::create},
{"posix/mem", &PosixMemIsolatorProcess::create},
{"posix/disk", &PosixDiskIsolatorProcess::create},
+#if ENABLE_XFS_DISK_ISOLATOR
+ {"xfs/disk", &XfsDiskIsolatorProcess::create},
+#endif
#ifdef __linux__
{"cgroups/cpu", &CgroupsCpushareIsolatorProcess::create},
{"cgroups/mem", &CgroupsMemIsolatorProcess::create},
http://git-wip-us.apache.org/repos/asf/mesos/blob/255710b7/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
new file mode 100644
index 0000000..2f65f0a
--- /dev/null
+++ b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
@@ -0,0 +1,437 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "slave/containerizer/mesos/isolators/xfs/disk.hpp"
+
+#include <glog/logging.h>
+
+#include <stout/check.hpp>
+#include <stout/foreach.hpp>
+#include <stout/os.hpp>
+
+#include <stout/os/stat.hpp>
+
+#include "slave/paths.hpp"
+
+using std::list;
+using std::string;
+
+using process::Failure;
+using process::Future;
+using process::Owned;
+using process::PID;
+using process::Process;
+using process::Promise;
+
+using mesos::slave::ContainerConfig;
+using mesos::slave::ContainerLaunchInfo;
+using mesos::slave::ContainerLimitation;
+using mesos::slave::ContainerState;
+using mesos::slave::Isolator;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Converts a `Value::Ranges` message into an interval set of XFS project
+// IDs. Each range is added as a closed interval [begin, end]. Fails if the
+// end of any range is larger than the maximum value of `prid_t`.
+static Try<IntervalSet<prid_t>> getIntervalSet(
+    const Value::Ranges& ranges)
+{
+  IntervalSet<prid_t> result;
+
+  foreach (const Value::Range& range, ranges.range()) {
+    if (range.end() > std::numeric_limits<prid_t>::max()) {
+      return Error("Project ID " + stringify(range.end()) +
+                   " is out of range");
+    }
+
+    result += (Bound<prid_t>::closed(range.begin()),
+               Bound<prid_t>::closed(range.end()));
+  }
+
+  return result;
+}
+
+
+// Adds up the "disk" resources that are subject to the XFS quota and
+// returns the total (scalar values are interpreted as megabytes). Returns
+// None if no such resource is present.
+static Option<Bytes> getDiskResource(
+    const Resources& resources)
+{
+  Option<Bytes> total = None();
+
+  foreach (const Resource& resource, resources) {
+    // Only plain "disk" resources count towards the quota.
+    //
+    // TODO(jpeach): Ignore persistent volume resources. The problem here is
+    // that we need to guarantee that we can track the removal of every
+    // directory for which we assign a project ID. Since destruction of
+    // persistent volumes is not visible to the isolator, we don't want to
+    // risk leaking the project ID, or spuriously reusing it.
+    //
+    // Any other disk resource that carries a volume is skipped as well.
+    const bool skipped =
+      resource.name() != "disk" ||
+      Resources::isPersistentVolume(resource) ||
+      (resource.has_disk() && resource.disk().has_volume());
+
+    if (skipped) {
+      continue;
+    }
+
+    const Bytes amount = Megabytes(resource.scalar().value());
+
+    if (total.isNone()) {
+      total = amount;
+    } else {
+      total.get() += amount;
+    }
+  }
+
+  return total;
+}
+
+
+// Factory for the XFS disk isolator.
+//
+// Fails if the agent work directory is not on an XFS filesystem, if the
+// process is not running as root, or if `flags.xfs_project_range` does not
+// parse to a valid set of project ID ranges.
+Try<Isolator*> XfsDiskIsolatorProcess::create(const Flags& flags)
+{
+  if (!xfs::pathIsXfs(flags.work_dir)) {
+    return Error("'" + flags.work_dir + "' is not an XFS filesystem");
+  }
+
+  Result<uid_t> uid = os::getuid();
+  CHECK_SOME(uid) << "getuid(2) doesn't fail";
+
+  if (uid.get() != 0) {
+    return Error("The XFS disk isolator requires running as root.");
+  }
+
+  // Reuse the resource parser to interpret the project range flag.
+  Try<Resource> parsed =
+    Resources::parse("projects", flags.xfs_project_range, "*");
+
+  if (parsed.isError()) {
+    return Error(
+        "Failed to parse XFS project range '" +
+        flags.xfs_project_range +
+        "'");
+  }
+
+  const Resource& projects = parsed.get();
+
+  if (projects.type() != Value::RANGES) {
+    return Error(
+        "Invalid XFS project resource type " +
+        mesos::Value_Type_Name(projects.type()) +
+        ", expecting " +
+        mesos::Value_Type_Name(Value::RANGES));
+  }
+
+  Try<IntervalSet<prid_t>> projectIds = getIntervalSet(projects.ranges());
+
+  if (projectIds.isError()) {
+    return Error(projectIds.error());
+  }
+
+  Option<Error> invalid = xfs::validateProjectIds(projectIds.get());
+  if (invalid.isSome()) {
+    return Error(invalid->message);
+  }
+
+  Owned<MesosIsolatorProcess> process(
+      new XfsDiskIsolatorProcess(flags, projectIds.get()));
+
+  return new MesosIsolator(process);
+}
+
+
+// Stores the agent flags and the configured project ID range. Initially no
+// project ID has been handed out, so the free range starts out equal to
+// the total range.
+XfsDiskIsolatorProcess::XfsDiskIsolatorProcess(
+    const Flags& _flags,
+    const IntervalSet<prid_t>& projectIds)
+  : flags(_flags),
+    totalProjectIds(projectIds),
+    freeProjectIds(projectIds)
+{
+  LOG(INFO) << "Allocating XFS project IDs from the range " << totalProjectIds;
+}
+
+
+// Nothing to release explicitly; the destructor body is empty.
+XfsDiskIsolatorProcess::~XfsDiskIsolatorProcess() {}
+
+
+// Rebuilds the isolator state from what is found on disk.
+//
+// Every sandbox directory under the work directory is inspected for an XFS
+// project ID. Sandboxes that carry one are recorded and their ID is taken
+// out of the free pool. Sandboxes that belong neither to a recovered
+// container nor to a known orphan are scheduled for cleanup.
+Future<Nothing> XfsDiskIsolatorProcess::recover(
+    const list<ContainerState>& states,
+    const hashset<ContainerID>& orphans)
+{
+  // We don't need to explicitly deal with orphans since we are primarily
+  // concerned with the on-disk state. We scan all the sandbox directories
+  // for project IDs that we have not recovered and make a best effort to
+  // remove all the corresponding on-disk state.
+  const string pattern = path::join(
+      paths::getSandboxRootDir(flags.work_dir),
+      "*",
+      "frameworks",
+      "*",
+      "executors",
+      "*",
+      "runs",
+      "*");
+
+  Try<list<string>> sandboxes = os::glob(pattern);
+
+  if (sandboxes.isError()) {
+    return Failure("Failed to scan sandbox directories: " + sandboxes.error());
+  }
+
+  // The containers that the containerizer recovered as running.
+  hashset<ContainerID> alive;
+
+  foreach (const ContainerState& state, states) {
+    alive.insert(state.container_id());
+  }
+
+  foreach (const string& sandbox, sandboxes.get()) {
+    // Skip the "latest" symlink.
+    if (os::stat::islink(sandbox)) {
+      continue;
+    }
+
+    // The last path component of a sandbox is the container ID.
+    ContainerID containerId;
+    containerId.set_value(Path(sandbox).basename());
+
+    CHECK(!infos.contains(containerId)) << "ContainerIDs should never collide";
+
+    // We fail the isolator recovery upon failure in any container because
+    // failing to get the project ID usually suggests some fatal issue on the
+    // host.
+    Result<prid_t> projectId = xfs::getProjectId(sandbox);
+    if (projectId.isError()) {
+      return Failure(projectId.error());
+    }
+
+    // If there is no project ID, don't worry about it. This can happen the
+    // first time an operator enables the XFS disk isolator and we recover a
+    // set of containers that we did not isolate.
+    if (projectId.isNone()) {
+      continue;
+    }
+
+    infos.put(containerId, Owned<Info>(new Info(sandbox, projectId.get())));
+    freeProjectIds -= projectId.get();
+
+    // If this is a known orphan, the containerizer will send a cleanup call
+    // later. If this is a live container, we will manage it. Otherwise, we
+    // have to dispatch a cleanup ourselves. Note that we don't wait for the
+    // result of the cleanups as we don't want to block agent recovery for
+    // unknown orphans.
+    const bool managed =
+      orphans.contains(containerId) || alive.contains(containerId);
+
+    if (!managed) {
+      dispatch(self(), &XfsDiskIsolatorProcess::cleanup, containerId);
+    }
+  }
+
+  return Nothing();
+}
+
+
+// Allocates a project ID for the container, applies it to the sandbox
+// directory and sets the initial quota from the executor's resources.
+//
+// The project ID is assigned as early as possible because XFS automatically
+// propagates it to inodes created afterwards, which saves us from manually
+// tagging a lot of files later on.
+Future<Option<ContainerLaunchInfo>> XfsDiskIsolatorProcess::prepare(
+    const ContainerID& containerId,
+    const ContainerConfig& containerConfig)
+{
+  if (infos.contains(containerId)) {
+    return Failure("Container has already been prepared");
+  }
+
+  Option<prid_t> projectId = nextProjectId();
+  if (projectId.isNone()) {
+    return Failure("Failed to assign project ID, range exhausted");
+  }
+
+  const string& sandbox = containerConfig.directory();
+
+  // Record the container before touching the disk, so that cleanup() is
+  // able to undo the allocation if tagging the sandbox fails.
+  infos.put(containerId, Owned<Info>(new Info(sandbox, projectId.get())));
+
+  Try<Nothing> assigned = xfs::setProjectId(sandbox, projectId.get());
+
+  if (assigned.isError()) {
+    return Failure(
+        "Failed to assign project " + stringify(projectId.get()) + ": " +
+        assigned.error());
+  }
+
+  LOG(INFO) << "Assigned project " << stringify(projectId.get()) << " to '"
+            << sandbox << "'";
+
+  // Apply the initial quota; this isolator contributes no launch info.
+  return update(containerId, containerConfig.executor_info().resources())
+    .then([]() -> Future<Option<ContainerLaunchInfo>> {
+      return None();
+    });
+}
+
+
+// There is no per-process work to do for this isolator; the call only
+// verifies that the container is known.
+Future<Nothing> XfsDiskIsolatorProcess::isolate(
+    const ContainerID& containerId,
+    pid_t pid)
+{
+  if (infos.contains(containerId)) {
+    return Nothing();
+  }
+
+  return Failure("Unknown container");
+}
+
+
+// The XFS quota itself enforces the disk limit, so this isolator has
+// nothing to report; a default-constructed future is handed back.
+Future<ContainerLimitation> XfsDiskIsolatorProcess::watch(
+    const ContainerID& containerId)
+{
+  Future<ContainerLimitation> limitation;
+  return limitation;
+}
+
+
+// Adjusts the XFS project quota of a container to match the disk resources
+// in `resources`.
+//
+// Updates for containers that this isolator does not track are logged and
+// ignored. Returns a Failure if the quota cannot be applied.
+Future<Nothing> XfsDiskIsolatorProcess::update(
+    const ContainerID& containerId,
+    const Resources& resources)
+{
+  // recover() deliberately does not track containers whose sandbox carries
+  // no project ID (e.g. containers started before this isolator was
+  // enabled), so an update for an unknown container is legitimate and must
+  // not abort the agent. Ignore it, as cleanup() does.
+  if (!infos.contains(containerId)) {
+    LOG(INFO) << "Ignoring update for unknown container " << containerId;
+    return Nothing();
+  }
+
+  const Owned<Info>& info = infos[containerId];
+
+  Option<Bytes> needed = getDiskResource(resources);
+  if (needed.isNone()) {
+    // TODO(jpeach) If there's no disk resource attached, we should set the
+    // minimum quota (1 block), since a zero quota would be unconstrained.
+    LOG(WARNING) << "Ignoring quota update with no disk resources";
+    return Nothing();
+  }
+
+  // Only update the disk quota if it has changed.
+  if (needed.get() != info->quota) {
+    Try<Nothing> status =
+      xfs::setProjectQuota(info->directory, info->projectId, needed.get());
+
+    if (status.isError()) {
+      return Failure("Failed to update quota for project " +
+                     stringify(info->projectId) + ": " + status.error());
+    }
+
+    // Remember the applied quota so that identical updates are skipped.
+    info->quota = needed.get();
+
+    LOG(INFO) << "Set quota on container " << containerId
+              << " for project " << info->projectId
+              << " to " << info->quota;
+  }
+
+  return Nothing();
+}
+
+
+// Reports the disk limit and the disk usage of a container, as recorded by
+// the XFS project quota. If no quota information is available the
+// statistics are returned without disk fields.
+Future<ResourceStatistics> XfsDiskIsolatorProcess::usage(
+    const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  const Owned<Info>& info = infos[containerId];
+
+  Result<xfs::QuotaInfo> quota = xfs::getProjectQuota(
+      info->directory, info->projectId);
+
+  if (quota.isError()) {
+    return Failure(quota.error());
+  }
+
+  ResourceStatistics statistics;
+
+  if (quota.isSome()) {
+    const xfs::QuotaInfo& current = quota.get();
+
+    statistics.set_disk_limit_bytes(current.limit.bytes());
+    statistics.set_disk_used_bytes(current.used.bytes());
+  }
+
+  return statistics;
+}
+
+
+// Removes the quota state created for a container: the project quota and
+// the project ID on the sandbox. This is best effort; a failure of one step
+// is logged and the remaining steps are still attempted.
+Future<Nothing> XfsDiskIsolatorProcess::cleanup(const ContainerID& containerId)
+{
+  if (!infos.contains(containerId)) {
+    LOG(INFO) << "Ignoring cleanup for unknown container " << containerId;
+    return Nothing();
+  }
+
+  // Copy the Info before erasing the entry; it is still needed for the
+  // calls and messages below.
+  const Info info = *infos[containerId];
+
+  infos.erase(containerId);
+
+  LOG(INFO) << "Removing project ID " << info.projectId
+            << " from '" << info.directory << "'";
+
+  Try<Nothing> quotaStatus = xfs::clearProjectQuota(
+      info.directory, info.projectId);
+
+  if (quotaStatus.isError()) {
+    LOG(ERROR) << "Failed to clear quota for '"
+               << info.directory << "': " << quotaStatus.error();
+  }
+
+  Try<Nothing> projectStatus = xfs::clearProjectId(info.directory);
+  if (projectStatus.isError()) {
+    LOG(ERROR) << "Failed to remove project ID "
+               << info.projectId
+               << " from '" << info.directory << "': "
+               << projectStatus.error();
+  }
+
+  const bool failed = quotaStatus.isError() || projectStatus.isError();
+
+  if (!failed) {
+    returnProjectId(info.projectId);
+    return Nothing();
+  }
+
+  // If we failed to remove the on-disk project ID we can't reclaim it
+  // because the quota would then be applied across two containers. This
+  // would be a project ID leak, but we could recover it at GC time if
+  // that was visible to isolators.
+  freeProjectIds -= info.projectId;
+  return Failure("Failed to cleanup '" + info.directory + "'");
+}
+
+
+// Takes the lowest project ID out of the free pool and returns it, or None
+// if the pool is empty.
+Option<prid_t> XfsDiskIsolatorProcess::nextProjectId()
+{
+  if (!freeProjectIds.empty()) {
+    const prid_t lowest = freeProjectIds.begin()->lower();
+
+    freeProjectIds -= lowest;
+    return lowest;
+  }
+
+  return None();
+}
+
+// Puts a project ID back into the free pool, provided that it belongs to
+// the configured range.
+void XfsDiskIsolatorProcess::returnProjectId(
+    prid_t projectId)
+{
+  // An ID outside of the total range is possible if the operator changed
+  // the range and we recovered a container from the old one; such an ID
+  // is dropped.
+  if (!totalProjectIds.contains(projectId)) {
+    return;
+  }
+
+  freeProjectIds += projectId;
+}
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/255710b7/src/slave/containerizer/mesos/isolators/xfs/disk.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.hpp b/src/slave/containerizer/mesos/isolators/xfs/disk.hpp
new file mode 100644
index 0000000..822de65
--- /dev/null
+++ b/src/slave/containerizer/mesos/isolators/xfs/disk.hpp
@@ -0,0 +1,107 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef __XFS_DISK_ISOLATOR_HPP__
+#define __XFS_DISK_ISOLATOR_HPP__
+
+#include <string>
+
+#include <process/owned.hpp>
+
+#include <stout/bytes.hpp>
+#include <stout/duration.hpp>
+#include <stout/hashmap.hpp>
+
+#include "slave/flags.hpp"
+#include "slave/state.hpp"
+
+#include "slave/containerizer/mesos/isolator.hpp"
+
+#include "slave/containerizer/mesos/isolators/xfs/utils.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Disk isolator that tags each container sandbox with an XFS project ID
+// and applies an XFS project quota derived from the container's disk
+// resources.
+class XfsDiskIsolatorProcess : public MesosIsolatorProcess
+{
+public:
+ // Creates the isolator from the agent flags; the constructor is private,
+ // so this is the only way to obtain an instance.
+ static Try<mesos::slave::Isolator*> create(const Flags& flags);
+
+ virtual ~XfsDiskIsolatorProcess();
+
+ // Returns a PID typed for this class, built from `this`.
+ process::PID<XfsDiskIsolatorProcess> self() const
+ {
+ return process::PID<XfsDiskIsolatorProcess>(this);
+ }
+
+ // Rebuilds the isolator state after an agent restart.
+ virtual process::Future<Nothing> recover(
+ const std::list<mesos::slave::ContainerState>& states,
+ const hashset<ContainerID>& orphans);
+
+ // Assigns a project ID to the container sandbox and sets the initial
+ // quota.
+ virtual process::Future<Option<mesos::slave::ContainerLaunchInfo>> prepare(
+ const ContainerID& containerId,
+ const mesos::slave::ContainerConfig& containerConfig);
+
+ virtual process::Future<Nothing> isolate(
+ const ContainerID& containerId,
+ pid_t pid);
+
+ virtual process::Future<mesos::slave::ContainerLimitation> watch(
+ const ContainerID& containerId);
+
+ // Adjusts the project quota to the given resources.
+ virtual process::Future<Nothing> update(
+ const ContainerID& containerId,
+ const Resources& resources);
+
+ // Reports the disk limit and usage of the container.
+ virtual process::Future<ResourceStatistics> usage(
+ const ContainerID& containerId);
+
+ // Removes the quota and the project ID of the container.
+ virtual process::Future<Nothing> cleanup(
+ const ContainerID& containerId);
+
+private:
+ XfsDiskIsolatorProcess(
+ const Flags& flags,
+ const IntervalSet<prid_t>& projectIds);
+
+ // Take the next project ID from the unallocated pool.
+ Option<prid_t> nextProjectId();
+
+ // Return this project ID to the unallocated pool.
+ void returnProjectId(prid_t projectId);
+
+ // Per-container bookkeeping.
+ struct Info
+ {
+ // A new Info starts with a quota of 0, i.e. no quota applied yet.
+ explicit Info(const std::string& _directory, prid_t _projectId)
+ : directory(_directory), quota(0), projectId(_projectId) {}
+
+ // The directory that carries the project ID.
+ const std::string directory;
+ // The quota most recently recorded for this container.
+ Bytes quota;
+ // The XFS project ID assigned to `directory`.
+ const prid_t projectId;
+ };
+
+ const Flags flags;
+ // The full range of project IDs this isolator may hand out.
+ const IntervalSet<prid_t> totalProjectIds;
+ // The project IDs that are currently not assigned to any container.
+ IntervalSet<prid_t> freeProjectIds;
+ // The containers tracked by this isolator.
+ hashmap<ContainerID, process::Owned<Info>> infos;
+};
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __XFS_DISK_ISOLATOR_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/255710b7/src/slave/containerizer/mesos/isolators/xfs/utils.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/xfs/utils.cpp b/src/slave/containerizer/mesos/isolators/xfs/utils.cpp
index 9285183..92914af 100644
--- a/src/slave/containerizer/mesos/isolators/xfs/utils.cpp
+++ b/src/slave/containerizer/mesos/isolators/xfs/utils.cpp
@@ -379,6 +379,12 @@ Option<Error> validateProjectIds(const IntervalSet<prid_t>& projectRange)
return None();
}
+
+// Returns true only if platform_test_xfs_path() reports 1 for `path`; any
+// other result is treated as "not XFS".
+bool pathIsXfs(const std::string& path)
+{
+  const int result = ::platform_test_xfs_path(path.c_str());
+  return result == 1;
+}
+
} // namespace xfs {
} // namespace internal {
} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/255710b7/src/slave/containerizer/mesos/isolators/xfs/utils.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/xfs/utils.hpp b/src/slave/containerizer/mesos/isolators/xfs/utils.hpp
index 654dc73..7602fe3 100644
--- a/src/slave/containerizer/mesos/isolators/xfs/utils.hpp
+++ b/src/slave/containerizer/mesos/isolators/xfs/utils.hpp
@@ -46,6 +46,9 @@ inline bool operator==(const QuotaInfo& left, const QuotaInfo& right)
Option<Error> validateProjectIds(const IntervalSet<prid_t>& projectRange);
+bool pathIsXfs(const std::string& path);
+
+
Result<QuotaInfo> getProjectQuota(
const std::string& path,
prid_t projectId);
http://git-wip-us.apache.org/repos/asf/mesos/blob/255710b7/src/slave/flags.cpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp
index 7164afe..dd7bc9a 100644
--- a/src/slave/flags.cpp
+++ b/src/slave/flags.cpp
@@ -776,4 +776,11 @@ mesos::internal::slave::Flags::Flags()
"The symbol name of the master detector to use. This symbol\n"
"should exist in a module specified through the --modules flag.\n"
"Cannot be used in conjunction with --master.");
+
+#if ENABLE_XFS_DISK_ISOLATOR
+ add(&Flags::xfs_project_range,
+ "xfs_project_range",
+ "The ranges of XFS project IDs to use for tracking directory quotas",
+ "[5000-10000]");
+#endif
}
http://git-wip-us.apache.org/repos/asf/mesos/blob/255710b7/src/slave/flags.hpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.hpp b/src/slave/flags.hpp
index 4236b7f..300db49 100644
--- a/src/slave/flags.hpp
+++ b/src/slave/flags.hpp
@@ -144,6 +144,9 @@ public:
Duration qos_correction_interval_min;
Duration oversubscribed_resources_interval;
Option<std::string> master_detector;
+#if ENABLE_XFS_DISK_ISOLATOR
+ std::string xfs_project_range;
+#endif
};
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/255710b7/src/tests/containerizer/xfs_quota_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/xfs_quota_tests.cpp b/src/tests/containerizer/xfs_quota_tests.cpp
index 8b0322b..61ea2e5 100644
--- a/src/tests/containerizer/xfs_quota_tests.cpp
+++ b/src/tests/containerizer/xfs_quota_tests.cpp
@@ -36,8 +36,8 @@
#include "master/master.hpp"
-#include "slave/constants.hpp"
#include "slave/flags.hpp"
+#include "slave/paths.hpp"
#include "slave/slave.hpp"
#include "slave/containerizer/fetcher.hpp"
@@ -62,8 +62,11 @@ using mesos::internal::master::Master;
using mesos::internal::slave::Fetcher;
using mesos::internal::slave::MesosContainerizer;
+using mesos::internal::slave::MesosContainerizerProcess;
using mesos::internal::slave::Slave;
+using mesos::master::detector::MasterDetector;
+
namespace mesos {
namespace internal {
namespace tests {
@@ -159,6 +162,7 @@ public:
// We only need an XFS-specific directory for the work directory. We
// don't mind that other flags refer to a different temp directory.
flags.work_dir = mountPoint.get();
+ flags.isolation = "xfs/disk";
return flags;
}
@@ -275,6 +279,10 @@ TEST_F(ROOT_XFS_QuotaTest, ProjectIdErrors)
}
+// Verify that directories are isolated with respect to XFS quotas. We
+// create two trees which have symlinks into each other. If we followed
+// the symlinks when applying the project IDs to the directories, then the
+// quotas would end up being incorrect.
TEST_F(ROOT_XFS_QuotaTest, DirectoryTree)
{
Bytes limit = Megabytes(100);
@@ -332,6 +340,421 @@ TEST_F(ROOT_XFS_QuotaTest, DirectoryTree)
getProjectQuota(rootB, projectB));
}
+
+// Verify that a task that tries to consume more disk space than it has
+// requested is only allowed to consume exactly the assigned resources. We
+// tell dd to write 2MB but only give it 1MB of disk and (roughly) verify
+// that it exits with a failure (that should be a write error).
+TEST_F(ROOT_XFS_QuotaTest, DiskUsageExceedsQuota)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave =
+    StartSlave(detector.get(), CreateSlaveFlags());
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(&driver, _, _));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(&driver, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers);
+
+  // This check must be fatal: indexing into an empty offer list below
+  // would be undefined behavior rather than a clean test failure.
+  ASSERT_FALSE(offers.get().empty());
+
+  const Offer& offer = offers.get()[0];
+
+  // Create a task which requests 1MB of disk, but tries to write 2MB.
+  TaskInfo task = createTask(
+      offer.slave_id(),
+      Resources::parse("cpus:1;mem:128;disk:1").get(),
+      "dd if=/dev/zero of=file bs=1048576 count=2");
+
+  // We expect exactly two status updates: TASK_RUNNING, then TASK_FAILED.
+  Future<TaskStatus> status1;
+  Future<TaskStatus> status2;
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&status1))
+    .WillOnce(FutureArg<1>(&status2));
+
+  driver.launchTasks(offer.id(), {task});
+
+  AWAIT_READY(status1);
+  EXPECT_EQ(task.task_id(), status1.get().task_id());
+  EXPECT_EQ(TASK_RUNNING, status1.get().state());
+
+  AWAIT_READY(status2);
+  EXPECT_EQ(task.task_id(), status2.get().task_id());
+  EXPECT_EQ(TASK_FAILED, status2.get().state());
+
+  // Unlike the posix/disk isolator, the reason for task failure
+  // should be that dd got an IO error.
+  EXPECT_EQ(TaskStatus::SOURCE_EXECUTOR, status2.get().source());
+  EXPECT_EQ("Command exited with status 1", status2.get().message());
+
+  driver.stop();
+  driver.join();
+}
+
+
+// Verify that we can get accurate resource statistics from the XFS
+// disk isolator.
+TEST_F(ROOT_XFS_QuotaTest, ResourceStatistics)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Fetcher fetcher;
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  slave::Flags flags = CreateSlaveFlags();
+
+  // Create the containerizer ourselves so that the test can query it
+  // directly for the running containers and their resource usage.
+  Try<MesosContainerizer*> _containerizer =
+    MesosContainerizer::create(flags, true, &fetcher);
+
+  ASSERT_SOME(_containerizer);
+  Owned<MesosContainerizer> containerizer(_containerizer.get());
+
+  Try<Owned<cluster::Slave>> slave =
+    StartSlave(detector.get(), containerizer.get(), flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(_, _, _));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers);
+
+  // This check must be fatal: indexing into an empty offer list below
+  // would be undefined behavior rather than a clean test failure.
+  ASSERT_FALSE(offers.get().empty());
+
+  const Offer& offer = offers.get()[0];
+
+  // Create a task that tries to write 4MB with only 3MB of disk, but
+  // doesn't fail (it sleeps if dd errors out). We will verify that the
+  // allocated disk is filled.
+  TaskInfo task = createTask(
+      offer.slave_id(),
+      Resources::parse("cpus:1;mem:128;disk:3").get(),
+      "dd if=/dev/zero of=file bs=1048576 count=4 || sleep 1000");
+
+  Future<TaskStatus> status;
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&status))
+    .WillRepeatedly(Return()); // Ignore subsequent updates.
+
+  driver.launchTasks(offer.id(), {task});
+
+  AWAIT_READY(status);
+  EXPECT_EQ(task.task_id(), status.get().task_id());
+  EXPECT_EQ(TASK_RUNNING, status.get().state());
+
+  Future<hashset<ContainerID>> containers = containerizer.get()->containers();
+  AWAIT_READY(containers);
+  ASSERT_EQ(1u, containers.get().size());
+
+  ContainerID containerId = *(containers.get().begin());
+  Timeout timeout = Timeout::in(Seconds(5));
+
+  // Poll the containerizer until the reported disk usage reaches the
+  // limit, failing the test if that has not happened before the timeout.
+  while (true) {
+    Future<ResourceStatistics> usage = containerizer.get()->usage(containerId);
+    AWAIT_READY(usage);
+
+    ASSERT_TRUE(usage.get().has_disk_limit_bytes());
+    EXPECT_EQ(Megabytes(3), Bytes(usage.get().disk_limit_bytes()));
+
+    if (usage.get().has_disk_used_bytes()) {
+      // Usage must always be <= the limit.
+      EXPECT_LE(usage.get().disk_used_bytes(), usage.get().disk_limit_bytes());
+
+      // Usage might not be equal to the limit, but it must hit
+      // and not exceed the limit.
+      if (usage.get().disk_used_bytes() >= usage.get().disk_limit_bytes()) {
+        EXPECT_EQ(
+            usage.get().disk_used_bytes(), usage.get().disk_limit_bytes());
+        EXPECT_EQ(Megabytes(3), Bytes(usage.get().disk_used_bytes()));
+        break;
+      }
+    }
+
+    ASSERT_FALSE(timeout.expired());
+    os::sleep(Milliseconds(1));
+  }
+
+  driver.stop();
+  driver.join();
+}
+
+
+// In this test, the framework is not checkpointed. This ensures that when we
+// stop the slave, the executor is killed and we will need to recover the
+// working directories without getting any checkpointed recovery state.
+TEST_F(ROOT_XFS_QuotaTest, NoCheckpointRecovery)
+{
+  slave::Flags flags = CreateSlaveFlags();
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+  ASSERT_SOME(slave);
+
+  MockScheduler sched;
+
+  MesosSchedulerDriver driver(
+      &sched, DEFAULT_FRAMEWORK_INFO, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(_, _, _));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers);
+
+  // This check must be fatal: indexing into an empty offer list below
+  // would be undefined behavior rather than a clean test failure.
+  ASSERT_FALSE(offers.get().empty());
+
+  const Offer& offer = offers.get()[0];
+
+  // Fill the 1MB disk allocation, then keep the task alive.
+  TaskInfo task = createTask(
+      offer.slave_id(),
+      Resources::parse("cpus:1;mem:128;disk:1").get(),
+      "dd if=/dev/zero of=file bs=1048576 count=1; sleep 1000");
+
+  // Capture the first status update and ignore exactly one more.
+  Future<TaskStatus> status;
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&status))
+    .WillOnce(Return());
+
+  driver.launchTasks(offer.id(), {task});
+
+  AWAIT_READY(status);
+  EXPECT_EQ(task.task_id(), status.get().task_id());
+  EXPECT_EQ(TASK_RUNNING, status.get().state());
+
+  Future<ResourceUsage> usage1 =
+    process::dispatch(slave.get()->pid, &Slave::usage);
+  AWAIT_READY(usage1);
+
+  // We should have 1 executor using resources.
+  ASSERT_EQ(1, usage1.get().executors().size());
+  EXPECT_EQ(Megabytes(1), usage1->executors(0).statistics().disk_limit_bytes());
+  EXPECT_EQ(Megabytes(1), usage1->executors(0).statistics().disk_used_bytes());
+
+  // Restart the slave.
+  slave.get()->terminate();
+
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
+
+  // Following the example of the filesystem isolator tests, wait
+  // until the containerizer cleans up the orphans. Only after that
+  // should we expect to find the project IDs removed. The expectation
+  // is installed before the slave is restarted so that the recovery
+  // dispatch cannot happen before we start watching for it.
+  Future<Nothing> _recover =
+    FUTURE_DISPATCH(_, &MesosContainerizerProcess::___recover);
+
+  slave = StartSlave(detector.get(), flags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(_recover);
+
+  AWAIT_READY(slaveReregisteredMessage);
+
+  Future<ResourceUsage> usage2 =
+    process::dispatch(slave.get()->pid, &Slave::usage);
+  AWAIT_READY(usage2);
+
+  // We should have no executors left because we didn't checkpoint.
+  ASSERT_EQ(0, usage2.get().executors().size());
+
+  Try<std::list<std::string>> sandboxes = os::glob(path::join(
+      slave::paths::getSandboxRootDir(mountPoint.get()),
+      "*",
+      "frameworks",
+      "*",
+      "executors",
+      "*",
+      "runs",
+      "*"));
+
+  ASSERT_SOME(sandboxes);
+
+  // One sandbox and one symlink.
+  ASSERT_EQ(2u, sandboxes->size());
+
+  // Scan the remaining sandboxes and make sure that no projects are assigned.
+  foreach (const string& sandbox, sandboxes.get()) {
+    // Skip the "latest" symlink.
+    if (os::stat::islink(sandbox)) {
+      continue;
+    }
+
+    EXPECT_NONE(xfs::getProjectId(sandbox));
+  }
+
+  driver.stop();
+  driver.join();
+}
+
+
+// In this test, the framework is checkpointed so we expect the executor to
+// persist across the slave restart and to have the same resource usage before
+// and after.
+TEST_F(ROOT_XFS_QuotaTest, CheckpointRecovery)
+{
+  slave::Flags flags = CreateSlaveFlags();
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  // Start the slave with the same flags that are used for the restart
+  // below, so that both incarnations share one configuration.
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+  ASSERT_SOME(slave);
+
+  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo.set_checkpoint(true);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(_, _, _));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers);
+
+  // This check must be fatal: indexing into an empty offer list below
+  // would be undefined behavior rather than a clean test failure.
+  ASSERT_FALSE(offers.get().empty());
+
+  const Offer& offer = offers.get()[0];
+
+  // Fill the 1MB disk allocation, then keep the task alive.
+  TaskInfo task = createTask(
+      offer.slave_id(),
+      Resources::parse("cpus:1;mem:128;disk:1").get(),
+      "dd if=/dev/zero of=file bs=1048576 count=1; sleep 1000");
+
+  Future<TaskStatus> status;
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&status));
+
+  driver.launchTasks(offer.id(), {task});
+
+  AWAIT_READY(status);
+  EXPECT_EQ(task.task_id(), status.get().task_id());
+  EXPECT_EQ(TASK_RUNNING, status.get().state());
+
+  Future<ResourceUsage> usage1 =
+    process::dispatch(slave.get()->pid, &Slave::usage);
+  AWAIT_READY(usage1);
+
+  // We should have 1 executor using resources.
+  ASSERT_EQ(1, usage1.get().executors().size());
+
+  // Restart the slave.
+  slave.get()->terminate();
+
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
+
+  slave = StartSlave(detector.get(), flags);
+  ASSERT_SOME(slave);
+
+  // Wait for the slave to re-register.
+  AWAIT_READY(slaveReregisteredMessage);
+
+  Future<ResourceUsage> usage2 =
+    process::dispatch(slave.get()->pid, &Slave::usage);
+  AWAIT_READY(usage2);
+
+  // We should still have 1 executor using resources. Check the usage
+  // reported after the restart, not the snapshot taken before it.
+  ASSERT_EQ(1, usage2.get().executors().size());
+
+  Try<std::list<std::string>> sandboxes = os::glob(path::join(
+      slave::paths::getSandboxRootDir(mountPoint.get()),
+      "*",
+      "frameworks",
+      "*",
+      "executors",
+      "*",
+      "runs",
+      "*"));
+
+  ASSERT_SOME(sandboxes);
+
+  // One sandbox and one symlink.
+  ASSERT_EQ(2u, sandboxes->size());
+
+  // Scan the remaining sandboxes. We ought to still have project IDs
+  // assigned to them all.
+  foreach (const string& sandbox, sandboxes.get()) {
+    // Skip the "latest" symlink.
+    if (os::stat::islink(sandbox)) {
+      continue;
+    }
+
+    EXPECT_SOME(xfs::getProjectId(sandbox));
+  }
+
+  driver.stop();
+  driver.join();
+}
+
+
+// Verify that starting a slave yields an error when the work directory
+// or the XFS project ID range flag is not acceptable.
+TEST_F(ROOT_XFS_QuotaTest, IsolatorFlags)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+
+  // Each case below starts from a fresh set of slave flags and
+  // overrides a single setting.
+
+  {
+    // work_dir must be an XFS filesystem.
+    slave::Flags badWorkDir = CreateSlaveFlags();
+    badWorkDir.work_dir = "/proc";
+    ASSERT_ERROR(StartSlave(detector.get(), badWorkDir));
+  }
+
+  {
+    // 0 is an invalid project ID.
+    slave::Flags zeroProject = CreateSlaveFlags();
+    zeroProject.xfs_project_range = "[0-10]";
+    ASSERT_ERROR(StartSlave(detector.get(), zeroProject));
+  }
+
+  {
+    // Project IDs are 32 bit.
+    slave::Flags tooLarge = CreateSlaveFlags();
+    tooLarge.xfs_project_range = "[100-1099511627776]";
+    ASSERT_ERROR(StartSlave(detector.get(), tooLarge));
+  }
+
+  {
+    // Project IDs must be a range (not arbitrary text).
+    slave::Flags notARange = CreateSlaveFlags();
+    notARange.xfs_project_range = "foo";
+    ASSERT_ERROR(StartSlave(detector.get(), notARange));
+  }
+
+  {
+    // Project IDs must be a range (not a single value).
+    slave::Flags singleValue = CreateSlaveFlags();
+    singleValue.xfs_project_range = "100";
+    ASSERT_ERROR(StartSlave(detector.get(), singleValue));
+  }
+}
+
} // namespace tests {
} // namespace internal {
} // namespace mesos {