You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2016/08/18 15:50:22 UTC
mesos git commit: Fixed XFS isolator's handling of old containers.
Repository: mesos
Updated Branches:
refs/heads/master e65801485 -> ecc280b81
Fixed XFS isolator's handling of old containers.
Old containers, after recovery of the agent, do not have any entries
stored in `infos` but could still get updated when executors
re-register, when tasks terminate, and when the containerizer makes
queries for usage.
Review: https://reviews.apache.org/r/51101/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ecc280b8
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ecc280b8
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ecc280b8
Branch: refs/heads/master
Commit: ecc280b8177377739ae4194750df30bb40131abb
Parents: e658014
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Thu Aug 18 08:46:51 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Thu Aug 18 08:49:56 2016 -0700
----------------------------------------------------------------------
.../containerizer/mesos/isolators/xfs/disk.cpp | 8 +-
src/tests/containerizer/xfs_quota_tests.cpp | 100 +++++++++++++++++++
2 files changed, 106 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/ecc280b8/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
index 60e8493..74ffcf5 100644
--- a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
+++ b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
@@ -295,7 +295,10 @@ Future<Nothing> XfsDiskIsolatorProcess::update(
const ContainerID& containerId,
const Resources& resources)
{
- CHECK(infos.contains(containerId));
+ if (!infos.contains(containerId)) {
+ LOG(INFO) << "Ignoring update for unknown container " << containerId;
+ return Nothing();
+ }
const Owned<Info>& info = infos[containerId];
@@ -332,7 +335,8 @@ Future<ResourceStatistics> XfsDiskIsolatorProcess::usage(
const ContainerID& containerId)
{
if (!infos.contains(containerId)) {
- return Failure("Unknown container");
+ LOG(INFO) << "Ignoring usage for unknown container " << containerId;
+ return ResourceStatistics();
}
ResourceStatistics statistics;
http://git-wip-us.apache.org/repos/asf/mesos/blob/ecc280b8/src/tests/containerizer/xfs_quota_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/xfs_quota_tests.cpp b/src/tests/containerizer/xfs_quota_tests.cpp
index 243ef33..d0f5032 100644
--- a/src/tests/containerizer/xfs_quota_tests.cpp
+++ b/src/tests/containerizer/xfs_quota_tests.cpp
@@ -718,6 +718,106 @@ TEST_F(ROOT_XFS_QuotaTest, CheckpointRecovery)
}
+// In this test, the agent initially doesn't enable disk isolation
+// but then restarts with XFS disk isolation enabled. We verify that
+// the old container launched before the agent restart is
+// successfully recovered.
+TEST_F(ROOT_XFS_QuotaTest, RecoverOldContainers)
+{
+ Try<Owned<cluster::Master>> master = StartMaster();
+ ASSERT_SOME(master);
+
+ Owned<MasterDetector> detector = master.get()->createDetector();
+
+ slave::Flags flags = CreateSlaveFlags();
+
+ // `CreateSlaveFlags()` enables `disk/xfs` so here we reset
+ // `isolation` to empty.
+ flags.isolation.clear();
+
+ Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+ ASSERT_SOME(slave);
+
+ FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+ frameworkInfo.set_checkpoint(true);
+
+ MockScheduler sched;
+ MesosSchedulerDriver driver(
+ &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+ EXPECT_CALL(sched, registered(_, _, _));
+
+ Future<vector<Offer>> offers;
+ EXPECT_CALL(sched, resourceOffers(_, _))
+ .WillOnce(FutureArg<1>(&offers))
+ .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+ driver.start();
+
+ AWAIT_READY(offers);
+ EXPECT_FALSE(offers.get().empty());
+
+ Offer offer = offers.get()[0];
+
+ TaskInfo task = createTask(
+ offer.slave_id(),
+ Resources::parse("cpus:1;mem:128;disk:1").get(),
+ "dd if=/dev/zero of=file bs=1024 count=1; sleep 1000");
+
+ Future<TaskStatus> status;
+ EXPECT_CALL(sched, statusUpdate(&driver, _))
+ .WillOnce(FutureArg<1>(&status));
+
+ driver.launchTasks(offer.id(), {task});
+
+ AWAIT_READY(status);
+ EXPECT_EQ(task.task_id(), status.get().task_id());
+ EXPECT_EQ(TASK_RUNNING, status.get().state());
+
+ {
+ Future<ResourceUsage> usage =
+ process::dispatch(slave.get()->pid, &Slave::usage);
+ AWAIT_READY(usage);
+
+ // We should have 1 executor using resources but it doesn't have
+ // disk limit enabled.
+ ASSERT_EQ(1, usage.get().executors().size());
+ const ResourceUsage_Executor& executor = usage.get().executors().Get(0);
+ ASSERT_TRUE(executor.has_statistics());
+ ASSERT_FALSE(executor.statistics().has_disk_limit_bytes());
+ }
+
+ // Restart the slave.
+ slave.get()->terminate();
+
+ Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+ FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
+
+ // This time use the agent flags that include XFS disk isolation.
+ slave = StartSlave(detector.get(), CreateSlaveFlags());
+ ASSERT_SOME(slave);
+
+ // Wait for the slave to re-register.
+ AWAIT_READY(slaveReregisteredMessage);
+
+ {
+ Future<ResourceUsage> usage =
+ process::dispatch(slave.get()->pid, &Slave::usage);
+ AWAIT_READY(usage);
+
+ // We should still have 1 executor using resources but it doesn't
+ // have disk limit enabled.
+ ASSERT_EQ(1, usage.get().executors().size());
+ const ResourceUsage_Executor& executor = usage.get().executors().Get(0);
+ ASSERT_TRUE(executor.has_statistics());
+ ASSERT_FALSE(executor.statistics().has_disk_limit_bytes());
+ }
+
+ driver.stop();
+ driver.join();
+}
+
+
TEST_F(ROOT_XFS_QuotaTest, IsolatorFlags)
{
slave::Flags flags;