You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2016/08/18 15:50:22 UTC

mesos git commit: Fixed XFS isolator's handling of old containers.

Repository: mesos
Updated Branches:
  refs/heads/master e65801485 -> ecc280b81


Fixed XFS isolator's handling of old containers.

Old containers, after recovery of the agent, do not have any entries
stored in `infos` but could still get updated when executors
reregister or tasks terminate, and could still be queried for usage
by the containerizer.

Review: https://reviews.apache.org/r/51101/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ecc280b8
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ecc280b8
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ecc280b8

Branch: refs/heads/master
Commit: ecc280b8177377739ae4194750df30bb40131abb
Parents: e658014
Author: Jiang Yan Xu <xu...@apple.com>
Authored: Thu Aug 18 08:46:51 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Thu Aug 18 08:49:56 2016 -0700

----------------------------------------------------------------------
 .../containerizer/mesos/isolators/xfs/disk.cpp  |   8 +-
 src/tests/containerizer/xfs_quota_tests.cpp     | 100 +++++++++++++++++++
 2 files changed, 106 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ecc280b8/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
index 60e8493..74ffcf5 100644
--- a/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
+++ b/src/slave/containerizer/mesos/isolators/xfs/disk.cpp
@@ -295,7 +295,10 @@ Future<Nothing> XfsDiskIsolatorProcess::update(
     const ContainerID& containerId,
     const Resources& resources)
 {
-  CHECK(infos.contains(containerId));
+  if (!infos.contains(containerId)) {
+    LOG(INFO) << "Ignoring update for unknown container " << containerId;
+    return Nothing();
+  }
 
   const Owned<Info>& info = infos[containerId];
 
@@ -332,7 +335,8 @@ Future<ResourceStatistics> XfsDiskIsolatorProcess::usage(
     const ContainerID& containerId)
 {
   if (!infos.contains(containerId)) {
-    return Failure("Unknown container");
+    LOG(INFO) << "Ignoring usage for unknown container " << containerId;
+    return ResourceStatistics();
   }
 
   ResourceStatistics statistics;

http://git-wip-us.apache.org/repos/asf/mesos/blob/ecc280b8/src/tests/containerizer/xfs_quota_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/xfs_quota_tests.cpp b/src/tests/containerizer/xfs_quota_tests.cpp
index 243ef33..d0f5032 100644
--- a/src/tests/containerizer/xfs_quota_tests.cpp
+++ b/src/tests/containerizer/xfs_quota_tests.cpp
@@ -718,6 +718,106 @@ TEST_F(ROOT_XFS_QuotaTest, CheckpointRecovery)
 }
 
 
+// In this test, the agent initially doesn't enable disk isolation
+// but then restarts with XFS disk isolation enabled. We verify that
+// the old container launched before the agent restart is
+// successfully recovered.
+TEST_F(ROOT_XFS_QuotaTest, RecoverOldContainers)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+
+  slave::Flags flags = CreateSlaveFlags();
+
+  // `CreateSlaveFlags()` enables `disk/xfs` so here we reset
+  // `isolation` to empty.
+  flags.isolation.clear();
+
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+  ASSERT_SOME(slave);
+
+  FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo.set_checkpoint(true);
+
+  MockScheduler sched;
+  MesosSchedulerDriver driver(
+      &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched, registered(_, _, _));
+
+  Future<vector<Offer>> offers;
+  EXPECT_CALL(sched, resourceOffers(_, _))
+    .WillOnce(FutureArg<1>(&offers))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver.start();
+
+  AWAIT_READY(offers);
+  EXPECT_FALSE(offers.get().empty());
+
+  Offer offer = offers.get()[0];
+
+  TaskInfo task = createTask(
+      offer.slave_id(),
+      Resources::parse("cpus:1;mem:128;disk:1").get(),
+      "dd if=/dev/zero of=file bs=1024 count=1; sleep 1000");
+
+  Future<TaskStatus> status;
+  EXPECT_CALL(sched, statusUpdate(&driver, _))
+    .WillOnce(FutureArg<1>(&status));
+
+  driver.launchTasks(offer.id(), {task});
+
+  AWAIT_READY(status);
+  EXPECT_EQ(task.task_id(), status.get().task_id());
+  EXPECT_EQ(TASK_RUNNING, status.get().state());
+
+  {
+    Future<ResourceUsage> usage =
+      process::dispatch(slave.get()->pid, &Slave::usage);
+    AWAIT_READY(usage);
+
+    // We should have 1 executor using resources but it doesn't have
+    // disk limit enabled.
+    ASSERT_EQ(1, usage.get().executors().size());
+    const ResourceUsage_Executor& executor = usage.get().executors().Get(0);
+    ASSERT_TRUE(executor.has_statistics());
+    ASSERT_FALSE(executor.statistics().has_disk_limit_bytes());
+  }
+
+  // Restart the slave.
+  slave.get()->terminate();
+
+  Future<SlaveReregisteredMessage> slaveReregisteredMessage =
+    FUTURE_PROTOBUF(SlaveReregisteredMessage(), _, _);
+
+  // This time use the agent flags that include XFS disk isolation.
+  slave = StartSlave(detector.get(), CreateSlaveFlags());
+  ASSERT_SOME(slave);
+
+  // Wait for the slave to re-register.
+  AWAIT_READY(slaveReregisteredMessage);
+
+  {
+    Future<ResourceUsage> usage =
+      process::dispatch(slave.get()->pid, &Slave::usage);
+    AWAIT_READY(usage);
+
+    // We should still have 1 executor using resources but it doesn't
+    // have disk limit enabled.
+    ASSERT_EQ(1, usage.get().executors().size());
+    const ResourceUsage_Executor& executor = usage.get().executors().Get(0);
+    ASSERT_TRUE(executor.has_statistics());
+    ASSERT_FALSE(executor.statistics().has_disk_limit_bytes());
+  }
+
+  driver.stop();
+  driver.join();
+}
+
+
 TEST_F(ROOT_XFS_QuotaTest, IsolatorFlags)
 {
   slave::Flags flags;