You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2016/10/12 17:50:28 UTC

mesos git commit: Recover resources when offer is rescinded on DESTROY of shared volume.

Repository: mesos
Updated Branches:
  refs/heads/master 1fdea7dec -> 06d2e23dc


Recover resources when offer is rescinded on DESTROY of shared volume.

When a framework issues a DESTROY of a shared volume, and that volume
is not in use by a running or a pending task, we rescind the pending
offers in which the shared volume is present so that the deleted volume
is not assigned to any task in a future ACCEPT call.

At that time, we also need to recover the resources so that the
allocator can properly account for them.

Review: https://reviews.apache.org/r/52288/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/06d2e23d
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/06d2e23d
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/06d2e23d

Branch: refs/heads/master
Commit: 06d2e23dccc1ecc3c1d0e0cfb22ccca18bb6e56b
Parents: 1fdea7d
Author: Anindya Sinha <an...@apple.com>
Authored: Wed Oct 12 00:20:07 2016 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Wed Oct 12 10:49:12 2016 -0700

----------------------------------------------------------------------
 src/master/master.cpp                 |   6 ++
 src/tests/persistent_volume_tests.cpp | 146 +++++++++++++++++++++++++++++
 2 files changed, 152 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/06d2e23d/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index ad8993a..7ef8987 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -3975,6 +3975,12 @@ void Master::_accept(
           const Resources& offered = offer->resources();
           foreach (const Resource& volume, operation.destroy().volumes()) {
             if (offered.contains(volume)) {
+              allocator->recoverResources(
+                  offer->framework_id(),
+                  offer->slave_id(),
+                  offer->resources(),
+                  None());
+
               removeOffer(offer, true);
             }
           }

http://git-wip-us.apache.org/repos/asf/mesos/blob/06d2e23d/src/tests/persistent_volume_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/persistent_volume_tests.cpp b/src/tests/persistent_volume_tests.cpp
index e10a79e..b7d1c1a 100644
--- a/src/tests/persistent_volume_tests.cpp
+++ b/src/tests/persistent_volume_tests.cpp
@@ -908,6 +908,8 @@ TEST_P(PersistentVolumeTest, SharedPersistentVolumeMultipleTasks)
 
   FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
   frameworkInfo.set_role(DEFAULT_TEST_ROLE);
+  frameworkInfo.add_capabilities()->set_type(
+      FrameworkInfo::Capability::SHARED_RESOURCES);
 
   MockScheduler sched;
   MesosSchedulerDriver driver(
@@ -998,6 +1000,150 @@ TEST_P(PersistentVolumeTest, SharedPersistentVolumeMultipleTasks)
   driver.join();
 }
 
+// This test verifies that pending offers with shared persistent volumes
+// are rescinded when the volumes are destroyed.
+TEST_P(PersistentVolumeTest, SharedPersistentVolumeRescindOnDestroy)
+{
+  Clock::pause();
+
+  master::Flags masterFlags = CreateMasterFlags();
+  Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+  ASSERT_SOME(master);
+
+  slave::Flags slaveFlags = CreateSlaveFlags();
+
+  slaveFlags.resources = getSlaveResources();
+
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
+  ASSERT_SOME(slave);
+
+  // 1. Create framework1 so that all resources are offered to this framework.
+  FrameworkInfo frameworkInfo1 = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo1.set_role(DEFAULT_TEST_ROLE);
+  frameworkInfo1.add_capabilities()->set_type(
+      FrameworkInfo::Capability::SHARED_RESOURCES);
+
+  MockScheduler sched1;
+  MesosSchedulerDriver driver1(
+      &sched1, frameworkInfo1, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched1, registered(&driver1, _, _));
+
+  Future<vector<Offer>> offers1;
+  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
+    .WillOnce(FutureArg<1>(&offers1))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver1.start();
+
+  AWAIT_READY(offers1);
+  EXPECT_FALSE(offers1.get().empty());
+
+  Offer offer1 = offers1.get()[0];
+
+  // 2. framework1 CREATEs a shared volume, and LAUNCHes a task with a subset
+  //    of resources from the offer.
+  Resource volume = createPersistentVolume(
+      getDiskResource(Megabytes(2048)),
+      "id1",
+      "path1",
+      None(),
+      frameworkInfo1.principal(),
+      true);  // Shared volume.
+
+  // Create a task which uses a portion of the offered resources, so that
+  // the remaining resources can be offered to framework2. It's not important
+  // whether the volume is used (the task is killed soon and its purpose is
+  // only for splitting the offer).
+  TaskInfo task = createTask(
+      offer1.slave_id(),
+      Resources::parse("cpus:1;mem:128").get(),
+      "sleep 1000");
+
+  // Expect an offer containing the persistent volume.
+  EXPECT_CALL(sched1, resourceOffers(&driver1, _))
+    .WillOnce(FutureArg<1>(&offers1))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  // We use a filter of 0 seconds so the resources will be available
+  // in the next allocation cycle.
+  Filters filters;
+  filters.set_refuse_seconds(0);
+
+  driver1.acceptOffers(
+      {offer1.id()},
+      {CREATE(volume),
+       LAUNCH({task})},
+      filters);
+
+  // Make sure the call is processed before framework2 registers.
+  Clock::settle();
+
+  // 3. Create framework2 of the same role. It would be offered resources
+  //    recovered from the framework1 call.
+  FrameworkInfo frameworkInfo2 = DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo2.set_role(DEFAULT_TEST_ROLE);
+  frameworkInfo2.add_capabilities()->set_type(
+      FrameworkInfo::Capability::SHARED_RESOURCES);
+
+  MockScheduler sched2;
+  MesosSchedulerDriver driver2(
+      &sched2, frameworkInfo2, master.get()->pid, DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(sched2, registered(&driver2, _, _));
+
+  Future<vector<Offer>> offers2;
+  EXPECT_CALL(sched2, resourceOffers(&driver2, _))
+    .WillOnce(FutureArg<1>(&offers2))
+    .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+  driver2.start();
+
+  AWAIT_READY(offers2);
+
+  Offer offer2 = offers2.get()[0];
+
+  EXPECT_TRUE(Resources(offer2.resources()).contains(volume));
+
+  // 4. framework1 kills the task which results in an offer to framework1
+  //    with the shared volume. At this point, both frameworks will have
+  //    the shared resource in their pending offers.
+  EXPECT_CALL(sched1, statusUpdate(_, _))
+    .WillOnce(DoDefault());
+
+  driver1.killTask(task.task_id());
+
+  // Advance the clock until the allocator allocates
+  // the recovered resources.
+  Clock::advance(masterFlags.allocation_interval);
+
+  AWAIT_READY(offers1);
+
+  offer1 = offers1.get()[0];
+
+  EXPECT_TRUE(Resources(offer1.resources()).contains(volume));
+
+  // 5. DESTROY the shared volume via framework2 which would result in
+  //    framework1 being rescinded the offer.
+  Future<Nothing> rescinded;
+  EXPECT_CALL(sched1, offerRescinded(&driver1, _))
+    .WillOnce(FutureSatisfy(&rescinded));
+
+  driver2.acceptOffers(
+      {offer2.id()},
+      {DESTROY(volume)},
+      filters);
+
+  AWAIT_READY(rescinded);
+
+  driver1.stop();
+  driver1.join();
+
+  driver2.stop();
+  driver2.join();
+}
+
 
 // This test verifies that persistent volumes are recovered properly
 // after the slave restarts. The idea is to launch a command which