You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2014/05/29 02:20:27 UTC

[2/3] git commit: Fixed Master::launchTasks() to inform allocator of unused resources when any of the offers are invalid.

Fixed Master::launchTasks() to inform allocator of unused resources
when any of the offers are invalid.

Review: https://reviews.apache.org/r/21750


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0b8f30e8
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0b8f30e8
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0b8f30e8

Branch: refs/heads/master
Commit: 0b8f30e8ac3fb0893ffd32f63d132730ddad3d65
Parents: 44e5456
Author: Vinod Kone <vi...@twitter.com>
Authored: Tue May 20 18:44:19 2014 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Wed May 28 17:17:19 2014 -0700

----------------------------------------------------------------------
 src/master/master.cpp      | 53 ++++++++++++++++++++++++++++-------------
 src/tests/master_tests.cpp | 15 ++++++++++++
 2 files changed, 51 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/0b8f30e8/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 472eedd..fcbbc26 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1637,12 +1637,16 @@ struct OfferVisitor
 
   virtual ~OfferVisitor() {}
 
-  Slave* getSlave(Master* master, const SlaveID& id) {
-    return master->getSlave(id);
+  Slave* getSlave(Master* master, const SlaveID& slaveId)
+  {
+    CHECK_NOTNULL(master);
+    return master->getSlave(slaveId);
   }
 
-  Offer* getOffer(Master* master, const OfferID& id) {
-    return master->getOffer(id);
+  Offer* getOffer(Master* master, const OfferID& offerId)
+  {
+    CHECK_NOTNULL(master);
+    return master->getOffer(offerId);
   }
 };
 
@@ -1672,6 +1676,10 @@ struct FrameworkChecker : OfferVisitor {
       Master* master)
   {
     Offer* offer = getOffer(master, offerId);
+    if (offer == NULL) {
+      return "Offer " + stringify(offerId) + " is no longer valid";
+    }
+
     if (!(framework.id == offer->framework_id())) {
       return "Offer " + stringify(offer->id()) +
           " has invalid framework " + stringify(offer->framework_id()) +
@@ -1693,15 +1701,21 @@ struct SlaveChecker : OfferVisitor
       Master* master)
   {
     Offer* offer = getOffer(master, offerId);
-    Slave* slave = getSlave(master, offer->slave_id());
-    if (slave == NULL) {
-      return "Offer " + stringify(offerId) +
-          " outlived slave " + stringify(offer->slave_id());
+    if (offer == NULL) {
+      return "Offer " + stringify(offerId) + " is no longer valid";
     }
 
+    Slave* slave = getSlave(master, offer->slave_id());
+
+    // This is not possible because the offer should've been removed.
+    CHECK(slave != NULL)
+      << "Offer " << offerId
+      << " outlived slave " << offer->slave_id();
+
+    // This is not possible because the offer should've been removed.
     CHECK(!slave->disconnected)
-      << "Offer " + stringify(offerId)
-      << " outlived disconnected slave " << stringify(slave->id);
+      << "Offer " << offerId
+      << " outlived disconnected slave " << offer->slave_id();
 
     if (slaveId.isNone()) {
       // Set slave id and use as base case for validation.
@@ -1828,7 +1842,7 @@ void Master::launchTasks(
     }
 
     // If offer validation succeeds, we need to pass along the common
-    // slave. So optimisticaly, we store the first slave id we see.
+    // slave. So optimistically, we store the first slave id we see.
     // In case of invalid offers (different slaves for example), we
     // report error and return from launchTask before slaveId is used.
     if (slaveId.isNone()) {
@@ -1845,19 +1859,22 @@ void Master::launchTasks(
     delete visitor;
   };
 
-  // Remove offers.
+  // Remove offers and recover resources if any of the offers are
+  // invalid.
   foreach (const OfferID& offerId, offerIds) {
     Offer* offer = getOffer(offerId);
-    // Explicit check needed if an offerId appears more
-    // than once in offerIds.
-     if (offer != NULL) {
+    if (offer != NULL) {
+      if (offerError.isSome()) {
+        allocator->resourcesRecovered(
+            offer->framework_id(), offer->slave_id(), offer->resources());
+      }
       removeOffer(offer);
     }
   }
 
   if (offerError.isSome()) {
     LOG(WARNING) << "Failed to validate offer " << offerId
-                   << " : " << offerError.get();
+                   << ": " << offerError.get();
 
     foreach (const TaskInfo& task, tasks) {
       const StatusUpdate& update = protobuf::createStatusUpdate(
@@ -1874,7 +1891,6 @@ void Master::launchTasks(
       message.mutable_update()->CopyFrom(update);
       send(framework->pid, message);
     }
-
     return;
   }
 
@@ -3761,6 +3777,8 @@ void Master::removeTask(Task* task)
 }
 
 
+// TODO(vinod): Instead of 'removeOffer()', consider implementing
+// 'useOffer()', 'discardOffer()' and 'rescindOffer()' for clarity.
 void Master::removeOffer(Offer* offer, bool rescind)
 {
   // Remove from framework.
@@ -3790,6 +3808,7 @@ void Master::removeOffer(Offer* offer, bool rescind)
   delete offer;
 }
 
+
 // TODO(bmahler): Consider killing this.
 Framework* Master::getFramework(const FrameworkID& frameworkId)
 {

http://git-wip-us.apache.org/repos/asf/mesos/blob/0b8f30e8/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index b0ff627..7183cb7 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -41,6 +41,7 @@
 #include <stout/os.hpp>
 #include <stout/try.hpp>
 
+#include "master/allocator.hpp"
 #include "master/flags.hpp"
 #include "master/master.hpp"
 
@@ -59,6 +60,8 @@ using namespace mesos::internal::tests;
 
 using mesos::internal::master::Master;
 
+using mesos::internal::master::allocator::AllocatorProcess;
+
 using mesos::internal::slave::GarbageCollectorProcess;
 using mesos::internal::slave::Slave;
 using mesos::internal::slave::Containerizer;
@@ -1300,11 +1303,17 @@ TEST_F(MasterTest, LaunchAcrossSlavesTest)
   combinedOffers.push_back(offers1.get()[0].id());
   combinedOffers.push_back(offers2.get()[0].id());
 
+  Future<Nothing> resourcesRecovered =
+    FUTURE_DISPATCH(_, &AllocatorProcess::resourcesRecovered);
+
   driver.launchTasks(combinedOffers, tasks);
 
   AWAIT_READY(status);
   EXPECT_EQ(TASK_LOST, status.get().state());
 
+  // The resources of the invalid offers should be recovered.
+  AWAIT_READY(resourcesRecovered);
+
   EXPECT_CALL(exec, shutdown(_))
     .Times(AtMost(1));
 
@@ -1372,11 +1381,17 @@ TEST_F(MasterTest, LaunchDuplicateOfferTest)
   EXPECT_CALL(sched, statusUpdate(&driver, _))
     .WillOnce(FutureArg<1>(&status));
 
+  Future<Nothing> resourcesRecovered =
+    FUTURE_DISPATCH(_, &AllocatorProcess::resourcesRecovered);
+
   driver.launchTasks(combinedOffers, tasks);
 
   AWAIT_READY(status);
   EXPECT_EQ(TASK_LOST, status.get().state());
 
+  // The resources of the invalid offers should be recovered.
+  AWAIT_READY(resourcesRecovered);
+
   EXPECT_CALL(exec, shutdown(_))
     .Times(AtMost(1));