You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2014/05/29 02:20:27 UTC
[2/3] git commit: Fixed Master::launchTasks() to inform allocator of
unused resources when any of the offers are invalid.
Fixed Master::launchTasks() to inform allocator of unused resources
when any of the offers are invalid.
Review: https://reviews.apache.org/r/21750
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0b8f30e8
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0b8f30e8
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0b8f30e8
Branch: refs/heads/master
Commit: 0b8f30e8ac3fb0893ffd32f63d132730ddad3d65
Parents: 44e5456
Author: Vinod Kone <vi...@twitter.com>
Authored: Tue May 20 18:44:19 2014 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Wed May 28 17:17:19 2014 -0700
----------------------------------------------------------------------
src/master/master.cpp | 53 ++++++++++++++++++++++++++++-------------
src/tests/master_tests.cpp | 15 ++++++++++++
2 files changed, 51 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/0b8f30e8/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 472eedd..fcbbc26 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1637,12 +1637,16 @@ struct OfferVisitor
virtual ~OfferVisitor() {}
- Slave* getSlave(Master* master, const SlaveID& id) {
- return master->getSlave(id);
+ Slave* getSlave(Master* master, const SlaveID& slaveId)
+ {
+ CHECK_NOTNULL(master);
+ return master->getSlave(slaveId);
}
- Offer* getOffer(Master* master, const OfferID& id) {
- return master->getOffer(id);
+ Offer* getOffer(Master* master, const OfferID& offerId)
+ {
+ CHECK_NOTNULL(master);
+ return master->getOffer(offerId);
}
};
@@ -1672,6 +1676,10 @@ struct FrameworkChecker : OfferVisitor {
Master* master)
{
Offer* offer = getOffer(master, offerId);
+ if (offer == NULL) {
+ return "Offer " + stringify(offerId) + " is no longer valid";
+ }
+
if (!(framework.id == offer->framework_id())) {
return "Offer " + stringify(offer->id()) +
" has invalid framework " + stringify(offer->framework_id()) +
@@ -1693,15 +1701,21 @@ struct SlaveChecker : OfferVisitor
Master* master)
{
Offer* offer = getOffer(master, offerId);
- Slave* slave = getSlave(master, offer->slave_id());
- if (slave == NULL) {
- return "Offer " + stringify(offerId) +
- " outlived slave " + stringify(offer->slave_id());
+ if (offer == NULL) {
+ return "Offer " + stringify(offerId) + " is no longer valid";
}
+ Slave* slave = getSlave(master, offer->slave_id());
+
+ // This is not possible because the offer should've been removed.
+ CHECK(slave != NULL)
+ << "Offer " << offerId
+ << " outlived slave " << offer->slave_id();
+
+ // This is not possible because the offer should've been removed.
CHECK(!slave->disconnected)
- << "Offer " + stringify(offerId)
- << " outlived disconnected slave " << stringify(slave->id);
+ << "Offer " << offerId
+ << " outlived disconnected slave " << offer->slave_id();
if (slaveId.isNone()) {
// Set slave id and use as base case for validation.
@@ -1828,7 +1842,7 @@ void Master::launchTasks(
}
// If offer validation succeeds, we need to pass along the common
- // slave. So optimisticaly, we store the first slave id we see.
+ // slave. So optimistically, we store the first slave id we see.
// In case of invalid offers (different slaves for example), we
// report error and return from launchTask before slaveId is used.
if (slaveId.isNone()) {
@@ -1845,19 +1859,22 @@ void Master::launchTasks(
delete visitor;
};
- // Remove offers.
+ // Remove offers and recover resources if any of the offers are
+ // invalid.
foreach (const OfferID& offerId, offerIds) {
Offer* offer = getOffer(offerId);
- // Explicit check needed if an offerId appears more
- // than once in offerIds.
- if (offer != NULL) {
+ if (offer != NULL) {
+ if (offerError.isSome()) {
+ allocator->resourcesRecovered(
+ offer->framework_id(), offer->slave_id(), offer->resources());
+ }
removeOffer(offer);
}
}
if (offerError.isSome()) {
LOG(WARNING) << "Failed to validate offer " << offerId
- << " : " << offerError.get();
+ << ": " << offerError.get();
foreach (const TaskInfo& task, tasks) {
const StatusUpdate& update = protobuf::createStatusUpdate(
@@ -1874,7 +1891,6 @@ void Master::launchTasks(
message.mutable_update()->CopyFrom(update);
send(framework->pid, message);
}
-
return;
}
@@ -3761,6 +3777,8 @@ void Master::removeTask(Task* task)
}
+// TODO(vinod): Instead of 'removeOffer()', consider implementing
+// 'useOffer()', 'discardOffer()' and 'rescindOffer()' for clarity.
void Master::removeOffer(Offer* offer, bool rescind)
{
// Remove from framework.
@@ -3790,6 +3808,7 @@ void Master::removeOffer(Offer* offer, bool rescind)
delete offer;
}
+
// TODO(bmahler): Consider killing this.
Framework* Master::getFramework(const FrameworkID& frameworkId)
{
http://git-wip-us.apache.org/repos/asf/mesos/blob/0b8f30e8/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index b0ff627..7183cb7 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -41,6 +41,7 @@
#include <stout/os.hpp>
#include <stout/try.hpp>
+#include "master/allocator.hpp"
#include "master/flags.hpp"
#include "master/master.hpp"
@@ -59,6 +60,8 @@ using namespace mesos::internal::tests;
using mesos::internal::master::Master;
+using mesos::internal::master::allocator::AllocatorProcess;
+
using mesos::internal::slave::GarbageCollectorProcess;
using mesos::internal::slave::Slave;
using mesos::internal::slave::Containerizer;
@@ -1300,11 +1303,17 @@ TEST_F(MasterTest, LaunchAcrossSlavesTest)
combinedOffers.push_back(offers1.get()[0].id());
combinedOffers.push_back(offers2.get()[0].id());
+ Future<Nothing> resourcesRecovered =
+ FUTURE_DISPATCH(_, &AllocatorProcess::resourcesRecovered);
+
driver.launchTasks(combinedOffers, tasks);
AWAIT_READY(status);
EXPECT_EQ(TASK_LOST, status.get().state());
+ // The resources of the invalid offers should be recovered.
+ AWAIT_READY(resourcesRecovered);
+
EXPECT_CALL(exec, shutdown(_))
.Times(AtMost(1));
@@ -1372,11 +1381,17 @@ TEST_F(MasterTest, LaunchDuplicateOfferTest)
EXPECT_CALL(sched, statusUpdate(&driver, _))
.WillOnce(FutureArg<1>(&status));
+ Future<Nothing> resourcesRecovered =
+ FUTURE_DISPATCH(_, &AllocatorProcess::resourcesRecovered);
+
driver.launchTasks(combinedOffers, tasks);
AWAIT_READY(status);
EXPECT_EQ(TASK_LOST, status.get().state());
+ // The resources of the invalid offers should be recovered.
+ AWAIT_READY(resourcesRecovered);
+
EXPECT_CALL(exec, shutdown(_))
.Times(AtMost(1));