You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2015/04/17 23:14:24 UTC
mesos git commit: Updated Frameworks struct to store Resources by
SlaveID.
Repository: mesos
Updated Branches:
refs/heads/master 25844c57b -> a97ae7510
Updated Frameworks struct to store Resources by SlaveID.
Review: https://reviews.apache.org/r/31665
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a97ae751
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a97ae751
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a97ae751
Branch: refs/heads/master
Commit: a97ae751029a753ca054b214f78da37aa8050f64
Parents: 25844c5
Author: Michael Park <mc...@gmail.com>
Authored: Fri Apr 17 14:13:38 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Fri Apr 17 14:13:49 2015 -0700
----------------------------------------------------------------------
src/master/http.cpp | 6 ++--
src/master/master.cpp | 7 +++--
src/master/master.hpp | 72 +++++++++++++++++++++++++++++++++++++---------
3 files changed, 67 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/a97ae751/src/master/http.cpp
----------------------------------------------------------------------
diff --git a/src/master/http.cpp b/src/master/http.cpp
index f2b123d..00c22c4 100644
--- a/src/master/http.cpp
+++ b/src/master/http.cpp
@@ -122,11 +122,11 @@ JSON::Object model(const Framework& framework)
// TODO(bmahler): Consider deprecating this in favor of the split
// used and offered resources below.
object.values["resources"] =
- model(framework.usedResources + framework.offeredResources);
+ model(framework.totalUsedResources + framework.totalOfferedResources);
// TODO(bmahler): Use these in the webui.
- object.values["used_resources"] = model(framework.usedResources);
- object.values["offered_resources"] = model(framework.offeredResources);
+ object.values["used_resources"] = model(framework.totalUsedResources);
+ object.values["offered_resources"] = model(framework.totalOfferedResources);
object.values["hostname"] = framework.info.hostname();
object.values["webui_url"] = framework.info.webui_url();
http://git-wip-us.apache.org/repos/asf/mesos/blob/a97ae751/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 02f35ac..944a943 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -4129,12 +4129,15 @@ void Master::addFramework(Framework* framework)
roles[framework->info.role()]->addFramework(framework);
// There should be no offered resources yet!
- CHECK_EQ(Resources(), framework->offeredResources);
+ CHECK_EQ(Resources(), framework->totalOfferedResources);
+ // TODO(mpark): Once the allocator API is updated to operate on
+ // 'hashmap<SlaveID, Resources>' rather than 'Resources', pass
+ // 'framework->usedResources' instead.
allocator->addFramework(
framework->id(),
framework->info,
- framework->usedResources);
+ framework->totalUsedResources);
// Export framework metrics.
http://git-wip-us.apache.org/repos/asf/mesos/blob/a97ae751/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 4a1df58..c10e7c0 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -1007,7 +1007,8 @@ struct Framework
tasks[task->task_id()] = task;
if (!protobuf::isTerminalState(task->state())) {
- usedResources += task->resources();
+ totalUsedResources += task->resources();
+ usedResources[task->slave_id()] += task->resources();
}
}
@@ -1022,7 +1023,11 @@ struct Framework
<< "Unknown task " << task->task_id()
<< " of framework " << task->framework_id();
- usedResources -= task->resources();
+ totalUsedResources -= task->resources();
+ usedResources[task->slave_id()] -= task->resources();
+ if (usedResources[task->slave_id()].empty()) {
+ usedResources.erase(task->slave_id());
+ }
}
void addCompletedTask(const Task& task)
@@ -1038,7 +1043,11 @@ struct Framework
<< " of framework " << task->framework_id();
if (!protobuf::isTerminalState(task->state())) {
- usedResources -= task->resources();
+ totalUsedResources -= task->resources();
+ usedResources[task->slave_id()] -= task->resources();
+ if (usedResources[task->slave_id()].empty()) {
+ usedResources.erase(task->slave_id());
+ }
}
addCompletedTask(*task);
@@ -1050,7 +1059,8 @@ struct Framework
{
CHECK(!offers.contains(offer)) << "Duplicate offer " << offer->id();
offers.insert(offer);
- offeredResources += offer->resources();
+ totalOfferedResources += offer->resources();
+ offeredResources[offer->slave_id()] += offer->resources();
}
void removeOffer(Offer* offer)
@@ -1058,7 +1068,12 @@ struct Framework
CHECK(offers.find(offer) != offers.end())
<< "Unknown offer " << offer->id();
- offeredResources -= offer->resources();
+ totalOfferedResources -= offer->resources();
+ offeredResources[offer->slave_id()] -= offer->resources();
+ if (offeredResources[offer->slave_id()].empty()) {
+ offeredResources.erase(offer->slave_id());
+ }
+
offers.erase(offer);
}
@@ -1077,7 +1092,8 @@ struct Framework
<< " on slave " << slaveId;
executors[slaveId][executorInfo.executor_id()] = executorInfo;
- usedResources += executorInfo.resources();
+ totalUsedResources += executorInfo.resources();
+ usedResources[slaveId] += executorInfo.resources();
}
void removeExecutor(const SlaveID& slaveId,
@@ -1088,7 +1104,12 @@ struct Framework
<< " of framework " << id()
<< " of slave " << slaveId;
- usedResources -= executors[slaveId][executorId].resources();
+ totalUsedResources -= executors[slaveId][executorId].resources();
+ usedResources[slaveId] -= executors[slaveId][executorId].resources();
+ if (usedResources[slaveId].empty()) {
+ usedResources.erase(slaveId);
+ }
+
executors[slaveId].erase(executorId);
if (executors[slaveId].empty()) {
executors.erase(slaveId);
@@ -1183,10 +1204,35 @@ struct Framework
hashmap<SlaveID, hashmap<ExecutorID, ExecutorInfo>> executors;
- // TODO(bmahler): Summing set and ranges resources across slaves
- // does not yield meaningful totals.
- Resources usedResources; // Active task / executor resources.
- Resources offeredResources; // Offered resources.
+ // NOTE: For the used and offered resources below, we keep the
+ // total as well as partitioned by SlaveID.
+ // We expose the total resources via the HTTP endpoint, and we
+ // keep a running total of the resources because looping over the
+ // slaves to sum the resources has led to perf issues (MESOS-1862).
+ // We keep the resources partitioned by SlaveID because non-scalar
+ // resources can be lost when summing them up across multiple
+ // slaves (MESOS-2373).
+ //
+ // Also note that keeping the totals is safe even though it yields
+ // incorrect results for non-scalar resources.
+ // (1) For overlapping set items / ranges across slaves, these
+ // will get added N times but only represented once.
+ // (2) When an initial subtraction occurs (N-1), the resource is
+ // no longer represented. (This is the source of the bug).
+ // (3) When any further subtractions occur (N-(1+M)), the
+ // Resources simply ignores the subtraction since there's
+ // nothing to remove, so this is safe for now.
+
+ // TODO(mpark): Strip the non-scalar resources out of the totals
+ // in order to avoid reporting incorrect statistics (MESOS-2623).
+
+ // Active task / executor resources.
+ Resources totalUsedResources;
+ hashmap<SlaveID, Resources> usedResources;
+
+ // Offered resources.
+ Resources totalOfferedResources;
+ hashmap<SlaveID, Resources> offeredResources;
private:
Framework(const Framework&); // No copying.
@@ -1225,8 +1271,8 @@ struct Role
{
Resources resources;
foreachvalue (Framework* framework, frameworks) {
- resources += framework->usedResources;
- resources += framework->offeredResources;
+ resources += framework->totalUsedResources;
+ resources += framework->totalOfferedResources;
}
return resources;