You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2015/04/17 23:14:24 UTC

mesos git commit: Updated Frameworks struct to store Resources by SlaveID.

Repository: mesos
Updated Branches:
  refs/heads/master 25844c57b -> a97ae7510


Updated Frameworks struct to store Resources by SlaveID.

Review: https://reviews.apache.org/r/31665


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a97ae751
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a97ae751
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a97ae751

Branch: refs/heads/master
Commit: a97ae751029a753ca054b214f78da37aa8050f64
Parents: 25844c5
Author: Michael Park <mc...@gmail.com>
Authored: Fri Apr 17 14:13:38 2015 -0700
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Fri Apr 17 14:13:49 2015 -0700

----------------------------------------------------------------------
 src/master/http.cpp   |  6 ++--
 src/master/master.cpp |  7 +++--
 src/master/master.hpp | 72 +++++++++++++++++++++++++++++++++++++---------
 3 files changed, 67 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/a97ae751/src/master/http.cpp
----------------------------------------------------------------------
diff --git a/src/master/http.cpp b/src/master/http.cpp
index f2b123d..00c22c4 100644
--- a/src/master/http.cpp
+++ b/src/master/http.cpp
@@ -122,11 +122,11 @@ JSON::Object model(const Framework& framework)
   // TODO(bmahler): Consider deprecating this in favor of the split
   // used and offered resources below.
   object.values["resources"] =
-    model(framework.usedResources + framework.offeredResources);
+    model(framework.totalUsedResources + framework.totalOfferedResources);
 
   // TODO(bmahler): Use these in the webui.
-  object.values["used_resources"] = model(framework.usedResources);
-  object.values["offered_resources"] = model(framework.offeredResources);
+  object.values["used_resources"] = model(framework.totalUsedResources);
+  object.values["offered_resources"] = model(framework.totalOfferedResources);
 
   object.values["hostname"] = framework.info.hostname();
   object.values["webui_url"] = framework.info.webui_url();

http://git-wip-us.apache.org/repos/asf/mesos/blob/a97ae751/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 02f35ac..944a943 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -4129,12 +4129,15 @@ void Master::addFramework(Framework* framework)
   roles[framework->info.role()]->addFramework(framework);
 
   // There should be no offered resources yet!
-  CHECK_EQ(Resources(), framework->offeredResources);
+  CHECK_EQ(Resources(), framework->totalOfferedResources);
 
+  // TODO(mpark): Once the allocator API is updated to operate on
+  // 'hashmap<SlaveID, Resources>' rather than 'Resources', pass
+  // 'framework->usedResources' instead.
   allocator->addFramework(
       framework->id(),
       framework->info,
-      framework->usedResources);
+      framework->totalUsedResources);
 
   // Export framework metrics.
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/a97ae751/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 4a1df58..c10e7c0 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -1007,7 +1007,8 @@ struct Framework
     tasks[task->task_id()] = task;
 
     if (!protobuf::isTerminalState(task->state())) {
-      usedResources += task->resources();
+      totalUsedResources += task->resources();
+      usedResources[task->slave_id()] += task->resources();
     }
   }
 
@@ -1022,7 +1023,11 @@ struct Framework
       << "Unknown task " << task->task_id()
       << " of framework " << task->framework_id();
 
-    usedResources -= task->resources();
+    totalUsedResources -= task->resources();
+    usedResources[task->slave_id()] -= task->resources();
+    if (usedResources[task->slave_id()].empty()) {
+      usedResources.erase(task->slave_id());
+    }
   }
 
   void addCompletedTask(const Task& task)
@@ -1038,7 +1043,11 @@ struct Framework
       << " of framework " << task->framework_id();
 
     if (!protobuf::isTerminalState(task->state())) {
-      usedResources -= task->resources();
+      totalUsedResources -= task->resources();
+      usedResources[task->slave_id()] -= task->resources();
+      if (usedResources[task->slave_id()].empty()) {
+        usedResources.erase(task->slave_id());
+      }
     }
 
     addCompletedTask(*task);
@@ -1050,7 +1059,8 @@ struct Framework
   {
     CHECK(!offers.contains(offer)) << "Duplicate offer " << offer->id();
     offers.insert(offer);
-    offeredResources += offer->resources();
+    totalOfferedResources += offer->resources();
+    offeredResources[offer->slave_id()] += offer->resources();
   }
 
   void removeOffer(Offer* offer)
@@ -1058,7 +1068,12 @@ struct Framework
     CHECK(offers.find(offer) != offers.end())
       << "Unknown offer " << offer->id();
 
-    offeredResources -= offer->resources();
+    totalOfferedResources -= offer->resources();
+    offeredResources[offer->slave_id()] -= offer->resources();
+    if (offeredResources[offer->slave_id()].empty()) {
+      offeredResources.erase(offer->slave_id());
+    }
+
     offers.erase(offer);
   }
 
@@ -1077,7 +1092,8 @@ struct Framework
       << " on slave " << slaveId;
 
     executors[slaveId][executorInfo.executor_id()] = executorInfo;
-    usedResources += executorInfo.resources();
+    totalUsedResources += executorInfo.resources();
+    usedResources[slaveId] += executorInfo.resources();
   }
 
   void removeExecutor(const SlaveID& slaveId,
@@ -1088,7 +1104,12 @@ struct Framework
       << " of framework " << id()
       << " of slave " << slaveId;
 
-    usedResources -= executors[slaveId][executorId].resources();
+    totalUsedResources -= executors[slaveId][executorId].resources();
+    usedResources[slaveId] -= executors[slaveId][executorId].resources();
+    if (usedResources[slaveId].empty()) {
+      usedResources.erase(slaveId);
+    }
+
     executors[slaveId].erase(executorId);
     if (executors[slaveId].empty()) {
       executors.erase(slaveId);
@@ -1183,10 +1204,35 @@ struct Framework
 
   hashmap<SlaveID, hashmap<ExecutorID, ExecutorInfo>> executors;
 
-  // TODO(bmahler): Summing set and ranges resources across slaves
-  // does not yield meaningful totals.
-  Resources usedResources;    // Active task / executor resources.
-  Resources offeredResources; // Offered resources.
+  // NOTE: For the used and offered resources below, we keep the
+  // total as well as partitioned by SlaveID.
+  // We expose the total resources via the HTTP endpoint, and we
+  // keep a running total of the resources because looping over the
+  // slaves to sum the resources has led to perf issues (MESOS-1862).
+  // We keep the resources partitioned by SlaveID because non-scalar
+  // resources can be lost when summing them up across multiple
+  // slaves (MESOS-2373).
+  //
+  // Also note that keeping the totals is safe even though it yields
+  // incorrect results for non-scalar resources.
+  //   (1) For overlapping set items / ranges across slaves, these
+  //       will get added N times but only represented once.
+  //   (2) When an initial subtraction occurs (N-1), the resource is
+  //       no longer represented. (This is the source of the bug).
+  //   (3) When any further subtractions occur (N-(1+M)), the
+  //       Resources simply ignores the subtraction since there's
+  //       nothing to remove, so this is safe for now.
+
+  // TODO(mpark): Strip the non-scalar resources out of the totals
+  // in order to avoid reporting incorrect statistics (MESOS-2623).
+
+  // Active task / executor resources.
+  Resources totalUsedResources;
+  hashmap<SlaveID, Resources> usedResources;
+
+  // Offered resources.
+  Resources totalOfferedResources;
+  hashmap<SlaveID, Resources> offeredResources;
 
 private:
   Framework(const Framework&);              // No copying.
@@ -1225,8 +1271,8 @@ struct Role
   {
     Resources resources;
     foreachvalue (Framework* framework, frameworks) {
-      resources += framework->usedResources;
-      resources += framework->offeredResources;
+      resources += framework->totalUsedResources;
+      resources += framework->totalOfferedResources;
     }
 
     return resources;