You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/08/14 20:07:29 UTC

[06/18] git commit: Improved the allocator to not offer non-checkpointing slave resources to checkpointing frameworks.

Improved the allocator to not offer non-checkpointing slave resources
to checkpointing frameworks.

Review: https://reviews.apache.org/r/13407


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/ea6c766b
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/ea6c766b
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/ea6c766b

Branch: refs/heads/master
Commit: ea6c766b57b1b59e218e4fc47befede762e9231a
Parents: 6da3267
Author: Vinod Kone <vi...@twitter.com>
Authored: Wed Aug 7 22:46:19 2013 -0700
Committer: Vinod Kone <vi...@twitter.com>
Committed: Tue Aug 13 14:09:28 2013 -0700

----------------------------------------------------------------------
 src/master/hierarchical_allocator_process.hpp | 25 ++++++++++++++++------
 src/master/master.cpp                         | 15 ++++---------
 src/tests/slave_recovery_tests.cpp            | 17 ++++++++-------
 3 files changed, 32 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/ea6c766b/src/master/hierarchical_allocator_process.hpp
----------------------------------------------------------------------
diff --git a/src/master/hierarchical_allocator_process.hpp b/src/master/hierarchical_allocator_process.hpp
index 76465eb..183b205 100644
--- a/src/master/hierarchical_allocator_process.hpp
+++ b/src/master/hierarchical_allocator_process.hpp
@@ -61,6 +61,7 @@ struct Slave
   Slave(const SlaveInfo& _info)
     : available(_info.resources()),
       whitelisted(false),
+      checkpoint(_info.checkpoint()),
       info(_info) {}
 
   Resources resources() const { return info.resources(); }
@@ -74,6 +75,7 @@ struct Slave
   // frameworks.
   bool whitelisted;
 
+  bool checkpoint;
 private:
   SlaveInfo info;
 };
@@ -84,13 +86,15 @@ struct Framework
   Framework() {}
 
   Framework(const FrameworkInfo& _info)
-    : info(_info) {}
+    : checkpoint(_info.checkpoint()),
+      info(_info) {}
 
   std::string role() const { return info.role(); }
 
   // Filters that have been added by this framework.
   hashset<Filter*> filters;
 
+  bool checkpoint;
 private:
   FrameworkInfo info;
 };
@@ -773,19 +777,28 @@ HierarchicalAllocatorProcess<RoleSorter, FrameworkSorter>::isFiltered(
     const SlaveID& slaveId,
     const Resources& resources)
 {
-  bool filtered = false;
-
   CHECK(frameworks.contains(frameworkId));
+  CHECK(slaves.contains(slaveId));
+
+  // Do not offer a non-checkpointing slave's resources to a checkpointing
+  // framework. This is a short term fix until the following is resolved:
+  // https://issues.apache.org/jira/browse/MESOS-444.
+  if (frameworks[frameworkId].checkpoint && !slaves[slaveId].checkpoint) {
+    VLOG(1) << "Filtered " << resources
+            << " on non-checkpointing slave " << slaveId
+            << " for checkpointing framework " << frameworkId;
+    return true;
+  }
+
   foreach (Filter* filter, frameworks[frameworkId].filters) {
     if (filter->filter(slaveId, resources)) {
       VLOG(1) << "Filtered " << resources
               << " on slave " << slaveId
               << " for framework " << frameworkId;
-      filtered = true;
-      break;
+      return true;
     }
   }
-  return filtered;
+  return false;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/ea6c766b/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index b6d12a3..0675b52 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1349,17 +1349,10 @@ void Master::offer(const FrameworkID& frameworkId,
 
     Slave* slave = slaves[slaveId];
 
-    // Do not offer a non-checkpointing slave's resources to a checkpointing
-    // framework. This is a short term fix until the following is resolved:
-    // https://issues.apache.org/jira/browse/MESOS-444.
-    if (framework->info.checkpoint() && !slave->info.checkpoint()) {
-      LOG(WARNING) << "Master returning resources offered to checkpointing "
-                   << "framework " << frameworkId << " because slave "
-                   << slaveId << " is not checkpointing";
-
-      allocator->resourcesRecovered(frameworkId, slaveId, offered);
-      continue;
-    }
+    CHECK(slave->info.checkpoint() || !framework->info.checkpoint())
+        << "Resources of non checkpointing slave " << slaveId
+        << " (" << slave->info.hostname() << ") are being offered to"
+        << " checkpointing framework " << frameworkId;
 
     // This could happen if the allocator dispatched 'Master::offer' before
     // it received 'Allocator::slaveRemoved' from the master.

http://git-wip-us.apache.org/repos/asf/mesos/blob/ea6c766b/src/tests/slave_recovery_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_recovery_tests.cpp b/src/tests/slave_recovery_tests.cpp
index a0734c3..6de8108 100644
--- a/src/tests/slave_recovery_tests.cpp
+++ b/src/tests/slave_recovery_tests.cpp
@@ -1034,9 +1034,16 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingSlave)
   slave::Flags flags = this->CreateSlaveFlags();
   flags.checkpoint = false;
 
+  Clock::pause();
+
+  Future<RegisterSlaveMessage> registerSlaveMessage =
+    FUTURE_PROTOBUF(RegisterSlaveMessage(), _, _);
+
   Try<PID<Slave> > slave = this->StartSlave(&isolator, flags);
   ASSERT_SOME(slave);
 
+  AWAIT_READY(registerSlaveMessage);
+
   MockScheduler sched;
 
   // Enable checkpointing for the framework.
@@ -1053,17 +1060,11 @@ TYPED_TEST(SlaveRecoveryTest, NonCheckpointingSlave)
   EXPECT_CALL(sched, resourceOffers(_, _))
     .Times(0); // No offers should be received!
 
-  Future<Nothing> offer = FUTURE_DISPATCH(_, &Master::offer);
-
-  Clock::pause();
-
   driver.start();
 
-  AWAIT_READY(registered);
-
-  // Wait for an offer to be made. We do a Clock::settle() here
+  // Wait for scheduler to register. We do a Clock::settle() here
   // to ensure that no offers are received by the scheduler.
-  AWAIT_READY(offer);
+  AWAIT_READY(registered);
   Clock::settle();
 
   driver.stop();