You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2014/04/17 04:28:14 UTC

[1/6] git commit: Consolidated slave re-registration Timers into a single Timer.

Repository: mesos
Updated Branches:
  refs/heads/master 81fc89d1d -> 7bf1e8a6b


Consolidated slave re-registration Timers into a single Timer.

Review: https://reviews.apache.org/r/19857


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/1a88f09c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/1a88f09c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/1a88f09c

Branch: refs/heads/master
Commit: 1a88f09c7188f5456d7c8aa1606b95b7cac5b650
Parents: 81fc89d
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Fri Mar 28 12:39:48 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed Apr 16 19:17:03 2014 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 91 ++++++++++++++++++++++++----------------------
 src/master/master.hpp | 12 ++++--
 2 files changed, 56 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/1a88f09c/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 3c3c989..3803c60 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -609,14 +609,14 @@ void Master::finalize()
   }
   roles.clear();
 
-  foreachvalue (const Timer& timer, slaves.recovered) {
-    // NOTE: This is necessary during tests because we don't want the
-    // timer to fire in a different test and invoke the callback.
-    // The callback would be invoked because the master pid doesn't
-    // change across the tests.
-    // TODO(vinod): This seems to be a bug in libprocess or the
-    // testing infrastructure.
-    Timer::cancel(timer);
+  // NOTE: This is necessary during tests because we don't want the
+  // timer to fire in a different test and invoke the callback.
+  // The callback would be invoked because the master pid doesn't
+  // change across the tests.
+  // TODO(vinod): This seems to be a bug in libprocess or the
+  // testing infrastructure.
+  if (slaves.recoveredTimer.isSome()) {
+    Timer::cancel(slaves.recoveredTimer.get());
   }
 
   terminate(whitelistWatcher);
@@ -747,51 +747,59 @@ Future<Nothing> Master::recover()
 
 Future<Nothing> Master::_recover(const Registry& registry)
 {
-  const Registry::Slaves& slaves = registry.slaves();
-
-  foreach (const Registry::Slave& slave, slaves.slaves()) {
-    // Set up a timeout for this slave to re-register. This timeout
-    // is based on the maximum amount of time the SlaveObserver
-    // allows slaves to not respond to health checks. Re-registration
-    // of the slave will cancel this timer.
-    // XXX: What if there is a ZK issue that delays detection for slaves?
-    //      Should we be more conservative here to avoid a full shutdown?
-    this->slaves.recovered[slave.info().id()] =
-      delay(SLAVE_PING_TIMEOUT * MAX_SLAVE_PING_TIMEOUTS,
-            self(),
-            &Self::__recoverSlaveTimeout,
-            slave);
+  foreach (const Registry::Slave& slave, registry.slaves().slaves()) {
+    slaves.recovered.insert(slave.info().id());
   }
 
+  // Set up a timeout for slaves to re-register. This timeout is based
+  // on the maximum amount of time the SlaveObserver allows slaves to
+  // not respond to health checks.
+  // TODO(bmahler): Consider making this configurable.
+  slaves.recoveredTimer =
+    delay(SLAVE_PING_TIMEOUT * MAX_SLAVE_PING_TIMEOUTS,
+          self(),
+          &Self::recoveredSlavesTimeout,
+          registry);
+
   // Recovery is now complete!
-  LOG(INFO) << "Recovered " << slaves.slaves().size() << " slaves "
-            << " from the Registry (" << Bytes(registry.ByteSize()) << ")";
+  LOG(INFO) << "Recovered " << registry.slaves().slaves().size() << " slaves"
+            << " from the Registry (" << Bytes(registry.ByteSize()) << ")"
+            << " ; allowing " << SLAVE_PING_TIMEOUT * MAX_SLAVE_PING_TIMEOUTS
+            << " for slaves to re-register";
 
   return Nothing();
 }
 
 
-void Master::__recoverSlaveTimeout(const Registry::Slave& slave)
+void Master::recoveredSlavesTimeout(const Registry& registry)
 {
   CHECK(elected());
 
-  if (!slaves.recovered.contains(slave.info().id())) {
-    return; // Slave re-registered.
-  }
+  // TODO(bmahler): Provide a (configurable) limit on the number of
+  // slaves that can be removed here, e.g. maximum 10% of slaves can
+  // be removed after failover if they do not re-register.
+  // This can serve as a configurable safety net for operators of
+  // production environments.
 
-  LOG(WARNING) << "Slave " << slave.info().id()
-               << " (" << slave.info().hostname() << ") did not re-register "
-               << "within the timeout; Removing it from the registrar";
+  foreach (const Registry::Slave& slave, registry.slaves().slaves()) {
+    if (!slaves.recovered.contains(slave.info().id())) {
+      continue; // Slave re-registered.
+    }
 
-  slaves.recovered.erase(slave.info().id());
-  slaves.removing.insert(slave.info().id());
+    LOG(WARNING) << "Slave " << slave.info().id()
+                 << " (" << slave.info().hostname() << ") did not re-register "
+                 << "within the timeout; removing it from the registrar";
 
-  registrar->apply(Owned<Operation>(new RemoveSlave(slave.info())))
-    .onAny(defer(self(),
-                 &Self::_removeSlave,
-                 slave.info(),
-                 vector<StatusUpdate>(), // No TASK_LOST updates to send.
-                 lambda::_1));
+    slaves.recovered.erase(slave.info().id());
+    slaves.removing.insert(slave.info().id());
+
+    registrar->apply(Owned<Operation>(new RemoveSlave(slave.info())))
+      .onAny(defer(self(),
+                   &Self::_removeSlave,
+                   slave.info(),
+                   vector<StatusUpdate>(), // No TASK_LOST updates to send.
+                   lambda::_1));
+  }
 }
 
 
@@ -2156,10 +2164,7 @@ void Master::reregisterSlave(
 
   // Ensure we don't remove the slave for not re-registering after
   // we've recovered it from the registry.
-  if (slaves.recovered.contains(slaveInfo.id())) {
-    Timer::cancel(slaves.recovered[slaveInfo.id()]);
-    slaves.recovered.erase(slaveInfo.id());
-  }
+  slaves.recovered.erase(slaveInfo.id());
 
   // If we're already re-registering this slave, then no need to ask
   // the registrar again.

http://git-wip-us.apache.org/repos/asf/mesos/blob/1a88f09c/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index fef59c9..2fe0379 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -204,7 +204,7 @@ protected:
   // Recovers state from the registrar.
   process::Future<Nothing> recover();
   process::Future<Nothing> _recover(const Registry& registry);
-  void __recoverSlaveTimeout(const Registry::Slave& slave);
+  void recoveredSlavesTimeout(const Registry& registry);
 
   void _registerSlave(
       const SlaveInfo& slaveInfo,
@@ -387,10 +387,14 @@ private:
   {
     Slaves() : deactivated(MAX_DEACTIVATED_SLAVES) {}
 
+    // Imposes a time limit for slaves that we recover from the
+    // registry to re-register with the master.
+    Option<process::Timer> recoveredTimer;
+
     // Slaves that have been recovered from the registrar but have yet
-    // to re-register. We keep a Timer for the removal of these slaves
-    // so that we can cancel it to avoid unnecessary dispatches.
-    hashmap<SlaveID, process::Timer> recovered;
+    // to re-register. We keep a "reregistrationTimer" above to ensure
+    // we remove these slaves if they do not re-register.
+    hashset<SlaveID> recovered;
 
     // Slaves that are in the process of registering.
     hashset<process::UPID> registering;


[2/6] git commit: Used LogStorage for all tests.

Posted by bm...@apache.org.
Used LogStorage for all tests.

Review: https://reviews.apache.org/r/20431


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7bf1e8a6
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7bf1e8a6
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7bf1e8a6

Branch: refs/heads/master
Commit: 7bf1e8a6b6d1160c65d37a03f38341604a122078
Parents: a378360
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Wed Apr 16 15:57:28 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed Apr 16 19:17:04 2014 -0700

----------------------------------------------------------------------
 src/master/master.hpp |  7 ++++---
 src/tests/cluster.hpp | 41 +++++++++++++++++++++++++++++++++++------
 2 files changed, 39 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/7bf1e8a6/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index 2fe0379..e4c7862 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -190,6 +190,10 @@ public:
   // Made public for testing purposes.
   void lostCandidacy(const process::Future<Nothing>& lost);
 
+  // Continuation of recover().
+  // Made public for testing purposes.
+  process::Future<Nothing> _recover(const Registry& registry);
+
   MasterInfo info() const
   {
     return info_;
@@ -203,7 +207,6 @@ protected:
 
   // Recovers state from the registrar.
   process::Future<Nothing> recover();
-  process::Future<Nothing> _recover(const Registry& registry);
   void recoveredSlavesTimeout(const Registry& registry);
 
   void _registerSlave(
@@ -354,8 +357,6 @@ private:
   Master(const Master&);              // No copying.
   Master& operator = (const Master&); // No assigning.
 
-  friend struct SlaveRegistrar;
-  friend struct SlaveReregistrar;
   friend struct OfferVisitor;
 
   const Flags flags;

http://git-wip-us.apache.org/repos/asf/mesos/blob/7bf1e8a6/src/tests/cluster.hpp
----------------------------------------------------------------------
diff --git a/src/tests/cluster.hpp b/src/tests/cluster.hpp
index 49d1f40..21a081c 100644
--- a/src/tests/cluster.hpp
+++ b/src/tests/cluster.hpp
@@ -21,6 +21,8 @@
 
 #include <map>
 
+#include <process/gmock.hpp>
+#include <process/gtest.hpp>
 #include <process/owned.hpp>
 #include <process/pid.hpp>
 #include <process/process.hpp>
@@ -40,6 +42,10 @@
 #include "linux/cgroups.hpp"
 #endif // __linux__
 
+#include "log/log.hpp"
+
+#include "log/tool/initialize.hpp"
+
 #include "master/allocator.hpp"
 #include "master/contender.hpp"
 #include "master/detector.hpp"
@@ -54,6 +60,7 @@
 #include "slave/slave.hpp"
 
 #include "state/in_memory.hpp"
+#include "state/log.hpp"
 #include "state/protobuf.hpp"
 #include "state/storage.hpp"
 
@@ -115,12 +122,18 @@ public:
         : master(NULL),
           allocator(NULL),
           allocatorProcess(NULL),
+          log(NULL),
+          storage(NULL),
+          state(NULL),
+          registrar(NULL),
+          repairer(NULL),
           contender(NULL),
           detector(NULL) {}
 
       master::Master* master;
       master::allocator::Allocator* allocator;
       master::allocator::AllocatorProcess* allocatorProcess;
+      log::Log* log;
       state::Storage* storage;
       state::protobuf::State* state;
       master::Registrar* registrar;
@@ -275,12 +288,18 @@ inline Try<process::PID<master::Master> > Cluster::Masters::start(
         new master::allocator::Allocator(allocatorProcess.get());
   }
 
-  if (flags.registry == "in_memory") {
-    master.storage = new state::InMemoryStorage();
-  } else {
-    return Error("'" + flags.registry + "' is not a supported"
-                 " option for registry persistence");
-  }
+  // TODO(bmahler): Add flag support for the replicated log and then
+  // just construct based on the flags.
+  log::tool::Initialize initializer;
+
+  initializer.flags.path = path::join(flags.work_dir, ".log");
+  initializer.execute();
+
+  master.log = new log::Log(
+      1,
+      initializer.flags.path.get(),
+      std::set<process::UPID>());
+  master.storage = new state::LogStorage(master.log);
 
   CHECK_NOTNULL(master.storage);
 
@@ -315,6 +334,16 @@ inline Try<process::PID<master::Master> > Cluster::Masters::start(
 
   masters[pid] = master;
 
+  // Speed up the tests by ensuring that the Master is recovered
+  // before the test proceeds. Otherwise, authentication and
+  // registration messages may be dropped, causing delayed retries.
+  process::Future<Nothing> _recover =
+    FUTURE_DISPATCH(pid, &master::Master::_recover);
+
+  if (!_recover.await(Seconds(10))) {
+    LOG(FATAL) << "Failed to wait for _recover";
+  }
+
   return pid;
 }
 


[6/6] git commit: Added task reconciliation for unknown slaves and tasks.

Posted by bm...@apache.org.
Added task reconciliation for unknown slaves and tasks.

Review: https://reviews.apache.org/r/20104


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/24a22d20
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/24a22d20
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/24a22d20

Branch: refs/heads/master
Commit: 24a22d20cd375c51c75ea8c96c55f83af530326b
Parents: 8e02ce2
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Fri Mar 28 18:49:43 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed Apr 16 19:17:04 2014 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 75 ++++++++++++++++++++++++++++++----------------
 1 file changed, 50 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/24a22d20/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index b37c6f2..66e4940 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -2494,40 +2494,65 @@ void Master::reconcileTasks(
   LOG(INFO) << "Performing best-effort task state reconciliation for framework "
             << frameworkId;
 
-  // Verify expected task states and send status updates whenever expectations
-  // are not met. When:
-  //   1) Slave is unknown.*
-  //   2) Task is unknown.*
-  //   3) Task state has changed.
+  // Reconciliation occurs for the following cases:
+  //   (1) If the slave is unknown, we send TASK_LOST.
+  //   (2) If the task is missing on the slave, we send TASK_LOST.
+  //   (3) If the task state differs, we send the latest state.
   //
-  // *) TODO(nnielsen): Missing slaves and tasks are currently treated silently
-  //                    i.e. nothing is sent. To give accurate responses in
-  //                    these cases during master fail-over, we need to leverage
-  //                    the registrar.
+  // (1) is applicable only when operating with a strict registry.
   foreach (const TaskStatus& status, statuses) {
     if (!status.has_slave_id()) {
-      LOG(WARNING) << "Status from task " << status.task_id()
-                   << " does not include slave id";
+      LOG(WARNING) << "Status for task " << status.task_id()
+                   << " from framework " << frameworkId
+                   << " does not include slave id; cannot reconcile";
       continue;
     }
 
-    Slave* slave = getSlave(status.slave_id());
-    if (slave != NULL) {
-      Task* task = slave->getTask(frameworkId, status.task_id());
-      if (task != NULL && task->state() != status.state()) {
-        const StatusUpdate& update = protobuf::createStatusUpdate(
+    Option<StatusUpdate> update;
+
+    // Check for a removed slave (case 1).
+    if (flags.registry_strict &&
+        !slaves.recovered.contains(status.slave_id()) &&
+        !slaves.reregistering.contains(status.slave_id()) &&
+        !slaves.activated.contains(status.slave_id()) &&
+        !slaves.removing.contains(status.slave_id())) {
+      // Slave is removed!
+      update = protobuf::createStatusUpdate(
           frameworkId,
-          task->slave_id(),
-          task->task_id(),
-          task->state(),
-          "Task state changed");
+          status.slave_id(),
+          status.task_id(),
+          TASK_LOST,
+          "Reconciliation: Slave is removed");
+    }
 
-        statusUpdate(update, UPID());
+    // Check for a known slave / task (cases (2) and (3)).
+    if (slaves.activated.contains(status.slave_id())) {
+      Slave* slave = CHECK_NOTNULL(slaves.activated[status.slave_id()]);
+      Task* task = slave->getTask(frameworkId, status.task_id());
+
+      if (task == NULL) {
+        // Case (2).
+        // TODO(bmahler): Leverage completed tasks if we track these
+        // in the future.
+        update = protobuf::createStatusUpdate(
+            frameworkId,
+            status.slave_id(),
+            status.task_id(),
+            TASK_LOST,
+            "Reconciliation: Task is unknown to the slave");
+      } else if (task->state() != status.state()) {
+        // Case (3).
+        update = protobuf::createStatusUpdate(
+            frameworkId,
+            task->slave_id(),
+            task->task_id(),
+            task->state(),
+            "Reconciliation: Task state changed");
       }
-    } else {
-      // TODO(bmahler): We can answer here if and only if the slave is
-      // *completely* unknown to us. That is, it's not being tracked
-      // in any of our 'slaves' data.
+    }
+
+    if (update.isSome()) {
+      statusUpdate(update.get(), UPID());
     }
   }
 }


[4/6] git commit: Added a configurable limit on the percentage of slaves that can be removed after the re-registration timeout.

Posted by bm...@apache.org.
Added a configurable limit on the percentage of slaves that can be removed after the re-registration timeout.

Review: https://reviews.apache.org/r/20097


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/8e02ce20
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/8e02ce20
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/8e02ce20

Branch: refs/heads/master
Commit: 8e02ce2070faf676ee977bc31e2c412dc4652cb7
Parents: 1a88f09
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Fri Mar 28 17:15:56 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed Apr 16 19:17:04 2014 -0700

----------------------------------------------------------------------
 src/local/local.cpp      |  4 ---
 src/master/constants.cpp |  1 +
 src/master/constants.hpp | 11 +++++++
 src/master/flags.hpp     | 21 +++++++++++++
 src/master/main.cpp      |  4 ---
 src/master/master.cpp    | 71 +++++++++++++++++++++++++++++++++++++------
 src/tests/cluster.hpp    |  4 ---
 7 files changed, 94 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/8e02ce20/src/local/local.cpp
----------------------------------------------------------------------
diff --git a/src/local/local.cpp b/src/local/local.cpp
index 7daa5ec..297f35b 100644
--- a/src/local/local.cpp
+++ b/src/local/local.cpp
@@ -116,10 +116,6 @@ PID<Master> launch(const Flags& flags, Allocator* _allocator)
               << "master flags from the environment: " << load.error();
     }
 
-    if (flags.registry_strict) {
-      EXIT(1) << "Cannot run with --registry_strict; currently not supported";
-    }
-
     if (flags.registry == "in_memory") {
       storage = new state::InMemoryStorage();
     } else {

http://git-wip-us.apache.org/repos/asf/mesos/blob/8e02ce20/src/master/constants.cpp
----------------------------------------------------------------------
diff --git a/src/master/constants.cpp b/src/master/constants.cpp
index 1cb8f22..ed966bc 100644
--- a/src/master/constants.cpp
+++ b/src/master/constants.cpp
@@ -33,6 +33,7 @@ const double MIN_CPUS = 0.1;
 const Bytes MIN_MEM = Megabytes(32);
 const Duration SLAVE_PING_TIMEOUT = Seconds(15);
 const uint32_t MAX_SLAVE_PING_TIMEOUTS = 5;
+const double RECOVERY_SLAVE_REMOVAL_PERCENT_LIMIT = 1.0; // 100%.
 const size_t MAX_DEACTIVATED_SLAVES = 100000;
 const uint32_t MAX_COMPLETED_FRAMEWORKS = 50;
 const uint32_t MAX_COMPLETED_TASKS_PER_FRAMEWORK = 1000;

http://git-wip-us.apache.org/repos/asf/mesos/blob/8e02ce20/src/master/constants.hpp
----------------------------------------------------------------------
diff --git a/src/master/constants.hpp b/src/master/constants.hpp
index 52d8d77..27ae4f8 100644
--- a/src/master/constants.hpp
+++ b/src/master/constants.hpp
@@ -53,6 +53,17 @@ extern const Duration SLAVE_PING_TIMEOUT;
 // Maximum number of ping timeouts until slave is considered failed.
 extern const uint32_t MAX_SLAVE_PING_TIMEOUTS;
 
+// Default limit on the percentage of slaves that will be removed
+// after recovering if no re-registration attempts were made.
+// TODO(bmahler): There's no value here that works for all setups.
+// Currently the default is 100% which is favorable to those running
+// small clusters or experimenting with Mesos. However, it's important
+// that we also prevent the catastrophic 100% removal case for
+// production clusters. This TODO is to provide a --production flag
+// which would allow flag defaults that are more appropriate for
+// production use-cases.
+extern const double RECOVERY_SLAVE_REMOVAL_PERCENT_LIMIT;
+
 // Maximum number of deactivated slaves to store in the cache.
 extern const size_t MAX_DEACTIVATED_SLAVES;
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/8e02ce20/src/master/flags.hpp
----------------------------------------------------------------------
diff --git a/src/master/flags.hpp b/src/master/flags.hpp
index 024f86d..acf3963 100644
--- a/src/master/flags.hpp
+++ b/src/master/flags.hpp
@@ -26,6 +26,8 @@
 
 #include "logging/flags.hpp"
 
+#include "master/constants.hpp"
+
 namespace mesos {
 namespace internal {
 namespace master {
@@ -72,6 +74,24 @@ public:
         "bootstrap the persistent state on a running cluster.",
         false);
 
+    // TODO(bmahler): Add a 'Percentage' abstraction for flags.
+    // TODO(bmahler): Add a --production flag for production defaults.
+    add(&Flags::recovery_slave_removal_limit,
+        "recovery_slave_removal_limit",
+        "For failovers, limit on the percentage of slaves that can be removed\n"
+        "from the registry *and* shutdown after the re-registration timeout\n"
+        "elapses. If the limit is exceeded, the master will fail over rather\n"
+        "than remove the slaves.\n"
+        "This can be used to provide safety guarantees for production\n"
+        "environments. Production environments may expect that across Master\n"
+        "failovers, at most a certain percentage of slaves will fail\n"
+        "permanently (e.g. due to rack-level failures).\n"
+        "Setting this limit would ensure that a human needs to get\n"
+        "involved should an unexpected widespread failure of slaves occur\n"
+        "in the cluster.\n"
+        "Values: [0%-100%]",
+        stringify(RECOVERY_SLAVE_REMOVAL_PERCENT_LIMIT * 100.0) + "%");
+
     add(&Flags::webui_dir,
         "webui_dir",
         "Location of the webui files/assets",
@@ -141,6 +161,7 @@ public:
   std::string work_dir;
   std::string registry;
   bool registry_strict;
+  std::string recovery_slave_removal_limit;
   std::string webui_dir;
   std::string whitelist;
   std::string user_sorter;

http://git-wip-us.apache.org/repos/asf/mesos/blob/8e02ce20/src/master/main.cpp
----------------------------------------------------------------------
diff --git a/src/master/main.cpp b/src/master/main.cpp
index f12f20a..ec23781 100644
--- a/src/master/main.cpp
+++ b/src/master/main.cpp
@@ -156,10 +156,6 @@ int main(int argc, char** argv)
   allocator::Allocator* allocator =
     new allocator::Allocator(allocatorProcess);
 
-  if (flags.registry_strict) {
-    EXIT(1) << "Cannot run with --registry_strict; currently not supported";
-  }
-
   state::Storage* storage = NULL;
 
   if (flags.registry == "in_memory") {

http://git-wip-us.apache.org/repos/asf/mesos/blob/8e02ce20/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 3803c60..b37c6f2 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -35,6 +35,7 @@
 #include <stout/memory.hpp>
 #include <stout/multihashmap.hpp>
 #include <stout/nothing.hpp>
+#include <stout/numify.hpp>
 #include <stout/os.hpp>
 #include <stout/path.hpp>
 #include <stout/stringify.hpp>
@@ -252,9 +253,6 @@ Master::Master(
   }
 
   info_.set_hostname(hostname);
-
-  LOG(INFO) << "Master ID: " << info_.id()
-            << " Hostname: " << info_.hostname();
 }
 
 
@@ -263,8 +261,39 @@ Master::~Master() {}
 
 void Master::initialize()
 {
-  LOG(INFO) << "Master started on " << string(self()).substr(7);
+  LOG(INFO) << "Master " << info_.id() << " (" << info_.hostname() << ")"
+            << " started on " << string(self()).substr(7);
+
+  if (flags.registry_strict) {
+    EXIT(1) << "Cannot run with --registry_strict; currently not supported";
+  }
+
+  // Parse the percentage for the slave removal limit.
+  // TODO(bmahler): Add a 'Percentage' abstraction.
+  if (!strings::endsWith(flags.recovery_slave_removal_limit, "%")) {
+    EXIT(1) << "Invalid value '" << flags.recovery_slave_removal_limit << "' "
+            << "for --recovery_slave_removal_percent_limit: " << "missing '%'";
+  }
+
+  Try<double> limit = numify<double>(
+      strings::remove(
+          flags.recovery_slave_removal_limit,
+          "%",
+          strings::SUFFIX));
+
+  if (limit.isError()) {
+    EXIT(1) << "Invalid value '" << flags.recovery_slave_removal_limit << "' "
+            << "for --recovery_slave_removal_percent_limit: " << limit.error();
 
+  }
+
+  if (limit.get() < 0.0 || limit.get() > 100.0) {
+    EXIT(1) << "Invalid value '" << flags.recovery_slave_removal_limit << "' "
+            << "for --recovery_slave_removal_percent_limit: "
+            << "Must be within [0%-100%]";
+  }
+
+  // Validate authentication flags.
   if (flags.authenticate) {
     LOG(INFO) << "Master only allowing authenticated frameworks to register!";
 
@@ -276,7 +305,7 @@ void Master::initialize()
     LOG(INFO) << "Master allowing unauthenticated frameworks to register!!";
   }
 
-
+  // Load credentials.
   if (flags.credentials.isSome()) {
     vector<Credential> credentials;
 
@@ -775,11 +804,33 @@ void Master::recoveredSlavesTimeout(const Registry& registry)
 {
   CHECK(elected());
 
-  // TODO(bmahler): Provide a (configurable) limit on the number of
-  // slaves that can be removed here, e.g. maximum 10% of slaves can
-  // be removed after failover if they do not re-register.
-  // This can serve as a configurable safety net for operators of
-  // production environments.
+  // TODO(bmahler): Add a 'Percentage' abstraction.
+  Try<double> limit_ = numify<double>(
+      strings::remove(
+          flags.recovery_slave_removal_limit,
+          "%",
+          strings::SUFFIX));
+
+  CHECK_SOME(limit_);
+
+  double limit = limit_.get() / 100.0;
+
+  // Compute the percentage of slaves to be removed, if it exceeds the
+  // safety-net limit, bail!
+  double removalPercentage =
+    (1.0 * slaves.recovered.size()) /
+    (1.0 * registry.slaves().slaves().size());
+
+  if (removalPercentage > limit) {
+    EXIT(1) << "Post-recovery slave removal limit exceeded! After "
+            << SLAVE_PING_TIMEOUT * MAX_SLAVE_PING_TIMEOUTS
+            << " there were " << slaves.recovered.size()
+            << " (" << removalPercentage * 100 << "%) slaves recovered from the"
+            << " registry that did not re-register: \n"
+            << stringify(slaves.recovered) << "\n "
+            << " The configured removal limit is " << limit * 100 << "%. Please"
+            << " investigate or increase this limit to proceed further";
+  }
 
   foreach (const Registry::Slave& slave, registry.slaves().slaves()) {
     if (!slaves.recovered.contains(slave.info().id())) {

http://git-wip-us.apache.org/repos/asf/mesos/blob/8e02ce20/src/tests/cluster.hpp
----------------------------------------------------------------------
diff --git a/src/tests/cluster.hpp b/src/tests/cluster.hpp
index 8479fe3..49d1f40 100644
--- a/src/tests/cluster.hpp
+++ b/src/tests/cluster.hpp
@@ -275,10 +275,6 @@ inline Try<process::PID<master::Master> > Cluster::Masters::start(
         new master::allocator::Allocator(allocatorProcess.get());
   }
 
-  if (flags.registry_strict) {
-    EXIT(1) << "Cannot run with --registry_strict; currently not supported";
-  }
-
   if (flags.registry == "in_memory") {
     master.storage = new state::InMemoryStorage();
   } else {


[5/6] git commit: If a non-strict registry is in use, do not inform frameworks that slaves are lost if they do not re-register after a failover.

Posted by bm...@apache.org.
If a non-strict registry is in use, do not inform frameworks that slaves are lost if they do not re-register after a failover.

Review: https://reviews.apache.org/r/20383


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/a3783604
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/a3783604
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/a3783604

Branch: refs/heads/master
Commit: a3783604289dd7b60c15928edb67f4ced7de25a3
Parents: f0226b3
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Tue Apr 15 11:28:38 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed Apr 16 19:17:04 2014 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/a3783604/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index d7d200b..0335b34 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -757,6 +757,12 @@ void Master::visit(const MessageEvent& event)
 }
 
 
+void fail(const string& message, const string& failure)
+{
+  LOG(FATAL) << message << ": " << failure;
+}
+
+
 Future<Nothing> Master::recover()
 {
   if (!elected()) {
@@ -842,21 +848,28 @@ void Master::recoveredSlavesTimeout(const Registry& registry)
                  << "within the timeout; removing it from the registrar";
 
     slaves.recovered.erase(slave.info().id());
-    slaves.removing.insert(slave.info().id());
 
-    registrar->apply(Owned<Operation>(new RemoveSlave(slave.info())))
-      .onAny(defer(self(),
-                   &Self::_removeSlave,
-                   slave.info(),
-                   vector<StatusUpdate>(), // No TASK_LOST updates to send.
-                   lambda::_1));
-  }
-}
+    if (flags.registry_strict) {
+      slaves.removing.insert(slave.info().id());
 
+      registrar->apply(Owned<Operation>(new RemoveSlave(slave.info())))
+        .onAny(defer(self(),
+                     &Self::_removeSlave,
+                     slave.info(),
+                     vector<StatusUpdate>(), // No TASK_LOST updates to send.
+                     lambda::_1));
+    } else {
+      // When a non-strict registry is in use, we want to ensure the
+      // registry is used in a write-only manner. Therefore we remove
+      // the slave from the registry but we do not inform the
+      // framework.
+      const string& message =
+        "Failed to remove slave " + stringify(slave.info().id());
 
-void fail(const string& message, const string& failure)
-{
-  LOG(FATAL) << message << ": " << failure;
+      registrar->apply(Owned<Operation>(new RemoveSlave(slave.info())))
+        .onFailed(lambda::bind(fail, message, lambda::_1));
+    }
+  }
 }
 
 


[3/6] git commit: Updated killTask to only send TASK_LOST when a strict registry is in use.

Posted by bm...@apache.org.
Updated killTask to only send TASK_LOST when a strict registry is in use.

Review: https://reviews.apache.org/r/20140


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f0226b3f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f0226b3f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f0226b3f

Branch: refs/heads/master
Commit: f0226b3f1f75654658e22c988a5fced6c87eeb32
Parents: 24a22d2
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Tue Apr 8 16:36:20 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed Apr 16 19:17:04 2014 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 58 ++++++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/f0226b3f/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 66e4940..d7d200b 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -1928,42 +1928,50 @@ void Master::killTask(
 
   Task* task = framework->getTask(taskId);
   if (task == NULL) {
-    // TODO(bmahler): If we knew the slaveID here we could reply more
-    // frequently in the presence of recovering slaves or slaves being
-    // removed.
+    // TODO(bmahler): Per MESOS-1200, if we knew the SlaveID here we
+    // could reply more frequently in the presence of slaves in a
+    // transitionary state.
     if (!slaves.recovered.empty()) {
-      LOG(WARNING) << "Cannot kill task " << taskId << " of framework "
-                   << frameworkId << " because the slave containing this task "
-                   << "may not have re-registered yet with this master";
-    } else if (!slaves.removing.empty()) {
-      LOG(WARNING) << "Cannot kill task " << taskId << " of framework "
-                   << frameworkId << " because the slave may be in the process "
-                   << "of being removed from the registrar, it is likely "
-                   << "TASK_LOST updates will occur when the slave is removed";
+      LOG(WARNING)
+        << "Cannot kill task " << taskId << " of framework " << frameworkId
+        << " because the slave containing this task may not have re-registered"
+        << " yet with this master";
     } else if (!slaves.reregistering.empty()) {
-      LOG(WARNING) << "Cannot kill task " << taskId << " of framework "
-                   << frameworkId << " because the slave may be in the process "
-                   << "of being re-admitted by the registrar";
-    } else {
-      // TODO(benh): Once the scheduler has persistence and
-      // high-availability of it's tasks, it will be the one that
-      // determines that this invocation of 'killTask' is silly, and
-      // can just return "locally" (i.e., after hitting only the other
-      // replicas). Unfortunately, it still won't know the slave id.
-
-      LOG(WARNING) << "Cannot kill task " << taskId
-                   << " of framework " << frameworkId
-                   << " because it cannot be found, sending TASK_LOST";
+      LOG(WARNING)
+        << "Cannot kill task " << taskId << " of framework " << frameworkId
+        << " because the slave may be in the process of being re-admitted by"
+        << " the registrar";
+    } else if (!slaves.removing.empty()) {
+      LOG(WARNING)
+        << "Cannot kill task " << taskId << " of framework " << frameworkId
+        << " because the slave may be in the process of being removed from the"
+        << " registrar, it is likely TASK_LOST updates will occur when the"
+        << " slave is removed";
+    } else if (flags.registry_strict) {
+      // For a strict registry, if there are no slaves transitioning
+      // between states, then this task is definitely unknown!
+      LOG(WARNING)
+        << "Cannot kill task " << taskId << " of framework " << frameworkId
+        << " because it cannot be found; sending TASK_LOST since there are "
+        << " no transitionary slaves";
+
       StatusUpdateMessage message;
       StatusUpdate* update = message.mutable_update();
       update->mutable_framework_id()->MergeFrom(frameworkId);
       TaskStatus* status = update->mutable_status();
       status->mutable_task_id()->MergeFrom(taskId);
       status->set_state(TASK_LOST);
-      status->set_message("Task not found");
+      status->set_message("Attempted to kill an unknown task");
       update->set_timestamp(Clock::now().secs());
       update->set_uuid(UUID::random().toBytes());
       send(framework->pid, message);
+    } else {
+      // For a non-strict registry, the slave holding this task could
+      // be readmitted even if we have no knowledge of it.
+      LOG(WARNING)
+        << "Cannot kill task " << taskId << " of framework " << frameworkId
+        << " because it cannot be found; cannot send TASK_LOST since a"
+        << " non-strict registry is in use";
     }
 
     return;