You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2012/04/25 03:23:39 UTC

svn commit: r1330079 - in /incubator/mesos/trunk: include/mesos/mesos.proto include/mesos/scheduler.hpp src/master/constants.hpp src/master/master.cpp src/master/master.hpp

Author: benh
Date: Wed Apr 25 01:23:38 2012
New Revision: 1330079

URL: http://svn.apache.org/viewvc?rev=1330079&view=rev
Log:
Made framework failover timeout configurable (contributed by Thomas Marshall).

Modified:
    incubator/mesos/trunk/include/mesos/mesos.proto
    incubator/mesos/trunk/include/mesos/scheduler.hpp
    incubator/mesos/trunk/src/master/constants.hpp
    incubator/mesos/trunk/src/master/master.cpp
    incubator/mesos/trunk/src/master/master.hpp

Modified: incubator/mesos/trunk/include/mesos/mesos.proto
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/include/mesos/mesos.proto?rev=1330079&r1=1330078&r2=1330079&view=diff
==============================================================================
--- incubator/mesos/trunk/include/mesos/mesos.proto (original)
+++ incubator/mesos/trunk/include/mesos/mesos.proto Wed Apr 25 01:23:38 2012
@@ -88,12 +88,15 @@ message ExecutorID {
  * ID is only available after a framework has registered, however, it
  * is included here in order to facilitate scheduler failover (i.e.,
  * if it is set then the MesosSchedulerDriver expects the scheduler is
- * performing failover).
+ * performing failover). The amount of time (in seconds) that the
+ * master will wait for the scheduler to fail over before removing
+ * the framework is specified by failover_timeout.
  */
 message FrameworkInfo {
   required string user = 1;
   required string name = 2;
   optional FrameworkID id = 3;
+  optional double failover_timeout = 4 [default = 0.0];
 }
 
 

Modified: incubator/mesos/trunk/include/mesos/scheduler.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/include/mesos/scheduler.hpp?rev=1330079&r1=1330078&r2=1330079&view=diff
==============================================================================
--- incubator/mesos/trunk/include/mesos/scheduler.hpp (original)
+++ incubator/mesos/trunk/include/mesos/scheduler.hpp Wed Apr 25 01:23:38 2012
@@ -197,7 +197,7 @@ public:
    * false then it is expected that this framework will never
    * reconnect to Mesos and all of it's executors and tasks can be
    * terminated. Otherwise, all executors and tasks will remain
-   * running (for some master specified failover timeout) allowing the
+   * running (for some framework-specific failover timeout) allowing the
    * scheduler to reconnect (possibly in the same process, or from a
    * different process, for example, on a different machine).
    */

Modified: incubator/mesos/trunk/src/master/constants.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master/constants.hpp?rev=1330079&r1=1330078&r2=1330079&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master/constants.hpp (original)
+++ incubator/mesos/trunk/src/master/constants.hpp Wed Apr 25 01:23:38 2012
@@ -50,9 +50,6 @@ const double SLAVE_PONG_TIMEOUT = 15.0;
 // Maximum number of timeouts until slave is considered failed.
 const int MAX_SLAVE_TIMEOUTS = 5;
 
-// Time to wait for a framework to failover.
-const double FRAMEWORK_FAILOVER_TIMEOUT = 1.0;
-
 // Maximum number of completed frameworks to store in the cache.
 // TODO(thomasm): Make configurable.
 const int MAX_COMPLETED_FRAMEWORKS = 100;

Modified: incubator/mesos/trunk/src/master/master.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master/master.cpp?rev=1330079&r1=1330078&r2=1330079&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master/master.cpp (original)
+++ incubator/mesos/trunk/src/master/master.cpp Wed Apr 25 01:23:38 2012
@@ -229,11 +229,6 @@ void Master::registerOptions(Configurato
       "root_submissions",
       "Can root submit frameworks?",
       true);
-
-  configurator->addOption<int>(
-      "failover_timeout",
-      "Framework failover timeout in seconds",
-      FRAMEWORK_FAILOVER_TIMEOUT);
 }
 
 
@@ -294,8 +289,6 @@ void Master::initialize()
   nextSlaveId = 0;
   nextOfferId = 0;
 
-  failoverTimeout = conf.get<int>("failover_timeout", FRAMEWORK_FAILOVER_TIMEOUT);
-
   // Start all the statistics at 0.
   stats.tasks[TASK_STAGING] = 0;
   stats.tasks[TASK_STARTING] = 0;
@@ -434,6 +427,11 @@ void Master::exited(const UPID& pid)
       // Stop sending offers here for now.
       framework->active = false;
 
+      double failoverTimeout = framework->info.failover_timeout();
+
+      LOG(INFO) << "Giving framework " << framework->id << " "
+                << failoverTimeout << " seconds to failover";
+
       // Delay dispatching a message to ourselves for the timeout.
       delay(failoverTimeout, self(),
             &Master::frameworkFailoverTimeout,

Modified: incubator/mesos/trunk/src/master/master.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master/master.hpp?rev=1330079&r1=1330078&r2=1330079&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master/master.hpp (original)
+++ incubator/mesos/trunk/src/master/master.hpp Wed Apr 25 01:23:38 2012
@@ -212,8 +212,6 @@ private:
 
   std::list<Framework> completedFrameworks;
 
-  double failoverTimeout; // Failover timeout for frameworks, in seconds.
-
   int64_t nextFrameworkId; // Used to give each framework a unique ID.
   int64_t nextOfferId;     // Used to give each slot offer a unique ID.
   int64_t nextSlaveId;     // Used to give each slave a unique ID.