You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2012/04/25 03:23:39 UTC
svn commit: r1330079 - in /incubator/mesos/trunk: include/mesos/mesos.proto
include/mesos/scheduler.hpp src/master/constants.hpp src/master/master.cpp
src/master/master.hpp
Author: benh
Date: Wed Apr 25 01:23:38 2012
New Revision: 1330079
URL: http://svn.apache.org/viewvc?rev=1330079&view=rev
Log:
Made framework failover timeout configurable (contributed by Thomas Marshall).
Modified:
incubator/mesos/trunk/include/mesos/mesos.proto
incubator/mesos/trunk/include/mesos/scheduler.hpp
incubator/mesos/trunk/src/master/constants.hpp
incubator/mesos/trunk/src/master/master.cpp
incubator/mesos/trunk/src/master/master.hpp
Modified: incubator/mesos/trunk/include/mesos/mesos.proto
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/include/mesos/mesos.proto?rev=1330079&r1=1330078&r2=1330079&view=diff
==============================================================================
--- incubator/mesos/trunk/include/mesos/mesos.proto (original)
+++ incubator/mesos/trunk/include/mesos/mesos.proto Wed Apr 25 01:23:38 2012
@@ -88,12 +88,15 @@ message ExecutorID {
* ID is only available after a framework has registered, however, it
* is included here in order to facilitate scheduler failover (i.e.,
* if it is set then the MesosSchedulerDriver expects the scheduler is
- * performing failover).
+ * performing failover). The amount of time that the master will wait
+ * for the scheduler to fail over before removing the framework is
+ * specified by failover_timeout (in seconds).
*/
message FrameworkInfo {
required string user = 1;
required string name = 2;
optional FrameworkID id = 3;
+ optional double failover_timeout = 4 [default = 0.0];
}
Modified: incubator/mesos/trunk/include/mesos/scheduler.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/include/mesos/scheduler.hpp?rev=1330079&r1=1330078&r2=1330079&view=diff
==============================================================================
--- incubator/mesos/trunk/include/mesos/scheduler.hpp (original)
+++ incubator/mesos/trunk/include/mesos/scheduler.hpp Wed Apr 25 01:23:38 2012
@@ -197,7 +197,7 @@ public:
* false then it is expected that this framework will never
* reconnect to Mesos and all of its executors and tasks can be
* terminated. Otherwise, all executors and tasks will remain
- * running (for some master specified failover timeout) allowing the
+ * running (for some framework-specific failover timeout) allowing the
* scheduler to reconnect (possibly in the same process, or from a
* different process, for example, on a different machine).
*/
Modified: incubator/mesos/trunk/src/master/constants.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master/constants.hpp?rev=1330079&r1=1330078&r2=1330079&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master/constants.hpp (original)
+++ incubator/mesos/trunk/src/master/constants.hpp Wed Apr 25 01:23:38 2012
@@ -50,9 +50,6 @@ const double SLAVE_PONG_TIMEOUT = 15.0;
// Maximum number of timeouts until slave is considered failed.
const int MAX_SLAVE_TIMEOUTS = 5;
-// Time to wait for a framework to failover.
-const double FRAMEWORK_FAILOVER_TIMEOUT = 1.0;
-
// Maximum number of completed frameworks to store in the cache.
// TODO(thomasm): Make configurable.
const int MAX_COMPLETED_FRAMEWORKS = 100;
Modified: incubator/mesos/trunk/src/master/master.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master/master.cpp?rev=1330079&r1=1330078&r2=1330079&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master/master.cpp (original)
+++ incubator/mesos/trunk/src/master/master.cpp Wed Apr 25 01:23:38 2012
@@ -229,11 +229,6 @@ void Master::registerOptions(Configurato
"root_submissions",
"Can root submit frameworks?",
true);
-
- configurator->addOption<int>(
- "failover_timeout",
- "Framework failover timeout in seconds",
- FRAMEWORK_FAILOVER_TIMEOUT);
}
@@ -294,8 +289,6 @@ void Master::initialize()
nextSlaveId = 0;
nextOfferId = 0;
- failoverTimeout = conf.get<int>("failover_timeout", FRAMEWORK_FAILOVER_TIMEOUT);
-
// Start all the statistics at 0.
stats.tasks[TASK_STAGING] = 0;
stats.tasks[TASK_STARTING] = 0;
@@ -434,6 +427,11 @@ void Master::exited(const UPID& pid)
// Stop sending offers here for now.
framework->active = false;
+ double failoverTimeout = framework->info.failover_timeout();
+
+ LOG(INFO) << "Giving framework " << framework->id << " "
+ << failoverTimeout << " seconds to failover";
+
// Delay dispatching a message to ourselves for the timeout.
delay(failoverTimeout, self(),
&Master::frameworkFailoverTimeout,
Modified: incubator/mesos/trunk/src/master/master.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/master/master.hpp?rev=1330079&r1=1330078&r2=1330079&view=diff
==============================================================================
--- incubator/mesos/trunk/src/master/master.hpp (original)
+++ incubator/mesos/trunk/src/master/master.hpp Wed Apr 25 01:23:38 2012
@@ -212,8 +212,6 @@ private:
std::list<Framework> completedFrameworks;
- double failoverTimeout; // Failover timeout for frameworks, in seconds.
-
int64_t nextFrameworkId; // Used to give each framework a unique ID.
int64_t nextOfferId; // Used to give each slot offer a unique ID.
int64_t nextSlaveId; // Used to give each slave a unique ID.