You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2012/01/25 18:19:28 UTC
svn commit: r1235838 - in /incubator/mesos/trunk: include/mesos/ src/
src/java/src/org/apache/mesos/ src/python/src/
Author: benh
Date: Wed Jan 25 17:19:27 2012
New Revision: 1235838
URL: http://svn.apache.org/viewvc?rev=1235838&view=rev
Log:
Added some preliminary documentation.
Modified:
incubator/mesos/trunk/include/mesos/executor.hpp
incubator/mesos/trunk/include/mesos/mesos.proto
incubator/mesos/trunk/include/mesos/scheduler.hpp
incubator/mesos/trunk/src/Makefile.am
incubator/mesos/trunk/src/java/src/org/apache/mesos/Executor.java
incubator/mesos/trunk/src/java/src/org/apache/mesos/ExecutorDriver.java
incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosExecutorDriver.java
incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosSchedulerDriver.java
incubator/mesos/trunk/src/java/src/org/apache/mesos/Scheduler.java
incubator/mesos/trunk/src/java/src/org/apache/mesos/SchedulerDriver.java
incubator/mesos/trunk/src/python/src/mesos.py
Modified: incubator/mesos/trunk/include/mesos/executor.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/include/mesos/executor.hpp?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/include/mesos/executor.hpp (original)
+++ incubator/mesos/trunk/include/mesos/executor.hpp Wed Jan 25 17:19:27 2012
@@ -23,34 +23,112 @@
#include <mesos/mesos.hpp>
+/**
+ * Mesos executor interface and executor driver. An executor is
+ * responsible for launching tasks in a framework specific way (i.e.,
+ * creating new threads, new processes, etc). One or more executors
+ * from the same framework may run concurrently on the same
+ * machine. Note that we use the term "executor" fairly loosely to
+ * refer to the code that implements the Executor interface (see
+ * below) as well as the program that is responsible for instantiating
+ * a new MesosExecutorDriver (also below). In fact, while a Mesos
+ * slave is responsible for (forking and) executing the "executor",
+ * there is no reason why whatever the slave executed might not itself
+ * actually execute another program which actually instantiates and
+ * runs the MesosExecutorDriver. The only contract with the slave is
+ * that the program that it invokes does not exit until the "executor"
+ * has completed. Thus, what the slave executes may be nothing more
+ * than a script which actually executes (or forks and waits) the
+ * "real" executor.
+ *
+ * IF YOU FIND YOURSELF MODIFYING COMMENTS HERE PLEASE CONSIDER MAKING
+ * THE SAME MODIFICATIONS FOR OTHER LANGUAGE BINDINGS (e.g., Java:
+ * src/java/src/org/apache/mesos, Python: src/python/src, etc.).
+ */
namespace mesos {
+// A few forward declarations.
class ExecutorDriver;
-namespace internal { class ExecutorProcess; }
-
+namespace internal {
+class ExecutorProcess;
+}
/**
- * Callback interface to be implemented by frameworks' executors.
+ * Callback interface to be implemented by frameworks' executors. Note
+ * that only one callback will be invoked at a time, so it is not
+ * recommended that you block within a callback because it may cause a
+ * deadlock.
+ *
+ * Each callback includes a pointer to the executor driver that was
+ * used to run this executor. The pointer will not change for the
+ * duration of an executor (i.e., from the point you do
+ * ExecutorDriver::start() to the point that ExecutorDriver::join()
+ * returns). This is intended for convenience so that an executor
+ * doesn't need to store a pointer to the driver itself.
*/
class Executor
{
public:
+ /**
+ * Empty virtual destructor (necessary to instantiate subclasses).
+ */
virtual ~Executor() {}
+ /**
+ * Invoked once the executor driver has been able to successfully
+ * connect with Mesos. See mesos.proto for a description of the
+ * ExecutorArgs argument. In particular, a scheduler can pass some
+ * data to its executors through the ExecutorInfo::data field which
+ * gets copied into the ExecutorArgs::data field. (TODO(benh): This
+ * callback will likely become two callbacks,
+ * registered/reregistered, once executors can outlive slaves.)
+ */
virtual void init(ExecutorDriver* driver, const ExecutorArgs& args) = 0;
+ /**
+ * Invoked when a task has been launched on this executor (initiated
+ * via Scheduler::launchTasks). Note that this task can be realized
+ * with a thread, a process, or some simple computation, however, no
+ * other callbacks will be invoked on this executor until this
+ * callback has returned.
+ */
virtual void launchTask(ExecutorDriver* driver,
const TaskDescription& task) = 0;
+ /**
+ * Invoked when a task running within this executor has been killed
+ * (via SchedulerDriver::killTask). Note that no status update will
+ * be sent on behalf of the executor, the executor is responsible
+ * for creating a new TaskStatus (i.e., with TASK_KILLED) and
+ * invoking ExecutorDriver::sendStatusUpdate.
+ */
virtual void killTask(ExecutorDriver* driver, const TaskID& taskId) = 0;
+ /**
+ * Invoked when a framework message has arrived for this
+ * executor. These messages are best effort; do not expect a
+ * framework message to be retransmitted in any reliable fashion.
+ */
virtual void frameworkMessage(ExecutorDriver* driver,
const std::string& data) = 0;
+ /**
+ * Invoked when the executor should terminate all of its currently
+ * running tasks. Note that after Mesos has determined that an
+ * executor has terminated any tasks that the executor did not send
+ * terminal status updates for (e.g., TASK_KILLED, TASK_FINISHED,
+ * TASK_FAILED, etc) a TASK_LOST status update will be created.
+ */
virtual void shutdown(ExecutorDriver* driver) = 0;
+ /**
+ * Invoked when a fatal error has occurred with the executor and/or
+ * executor driver. The driver will be aborted BEFORE invoking this
+ * callback. This function is deprecated and will probably be
+ * removed in a subsequent release.
+ */
virtual void error(ExecutorDriver* driver,
int code,
const std::string& message) = 0;
@@ -58,51 +136,112 @@ public:
/**
- * Abstract interface for driving an executor connected to Mesos.
- * This interface is used both to start the executor running (and
- * communicating with the slave) and to send information from the executor
- * to Mesos (such as status updates). Concrete implementations of
- * ExecutorDriver will take a Executor as a parameter in order to make
- * callbacks into it on various events.
+ * Abstract interface for connecting an executor to Mesos. This
+ * interface is used both to manage the executor's lifecycle (start
+ * it, stop it, or wait for it to finish) and to interact with Mesos
+ * (e.g., send status updates, send framework messages, etc.). See
+ * MesosExecutorDriver below for a concrete example of an
+ * ExecutorDriver.
*/
class ExecutorDriver
{
public:
+ /**
+ * Empty virtual destructor (necessary to instantiate subclasses).
+ */
virtual ~ExecutorDriver() {}
- // Lifecycle methods.
+ /**
+ * Starts the executor driver. This needs to be called before any
+ * other driver calls are made.
+ */
virtual Status start() = 0;
+
+ /**
+ * Stops the executor driver.
+ */
virtual Status stop() = 0;
+
+ /**
+ * Aborts the driver so that no more callbacks can be made to the
+ * executor. The semantics of abort and stop have deliberately been
+ * separated so that code can detect an aborted driver (i.e., via
+ * the return status of ExecutorDriver::join, see below), and
+ * instantiate and start another driver if desired (from within the
+ * same process ... although this functionality is currently not
+ * supported for executors).
+ */
virtual Status abort() = 0;
+
+ /**
+ * Waits for the driver to be stopped or aborted, possibly
+ * _blocking_ the current thread indefinitely. The return status of
+ * this function can be used to determine if the driver was aborted
+ * (see mesos.proto for a description of Status).
+ */
virtual Status join() = 0;
- virtual Status run() = 0; // Start and then join driver.
- // Communication methods from executor to Mesos.
+ /**
+ * Starts and immediately joins (i.e., blocks on) the driver.
+ */
+ virtual Status run() = 0;
+
+ /**
+ * Sends a status update to the framework scheduler, retrying as
+ * necessary until an acknowledgement has been received or the
+ * executor is terminated (in which case, a TASK_LOST status update
+ * will be sent). See Scheduler::statusUpdate for more information
+ * about status update acknowledgements.
+ */
virtual Status sendStatusUpdate(const TaskStatus& status) = 0;
+ /**
+ * Sends a message to the framework scheduler. These messages are
+ * best effort; do not expect a framework message to be
+ * retransmitted in any reliable fashion.
+ */
virtual Status sendFrameworkMessage(const std::string& data) = 0;
};
/**
- * Concrete implementation of ExecutorDriver that communicates with a
- * Mesos slave. The slave's location is read from environment variables
- * set by it when it execs the user's executor script; users only need
- * to create the MesosExecutorDriver and call run() on it.
+ * Concrete implementation of an ExecutorDriver that connects an
+ * Executor with a Mesos slave. The MesosExecutorDriver is thread-safe.
+ *
+ * The driver is responsible for invoking the Executor callbacks as it
+ * communicates with the Mesos slave.
+ *
+ * Note that blocking on the MesosExecutorDriver (e.g., via
+ * MesosExecutorDriver::join) doesn't affect the executor callbacks in
+ * any way because they are handled by a different thread.
+ *
+ * See src/examples/test_executor.cpp for an example of using the
+ * MesosExecutorDriver.
*/
class MesosExecutorDriver : public ExecutorDriver
{
public:
+ /**
+ * Creates a new driver that uses the specified Executor.
+ */
MesosExecutorDriver(Executor* executor);
+
+ /**
+ * This destructor will block indefinitely if
+ * MesosExecutorDriver::start was invoked successfully (possibly via
+ * MesosExecutorDriver::run) and MesosExecutorDriver::stop has not
+ * been invoked.
+ */
virtual ~MesosExecutorDriver();
- // Lifecycle methods
+ /**
+ * See ExecutorDriver for descriptions of these.
+ */
virtual Status start();
virtual Status stop();
virtual Status abort();
virtual Status join();
- virtual Status run(); // Start and then join driver
-
+ virtual Status run();
virtual Status sendStatusUpdate(const TaskStatus& status);
virtual Status sendFrameworkMessage(const std::string& data);
@@ -111,13 +250,13 @@ private:
Executor* executor;
- // Libprocess process for communicating with slave
+ // Libprocess process for communicating with slave.
internal::ExecutorProcess* process;
- // Mutex to enforce all non-callbacks are execute serially
+ // Mutex to enforce all non-callbacks are executed serially.
pthread_mutex_t mutex;
- // Condition variable for waiting until driver terminates
+ // Condition variable for waiting until driver terminates.
pthread_cond_t cond;
enum State {
Modified: incubator/mesos/trunk/include/mesos/mesos.proto
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/include/mesos/mesos.proto?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/include/mesos/mesos.proto (original)
+++ incubator/mesos/trunk/include/mesos/mesos.proto Wed Jan 25 17:19:27 2012
@@ -22,44 +22,112 @@ option java_package = "org.apache.mesos"
option java_outer_classname = "Protos";
+/**
+ * Status is used to indicate the state of the scheduler and executor
+ * driver after function calls.
+ */
+enum Status {
+ OK = 0;
+
+ DRIVER_NOT_RUNNING = 1;
+ DRIVER_ALREADY_RUNNING = 2;
+ DRIVER_ABORTED = 3;
+ DRIVER_STOPPED = 4;
+}
+
+
+/**
+ * A unique ID assigned to a framework. A framework can reuse this ID
+ * in order to do failover (see MesosSchedulerDriver).
+ */
message FrameworkID {
required string value = 1;
}
+/**
+ * A unique ID assigned to an offer.
+ */
message OfferID {
required string value = 1;
}
+/**
+ * A unique ID assigned to a slave. Currently, a slave gets a new ID
+ * whenever it (re)registers with Mesos. Framework writers shouldn't
+ * assume any binding between a slave ID and a hostname.
+ */
message SlaveID {
required string value = 1;
}
+/**
+ * A framework generated ID to distinguish a task. The ID must remain
+ * unique while the task is active. However, a framework can reuse an
+ * ID _only_ if a previous task with the same ID has reached a
+ * terminal state (e.g., TASK_FINISHED, TASK_LOST, TASK_KILLED, etc.).
+ */
message TaskID {
required string value = 1;
}
+/**
+ * A framework generated ID to distinguish an executor. Only one
+ * executor with the same ID can be active on the same slave at a
+ * time.
+ */
message ExecutorID {
required string value = 1;
}
-message Param {
- required string key = 1;
- required string value = 2;
+/**
+ * Describes a framework, including its default executor.
+ */
+message FrameworkInfo {
+ required string user = 1;
+ required string name = 2;
+ required ExecutorInfo executor = 3;
}
-// TODO(benh): Eliminate this once some notion of a protocol buffer
-// ResourceOffer object makes it's way to the Mesos interface.
-message Params {
- repeated Param param = 1;
+/**
+ * Describes information about an executor. The 'params' field can be
+ * used to set environment variables for the executor. The 'data'
+ * field can be used to pass arbitrary bytes to an executor (see
+ * ExecutorArgs and Executor::init).
+ */
+message ExecutorInfo {
+ required ExecutorID executor_id = 1;
+ required string uri = 2;
+ repeated Resource resources = 5;
+ optional Params params = 3;
+ optional bytes data = 4;
+}
+
+
+/**
+ * Describes a slave. The 'webui_hostname' and 'webui_port' are
+ * provided in the event a host has different private and public
+ * hostnames (e.g., Amazon EC2).
+ */
+message SlaveInfo {
+ required string hostname = 1;
+ // TODO(benh,andyk): Send bound ports, not just hostname.
+ required string webui_hostname = 2;
+ optional int32 webui_port = 4 [default = 8081];
+ repeated Resource resources = 3;
+ repeated Attribute attributes = 5;
}
+/**
+ * Describes an Attribute or Resource "value". A value is described
+ * using the standard protocol buffer "union" trick.
+ */
message Value {
enum Type {
SCALAR = 0;
@@ -97,7 +165,11 @@ message Value {
}
-// Attribute doesn't support arithmetic operations.
+/**
+ * Describes an attribute that can be set on a machine. For now,
+ * attributes and resources share the same "value" type, but this may
+ * change in the future and attributes may only be string based.
+ */
message Attribute {
required string name = 1;
required Value.Type type = 2;
@@ -107,8 +179,15 @@ message Attribute {
}
-// TODO(benh): Add better support for resources.
-// Resource support arithmetic operations.
+/**
+ * Describes a resource on a machine. A resource can take on one of
+ * three types: scalar (double), a list of finite and discrete ranges
+ * (e.g., [1-10, 20-30]), or a set of items. A resource is described
+ * using the standard protocol buffer "union" trick.
+ *
+ * TODO(benh): Add better support for "expected" resources (e.g.,
+ * cpus, memory, disk, network).
+ */
message Resource {
required string name = 1;
required Value.Type type = 2;
@@ -118,15 +197,41 @@ message Resource {
}
-message ExecutorArgs {
- required FrameworkID framework_id = 1;
- required ExecutorID executor_id = 2;
+/**
+ * Describes a request for resources that can be used by a framework
+ * to proactively influence the allocator. If 'slave_id' is provided
+ * then this request is assumed to only apply to resources on that
+ * slave.
+ */
+message ResourceRequest {
+ optional SlaveID slave_id = 1;
+ repeated Resource resources = 2;
+}
+
+
+/**
+ * Describes some resources available on a slave. An offer only
+ * contains resources from a single slave.
+ */
+message Offer {
+ required OfferID id = 1;
+ required FrameworkID framework_id = 2;
required SlaveID slave_id = 3;
required string hostname = 4;
- optional bytes data = 5;
-};
+ repeated Resource resources = 5;
+ repeated Attribute attributes = 7;
+ repeated ExecutorID executor_ids = 6;
+}
+/**
+ * Describes a task. Passed from the scheduler all the way to an
+ * executor (see SchedulerDriver::launchTasks and
+ * Executor::launchTask). If no executor is specified then the default
+ * executor is assumed (see FrameworkInfo). Otherwise, a different
+ * executor can be used to launch this task, and subsequent tasks
+ * meant for the same executor can reuse the same ExecutorInfo struct.
+ */
message TaskDescription {
required string name = 1;
required TaskID task_id = 2;
@@ -137,16 +242,26 @@ message TaskDescription {
}
+/**
+ * Describes possible task states. IMPORTANT: Mesos assumes tasks that
+ * enter terminal states (see below) imply the task is no longer
+ * running and thus cleans up anything associated with the task
+ * (ultimately offering any resources being consumed by that task to
+ * another task).
+ */
enum TaskState {
TASK_STARTING = 0;
TASK_RUNNING = 1;
- TASK_FINISHED = 2;
- TASK_FAILED = 3;
- TASK_KILLED = 4;
- TASK_LOST = 5;
+ TASK_FINISHED = 2; // TERMINAL.
+ TASK_FAILED = 3; // TERMINAL.
+ TASK_KILLED = 4; // TERMINAL.
+ TASK_LOST = 5; // TERMINAL.
}
+/**
+ * Describes the current status of a task.
+ */
message TaskStatus {
required TaskID task_id = 1;
required TaskState state = 2;
@@ -155,60 +270,40 @@ message TaskStatus {
}
-message ExecutorInfo {
- required ExecutorID executor_id = 1;
- required string uri = 2;
- repeated Resource resources = 5;
- optional Params params = 3;
- optional bytes data = 4;
-}
-
-
-message FrameworkInfo {
- required string user = 1;
- required string name = 2;
- required ExecutorInfo executor = 3;
-}
-
-
-message SlaveInfo {
- // TODO(benh,andyk): Send bound ports, not just hostname.
- required string hostname = 1;
- required string webui_hostname = 2;
- optional int32 webui_port = 4 [default = 8081];
- repeated Resource resources = 3;
- repeated Attribute attributes = 5;
-}
-
-
-message ResourceRequest {
- optional SlaveID slave_id = 1;
- repeated Resource resources = 2;
+/**
+ * Describes possible filters that can be applied to unused resources
+ * (see SchedulerDriver::launchTasks) to influence the allocator.
+ */
+message Filters {
+ // Time to consider unused resources refused.
+ optional double refuse_seconds = 1;
}
-message Offer {
- required OfferID id = 1;
- required FrameworkID framework_id = 2;
+/**
+ * Passed to Executor::init.
+ */
+message ExecutorArgs {
+ required FrameworkID framework_id = 1;
+ required ExecutorID executor_id = 2;
required SlaveID slave_id = 3;
required string hostname = 4;
- repeated Resource resources = 5;
- repeated ExecutorID executor_ids = 6;
- repeated Attribute attributes = 7;
+ optional bytes data = 5;
}
-message Filters {
- // Time to consider unused resources refused.
- optional double refuse_seconds = 1;
+/**
+ * A generic (key, value) pair used in various places for parameters.
+ */
+message Param {
+ required string key = 1;
+ required string value = 2;
}
-
-
-enum Status {
- OK = 0;
-
- DRIVER_NOT_RUNNING = 1;
- DRIVER_ALREADY_RUNNING = 2;
- DRIVER_ABORTED = 3;
- DRIVER_STOPPED = 4;
+
+
+/**
+ * Collection of Param.
+ */
+message Params {
+ repeated Param param = 1;
}
Modified: incubator/mesos/trunk/include/mesos/scheduler.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/include/mesos/scheduler.hpp?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/include/mesos/scheduler.hpp (original)
+++ incubator/mesos/trunk/include/mesos/scheduler.hpp Wed Jan 25 17:19:27 2012
@@ -25,9 +25,18 @@
#include <mesos/mesos.hpp>
+/**
+ * Mesos scheduler interface and scheduler driver. A scheduler is used
+ * to interact with Mesos in order to run distributed computations.
+ *
+ * IF YOU FIND YOURSELF MODIFYING COMMENTS HERE PLEASE CONSIDER MAKING
+ * THE SAME MODIFICATIONS FOR OTHER LANGUAGE BINDINGS (e.g., Java:
+ * src/java/src/org/apache/mesos, Python: src/python/src, etc.).
+*/
namespace mesos {
+// A few forward declarations.
class SchedulerDriver;
namespace internal {
@@ -38,35 +47,101 @@ class Configuration;
/**
- * Callback interface to be implemented by new frameworks' schedulers.
+ * Callback interface to be implemented by frameworks'
+ * schedulers. Note that only one callback will be invoked at a time,
+ * so it is not recommended that you block within a callback because
+ * it may cause a deadlock.
+ *
+ * Each callback includes a pointer to the scheduler driver that was
+ * used to run this scheduler. The pointer will not change for the
+ * duration of a scheduler (i.e., from the point you do
+ * SchedulerDriver::start() to the point that SchedulerDriver::join()
+ * returns). This is intended for convenience so that a scheduler
+ * doesn't need to store a pointer to the driver itself.
*/
class Scheduler
{
public:
+ /**
+ * Empty virtual destructor (necessary to instantiate subclasses).
+ */
virtual ~Scheduler() {}
- // Callbacks for various Mesos events.
-
+ /**
+ * Invoked when the scheduler successfully registers with a Mesos
+ * master. A unique ID (generated by the master) used for
+ * distinguishing this framework from others is provided as an
+ * argument.
+ */
virtual void registered(SchedulerDriver* driver,
const FrameworkID& frameworkId) = 0;
+ /**
+ * Invoked when resources have been offered to this framework. A
+ * single offer will only contain resources from a single slave.
+ * Resources associated with an offer will not be re-offered to
+ * _this_ framework until either (a) this framework has rejected
+ * those resources (see SchedulerDriver::launchTasks) or (b) those
+ * resources have been rescinded (see Scheduler::offerRescinded).
+ * Note that resources may be concurrently offered to more than one
+ * framework at a time (depending on the allocator being used). In
+ * that case, the first framework to launch tasks using those
+ * resources will be able to use them while the other frameworks
+ * will have those resources rescinded (or if a framework has
+ * already launched tasks with those resources then those tasks will
+ * fail with a TASK_LOST status and a message saying as much).
+ */
virtual void resourceOffers(SchedulerDriver* driver,
const std::vector<Offer>& offers) = 0;
+ /**
+ * Invoked when an offer is no longer valid (e.g., the slave was
+ * lost or another framework used resources in the offer). If for
+ * whatever reason an offer is never rescinded (e.g., dropped
+ * message, failing over framework, etc.), a framework that attempts
+ * to launch tasks using an invalid offer will receive TASK_LOST
+ * status updates for those tasks (see Scheduler::resourceOffers).
+ */
virtual void offerRescinded(SchedulerDriver* driver,
const OfferID& offerId) = 0;
+ /**
+ * Invoked when the status of a task has changed (e.g., a slave is
+ * lost and so the task is lost, a task finishes and an executor
+ * sends a status update saying so, etc). Note that returning from
+ * this callback _acknowledges_ receipt of this status update! If
+ * for whatever reason the scheduler aborts during this callback (or
+ * the process exits) another status update will be delivered (note,
+ * however, that this is currently not true if the slave sending the
+ * status update is lost/fails during that time).
+ */
virtual void statusUpdate(SchedulerDriver* driver,
const TaskStatus& status) = 0;
+ /**
+ * Invoked when an executor sends a message. These messages are best
+ * effort; do not expect a framework message to be retransmitted in
+ * any reliable fashion.
+ */
virtual void frameworkMessage(SchedulerDriver* driver,
const SlaveID& slaveId,
const ExecutorID& executorId,
const std::string& data) = 0;
+ /**
+ * Invoked when a slave has been determined unreachable (e.g.,
+ * machine failure, network partition). Most frameworks will need to
+ * reschedule any tasks launched on this slave on a new slave.
+ */
virtual void slaveLost(SchedulerDriver* driver,
const SlaveID& slaveId) = 0;
+ /**
+ * Invoked when there is an unrecoverable error in the scheduler or
+ * scheduler driver. The driver will be aborted BEFORE invoking this
+ * callback. This function is deprecated and will probably be
+ * removed in a subsequent release.
+ */
virtual void error(SchedulerDriver* driver,
int code,
const std::string& message) = 0;
@@ -74,43 +149,102 @@ public:
/**
- * Abstract interface for driving a scheduler connected to Mesos.
- * This interface is used both to manage the scheduler's lifecycle (start it,
- * stop it, or wait for it to finish) and to send commands from the user
- * framework to Mesos (such as replies to offers). Concrete implementations
- * of SchedulerDriver will take a Scheduler as a parameter in order to make
- * callbacks into it on various events.
+ * Abstract interface for connecting a scheduler to Mesos. This
+ * interface is used both to manage the scheduler's lifecycle (start
+ * it, stop it, or wait for it to finish) and to interact with Mesos
+ * (e.g., launch tasks, kill tasks, etc.). See MesosSchedulerDriver
+ * below for a concrete example of a SchedulerDriver.
*/
class SchedulerDriver
{
public:
+ /**
+ * Empty virtual destructor (necessary to instantiate subclasses).
+ */
virtual ~SchedulerDriver() {}
- // Lifecycle methods.
+ /**
+ * Starts the scheduler driver. This needs to be called before any
+ * other driver calls are made.
+ */
virtual Status start() = 0;
+
+ /**
+ * Stops the scheduler driver. If the 'failover' flag is set to
+ * false then it is expected that this framework will never
+ * reconnect to Mesos and all of its executors and tasks can be
+ * terminated. Otherwise, all executors and tasks will remain
+ * running (for some master specified failover timeout) allowing the
+ * scheduler to reconnect (possibly in the same process, or from a
+ * different process, for example, on a different machine).
+ */
virtual Status stop(bool failover = false) = 0;
- // Puts driver into ABORTED state after which no more callbacks
- // can be made to the scheduler. Also, the master is signalled
- // that the driver is inactived. The only call a scheduler
- // can make after abort is stop(), which can un-register the
- // framework (if requested).
+
+ /**
+ * Aborts the driver so that no more callbacks can be made to the
+ * scheduler. The semantics of abort and stop have deliberately been
+ * separated so that code can detect an aborted driver (i.e., via
+ * the return status of SchedulerDriver::join, see below), and
+ * instantiate and start another driver if desired (from within the
+ * same process).
+ */
virtual Status abort() = 0;
+
+ /**
+ * Waits for the driver to be stopped or aborted, possibly
+ * _blocking_ the current thread indefinitely. The return status of
+ * this function can be used to determine if the driver was aborted
+ * (see mesos.proto for a description of Status).
+ */
virtual Status join() = 0;
- virtual Status run() = 0; // Start and then join driver.
- // Communication methods.
+ /**
+ * Starts and immediately joins (i.e., blocks on) the driver.
+ */
+ virtual Status run() = 0;
+ /**
+ * Requests resources from Mesos (see mesos.proto for a description
+ * of ResourceRequest and how, for example, to request resources
+ * from specific slaves). Any resources available are offered to the
+ * framework via Scheduler::resourceOffers callback, asynchronously.
+ */
virtual Status requestResources(
const std::vector<ResourceRequest>& requests) = 0;
+ /**
+ * Launches the given set of tasks. Note that the current mechanism
+ * of rejecting resources is to invoke this with an empty collection
+ * of tasks. A framework can also specify filters on all resources
+ * unused (see mesos.proto for a description of Filters). Note that
+ * currently tasks can only be launched per offer. In the future,
+ * frameworks will be allowed to aggregate offers (resources) to
+ * launch their tasks.
+ */
virtual Status launchTasks(const OfferID& offerId,
const std::vector<TaskDescription>& tasks,
const Filters& filters = Filters()) = 0;
+ /**
+ * Kills the specified task. Note that attempting to kill a task is
+ * currently not reliable. If, for example, a scheduler fails over
+ * while it was attempting to kill a task it will need to retry in
+ * the future (these semantics may be changed in the future).
+ */
virtual Status killTask(const TaskID& taskId) = 0;
+ /**
+ * Removes all filters previously set by the framework (via
+ * launchTasks()). This enables the framework to receive offers from
+ * those filtered slaves.
+ */
virtual Status reviveOffers() = 0;
+ /**
+ * Sends a message from the framework to one of its executors. These
+ * messages are best effort; do not expect a framework message to be
+ * retransmitted in any reliable fashion.
+ */
virtual Status sendFrameworkMessage(const SlaveID& slaveId,
const ExecutorID& executorId,
const std::string& data) = 0;
@@ -118,21 +252,37 @@ public:
/**
- * Concrete implementation of SchedulerDriver that communicates with
- * a Mesos master.
+ * Concrete implementation of a SchedulerDriver that connects a
+ * Scheduler with a Mesos master. The MesosSchedulerDriver is
+ * thread-safe.
+ *
+ * Note that scheduler failover is supported in Mesos. After a
+ * scheduler is registered with Mesos it may failover (to a new
+ * process on the same machine or across multiple machines) by
+ * creating a new driver with the ID given to it in
+ * Scheduler::registered.
+ *
+ * The driver is responsible for invoking the Scheduler callbacks as
+ * it communicates with the Mesos master.
+ *
+ * Note that blocking on the MesosSchedulerDriver (e.g., via
+ * MesosSchedulerDriver::join) doesn't affect the scheduler callbacks
+ * in any way because they are handled by a different thread.
+ *
+ * See src/examples/test_framework.cpp for an example of using the
+ * MesosSchedulerDriver.
*/
class MesosSchedulerDriver : public SchedulerDriver
{
public:
/**
- * Create a scheduler driver with a given Mesos master URL.
- * Additional Mesos config options are read from the environment, as well
- * as any config files found through it.
+ * Creates a new scheduler driver that connects to a Mesos master
+ * through the specified URL. Optionally providing an existing
+ * framework ID can be used to failover a framework.
*
- * @param sched scheduler to make callbacks into
- * @param url Mesos master URL
- * @param frameworkId optional framework ID for registering
- * redundant schedulers for the same framework
+ * Any Mesos configuration options are read from environment
+ * variables, as well as any configuration files found through the
+ * environment variables.
*/
MesosSchedulerDriver(Scheduler* sched,
const std::string& frameworkName,
@@ -141,15 +291,13 @@ public:
const FrameworkID& frameworkId = FrameworkID());
/**
- * Create a scheduler driver with a configuration, which the master URL
- * and possibly other options are read from.
- * Additional Mesos config options are read from the environment, as well
- * as any config files given through conf or found in the environment.
+ * Creates a new scheduler driver that connects to a Mesos master
+ * through a "url" configuration option specified in the 'params'
+ * argument. Optionally providing an existing framework ID can be
+ * used to failover a framework.
*
- * @param sched scheduler to make callbacks into
- * @param params Map containing configuration options
- * @param frameworkId optional framework ID for registering
- * redundant schedulers for the same framework
+ * Additional Mesos config options are obtained from the 'params'
+ * argument.
*/
MesosSchedulerDriver(Scheduler* sched,
const std::string& frameworkName,
@@ -158,19 +306,10 @@ public:
const FrameworkID& frameworkId = FrameworkID());
/**
- * Create a scheduler driver with a config read from command-line arguments.
- * Additional Mesos config options are read from the environment, as well
- * as any config files given through conf or found in the environment.
- *
- * This constructor is not available through SWIG since it's difficult
- * for it to properly map arrays to an argc/argv pair.
- *
- * @param sched scheduler to make callbacks into
- * @param argc argument count
- * @param argv argument values (argument 0 is expected to be program name
- * and will not be looked at for options)
- * @param frameworkId optional framework ID for registering
- * redundant schedulers for the same framework
+ * Creates a new scheduler driver that connects to a Mesos master
+ * through a "url" configuration option specified on the
+ * command-line (via 'argc' and 'argv'). Optionally providing an
+ * existing framework ID can be used to failover a framework.
*/
MesosSchedulerDriver(Scheduler* sched,
const std::string& frameworkName,
@@ -179,40 +318,44 @@ public:
char** argv,
const FrameworkID& frameworkId = FrameworkID());
+ /**
+ * This destructor will block indefinitely if
+ * MesosSchedulerDriver::start was invoked successfully (possibly
+ * via MesosSchedulerDriver::run) and MesosSchedulerDriver::stop has
+ * not been invoked.
+ */
virtual ~MesosSchedulerDriver();
- // Lifecycle methods.
+ /**
+ * See SchedulerDriver for descriptions of these.
+ */
virtual Status start();
virtual Status stop(bool failover = false);
virtual Status abort();
virtual Status join();
- virtual Status run(); // Start and then join driver.
-
- // Communication methods.
+ virtual Status run();
virtual Status requestResources(
const std::vector<ResourceRequest>& requests);
-
- virtual Status launchTasks(const OfferID& offerId,
- const std::vector<TaskDescription>& tasks,
- const Filters& filters = Filters());
-
+ virtual Status launchTasks(
+ const OfferID& offerId,
+ const std::vector<TaskDescription>& tasks,
+ const Filters& filters = Filters());
virtual Status killTask(const TaskID& taskId);
-
virtual Status reviveOffers();
-
- virtual Status sendFrameworkMessage(const SlaveID& slaveId,
- const ExecutorID& executorId,
- const std::string& data);
+ virtual Status sendFrameworkMessage(
+ const SlaveID& slaveId,
+ const ExecutorID& executorId,
+ const std::string& data);
private:
- // Initialization method used by constructors
+ // Initialization method used by constructors.
void init(Scheduler* sched,
internal::Configuration* conf,
const FrameworkID& frameworkId,
const std::string& frameworkName,
const ExecutorInfo& executorInfo);
- // Internal utility method to report an error to the scheduler
+ // Internal utility method to report an error to the scheduler.
void error(int code, const std::string& message);
Scheduler* sched;
@@ -221,7 +364,7 @@ private:
std::string frameworkName;
ExecutorInfo executorInfo;
- // Libprocess process for communicating with master
+ // Libprocess process for communicating with master.
internal::SchedulerProcess* process;
// Coordination between masters
@@ -231,10 +374,10 @@ private:
// TODO(benh|matei): Does this still need to be a pointer?
internal::Configuration* conf;
- // Mutex to enforce all non-callbacks are execute serially
+ // Mutex to enforce that all non-callbacks are executed serially.
pthread_mutex_t mutex;
- // Condition variable for waiting until driver terminates
+ // Condition variable for waiting until driver terminates.
pthread_cond_t cond;
enum State {
@@ -248,7 +391,6 @@ private:
State state;
};
-
} // namespace mesos {
#endif // __MESOS_SCHEDULER_HPP__
Modified: incubator/mesos/trunk/src/Makefile.am
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/Makefile.am?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/Makefile.am (original)
+++ incubator/mesos/trunk/src/Makefile.am Wed Jan 25 17:19:27 2012
@@ -261,7 +261,7 @@ libmesos_la_LIBADD = libmesos_no_third_p
# For non-convenience libraries we need to link them in to make the shared
# library each time. (Currently, we don't support platforms where this is not
# possible.)
-libmesos_la_LIBADD += $(top_builddir)/$(PROTOBUF)/src/.libs/libprotobuf.a
+libmesos_la_LIBADD += $(top_builddir)/$(PROTOBUF)/src/.libs/libprotobuf.la
libmesos_la_LIBADD += $(top_builddir)/$(GLOG)/.libs/libglog.la
libmesos_la_LIBADD += $(top_builddir)/$(LIBPROCESS)/libprocess.a
libmesos_la_LIBADD += $(top_builddir)/$(LIBEV)/.libs/libev.la
Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/Executor.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/Executor.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/Executor.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/Executor.java Wed Jan 25 17:19:27 2012
@@ -23,12 +23,70 @@ import org.apache.mesos.Protos.*;
/**
* Callback interface to be implemented by frameworks' executors.
+ * Note that only one callback will be invoked at a time, so it is not
+ * recommended that you block within a callback because it may cause a
+ * deadlock.
+ *
+ * Each callback includes a reference to the executor driver that was
+ * used to run this executor. The reference will not change for the
+ * duration of an executor (i.e., from the point you do {@link
+ * ExecutorDriver#start} to the point that {@link ExecutorDriver#join}
+ * returns). This is intended for convenience so that an executor
+ * doesn't need to store a reference to the driver itself.
*/
public interface Executor {
- public void init(ExecutorDriver driver, ExecutorArgs args);
- public void launchTask(ExecutorDriver driver, TaskDescription task);
- public void killTask(ExecutorDriver driver, TaskID taskId);
- public void frameworkMessage(ExecutorDriver driver, byte[] data);
- public void shutdown(ExecutorDriver driver);
- public void error(ExecutorDriver driver, int code, String message);
+ /**
+ * Invoked once the executor driver has been able to successfully
+ * connect with Mesos. See mesos.proto for a description of the
+ * ExecutorArgs argument. In particular, a scheduler can pass some
+ * data to its executors through the {@link ExecutorInfo#data}
+ * field which gets copied into the {@link ExecutorArgs#data}
+ * field. (TODO(benh): This callback will likely become two
+ * callbacks, registered/reregistered, once executors can outlive
+ * slaves.)
+ */
+ void init(ExecutorDriver driver, ExecutorArgs args);
+
+ /**
+ * Invoked when a task has been launched on this executor (initiated
+ * via {@link SchedulerDriver#launchTasks}). Note that this task can be
+ * realized with a thread, a process, or some simple computation,
+ * however, no other callbacks will be invoked on this executor
+ * until this callback has returned.
+ */
+ void launchTask(ExecutorDriver driver, TaskDescription task);
+
+ /**
+ * Invoked when a task running within this executor has been killed
+ * (via {@link SchedulerDriver#killTask}). Note that no status
+ * update will be sent on behalf of the executor, the executor is
+ * responsible for creating a new TaskStatus (i.e., with
+ * TASK_KILLED) and invoking {@link
+ * ExecutorDriver#sendStatusUpdate}.
+ */
+ void killTask(ExecutorDriver driver, TaskID taskId);
+
+ /**
+ * Invoked when a framework message has arrived for this
+ * executor. These messages are best effort; do not expect a
+ * framework message to be retransmitted in any reliable fashion.
+ */
+ void frameworkMessage(ExecutorDriver driver, byte[] data);
+
+ /**
+ * Invoked when the executor should terminate all of its currently
+ * running tasks. Note that after Mesos has determined that an
+ * executor has terminated any tasks that the executor did not send
+ * terminal status updates for (e.g., TASK_KILLED, TASK_FINISHED,
+ * TASK_FAILED, etc) a TASK_LOST status update will be created.
+ */
+ void shutdown(ExecutorDriver driver);
+
+ /**
+ * Invoked when a fatal error has occurred with the executor and/or
+ * executor driver. The driver will be aborted BEFORE invoking this
+ * callback. This function is deprecated and will probably be
+ * removed in a subsequent release.
+ */
+ void error(ExecutorDriver driver, int code, String message);
}
Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/ExecutorDriver.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/ExecutorDriver.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/ExecutorDriver.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/ExecutorDriver.java Wed Jan 25 17:19:27 2012
@@ -22,22 +22,60 @@ import org.apache.mesos.Protos.*;
/**
- * Abstract interface for driving an executor connected to Mesos.
- * This interface is used both to start the executor running (and
- * communicating with the slave) and to send information from the executor
- * to Nexus (such as status updates). Concrete implementations of
- * ExecutorDriver will take a Executor as a parameter in order to make
- * callbacks into it on various events.
+ * Abstract interface for connecting an executor to Mesos. This
+ * interface is used both to manage the executor's lifecycle (start
+ * it, stop it, or wait for it to finish) and to interact with Mesos
+ * (e.g., send status updates, send framework messages, etc.).
*/
public interface ExecutorDriver {
- // Lifecycle methods.
+ /**
+ * Starts the executor driver. This needs to be called before any
+ * other driver calls are made.
+ */
public Status start();
+
+ /**
+ * Stops the executor driver.
+ */
public Status stop();
+
+ /**
+ * Aborts the driver so that no more callbacks can be made to the
+ * executor. The semantics of abort and stop have deliberately been
+ * separated so that code can detect an aborted driver (i.e., via
+ * the return status of {@link #join}, see below),
+ * and instantiate and start another driver if desired (from within
+ * the same process ... although this functionality is currently not
+ * supported for executors).
+ */
public Status abort();
+
+ /**
+ * Waits for the driver to be stopped or aborted, possibly
+ * _blocking_ the current thread indefinitely. The return status of
+ * this function can be used to determine if the driver was aborted
+ * (see mesos.proto for a description of Status).
+ */
public Status join();
+
+ /**
+ * Starts and immediately joins (i.e., blocks on) the driver.
+ */
public Status run();
-
- // Communication methods.
+
+ /**
+ * Sends a status update to the framework scheduler, retrying as
+ * necessary until an acknowledgement has been received or the
+ * executor is terminated (in which case, a TASK_LOST status update
+ * will be sent). See {@link Scheduler#statusUpdate} for more
+ * information about status update acknowledgements.
+ */
public Status sendStatusUpdate(TaskStatus status);
+
+ /**
+ * Sends a message to the framework scheduler. These messages are
+ * best effort; do not expect a framework message to be
+ * retransmitted in any reliable fashion.
+ */
public Status sendFrameworkMessage(byte[] data);
}
Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosExecutorDriver.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosExecutorDriver.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosExecutorDriver.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosExecutorDriver.java Wed Jan 25 17:19:27 2012
@@ -22,23 +22,37 @@ import org.apache.mesos.Protos.*;
/**
- * Concrete implementation of ExecutorDriver that communicates with a
- * Mesos slave. The slave's location is read from environment variables
- * set by it when it execs the user's executor script; users only need
- * to create the MesosExecutorDriver and call run() on it.
+ * Concrete implementation of an ExecutorDriver that connects an
+ * Executor with a Mesos slave. The MesosExecutorDriver is
+ * thread-safe.
+ *
+ * The driver is responsible for invoking the Executor callbacks as it
+ * communicates with the Mesos slave.
+ *
+ * Note that blocking on the MesosExecutorDriver (e.g., via {@link
+ * #join}) doesn't affect the executor callbacks in any way because
+ * they are handled by a different thread.
+ *
+ * See src/examples/java/TestExecutor.java for an example of using the
+ * MesosExecutorDriver.
*/
public class MesosExecutorDriver implements ExecutorDriver {
static {
System.loadLibrary("mesos");
}
+ /**
+ * Creates a new driver that uses the specified Executor.
+ */
public MesosExecutorDriver(Executor exec) {
this.exec = exec;
initialize();
}
- // Lifecycle methods.
+ /**
+ * See ExecutorDriver for descriptions of these.
+ */
public native Status start();
public native Status stop();
public native Status abort();
@@ -60,4 +74,3 @@ public class MesosExecutorDriver impleme
private long __exec;
private long __driver;
}
-
Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosSchedulerDriver.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosSchedulerDriver.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosSchedulerDriver.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosSchedulerDriver.java Wed Jan 25 17:19:27 2012
@@ -26,14 +26,40 @@ import java.util.Map;
/**
- * Concrete implementation of SchedulerDriver that communicates with
- * a Mesos master.
+ * Concrete implementation of a SchedulerDriver that connects a
+ * Scheduler with a Mesos master. The MesosSchedulerDriver is
+ * thread-safe.
+ *
+ * Note that scheduler failover is supported in Mesos. After a
+ * scheduler is registered with Mesos it may failover (to a new
+ * process on the same machine or across multiple machines) by
+ * creating a new driver with the ID given to it in {@link
+ * Scheduler#registered}.
+ *
+ * The driver is responsible for invoking the Scheduler callbacks as
+ * it communicates with the Mesos master.
+ *
+ * Note that blocking on the MesosSchedulerDriver (e.g., via {@link
+ * #join}) doesn't affect the scheduler callbacks in any way because
+ * they are handled by a different thread.
+ *
+ * See src/examples/java/TestFramework.java for an example of using
+ * the MesosSchedulerDriver.
*/
public class MesosSchedulerDriver implements SchedulerDriver {
static {
System.loadLibrary("mesos");
}
+ /**
+ * Creates a new scheduler driver that connects to a Mesos master
+ * through the specified URL. Optionally providing an existing
+ * framework ID can be used to failover a framework.
+ *
+ * Any Mesos configuration options are read from environment
+ * variables, as well as any configuration files found through the
+ * environment variables.
+ */
public MesosSchedulerDriver(Scheduler sched,
String frameworkName,
ExecutorInfo executorInfo,
@@ -44,7 +70,7 @@ public class MesosSchedulerDriver implem
}
if (frameworkName == null) {
- throw new NullPointerException("Not expecting a null scheduler");
+ throw new NullPointerException("Not expecting a null framework name");
}
this.sched = sched;
@@ -55,10 +81,23 @@ public class MesosSchedulerDriver implem
initialize();
}
- public MesosSchedulerDriver(Scheduler sched, String frameworkName, ExecutorInfo executorInfo, String url) {
- this(sched, frameworkName, executorInfo, url, FrameworkID.newBuilder().setValue("").build());
+ /**
+ * Creates a new scheduler driver. See above for details.
+ */
+ public MesosSchedulerDriver(Scheduler sched,
+ String frameworkName,
+ ExecutorInfo executorInfo,
+ String url) {
+ this(sched,
+ frameworkName,
+ executorInfo,
+ url,
+ FrameworkID.newBuilder().setValue("").build());
}
+ /**
+ * See SchedulerDriver for descriptions of these.
+ */
public native Status start();
public native Status stop(boolean failover);
public Status stop() {
@@ -74,17 +113,22 @@ public class MesosSchedulerDriver implem
public native Status requestResources(Collection<ResourceRequest> requests);
- public Status launchTasks(OfferID offerId, Collection<TaskDescription> tasks) {
+ public Status launchTasks(OfferID offerId,
+ Collection<TaskDescription> tasks) {
return launchTasks(offerId, tasks, Filters.newBuilder().build());
}
- public native Status launchTasks(OfferID offerId, Collection<TaskDescription> tasks, Filters filters);
+ public native Status launchTasks(OfferID offerId,
+ Collection<TaskDescription> tasks,
+ Filters filters);
public native Status killTask(TaskID taskId);
public native Status reviveOffers();
- public native Status sendFrameworkMessage(SlaveID slaveId, ExecutorID executorId, byte[] data);
+ public native Status sendFrameworkMessage(SlaveID slaveId,
+ ExecutorID executorId,
+ byte[] data);
protected native void initialize();
protected native void finalize();
@@ -97,4 +141,4 @@ public class MesosSchedulerDriver implem
private long __sched;
private long __driver;
-};
+}
Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/Scheduler.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/Scheduler.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/Scheduler.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/Scheduler.java Wed Jan 25 17:19:27 2012
@@ -24,14 +24,89 @@ import java.util.List;
/**
- * Callback interface to be implemented by frameworks' schedulers.
+ * Callback interface to be implemented by frameworks'
+ * schedulers. Note that only one callback will be invoked at a time,
+ * so it is not recommended that you block within a callback because
+ * it may cause a deadlock.
+ *
+ * Each callback includes a reference to the scheduler driver that was
+ * used to run this scheduler. The reference will not change for the
+ * duration of a scheduler (i.e., from the point you do {@link
+ * SchedulerDriver#start} to the point that {@link
+ * SchedulerDriver#join} returns). This is intended for convenience so
+ * that a scheduler doesn't need to store a reference to the driver
+ * itself.
*/
public interface Scheduler {
- public void registered(SchedulerDriver driver, FrameworkID frameworkId);
- public void resourceOffers(SchedulerDriver driver, List<Offer> offers);
- public void offerRescinded(SchedulerDriver driver, OfferID offerId);
- public void statusUpdate(SchedulerDriver driver, TaskStatus status);
- public void frameworkMessage(SchedulerDriver driver, SlaveID slaveId, ExecutorID executorId, byte[] data);
- public void slaveLost(SchedulerDriver driver, SlaveID slaveId);
- public void error(SchedulerDriver driver, int code, String message);
+ /**
+ * Invoked when the scheduler successfully registers with a Mesos
+ * master. A unique ID (generated by the master) used for
+ * distinguishing this framework from others is provided as an
+ * argument.
+ */
+ void registered(SchedulerDriver driver, FrameworkID frameworkId);
+
+ /**
+ * Invoked when resources have been offered to this framework. A
+ * single offer will only contain resources from a single slave.
+ * Resources associated with an offer will not be re-offered to
+ * _this_ framework until either (a) this framework has rejected
+ * those resources (see {@link SchedulerDriver#launchTasks}) or (b)
+ * those resources have been rescinded (see {@link #offerRescinded}).
+ * Note that resources may be concurrently offered to more than one
+ * framework at a time (depending on the allocator being used). In
+ * that case, the first framework to launch tasks using those
+ * resources will be able to use them while the other frameworks
+ * will have those resources rescinded (or if a framework has
+ * already launched tasks with those resources then those tasks will
+ * fail with a TASK_LOST status and a message saying as much).
+ */
+ void resourceOffers(SchedulerDriver driver, List<Offer> offers);
+
+ /**
+ * Invoked when an offer is no longer valid (e.g., the slave was
+ * lost or another framework used resources in the offer). If for
+ * whatever reason an offer is never rescinded (e.g., dropped
+ * message, failing over framework, etc.), a framework that attempts
+ * to launch tasks using an invalid offer will receive TASK_LOST
+ * status updates for those tasks (see {@link #resourceOffers}).
+ */
+ void offerRescinded(SchedulerDriver driver, OfferID offerId);
+
+ /**
+ * Invoked when the status of a task has changed (e.g., a slave is
+ * lost and so the task is lost, a task finishes and an executor
+ * sends a status update saying so, etc). Note that returning from
+ * this callback _acknowledges_ receipt of this status update! If
+ * for whatever reason the scheduler aborts during this callback (or
+ * the process exits) another status update will be delivered (note,
+ * however, that this is currently not true if the slave sending the
+ * status update is lost/fails during that time).
+ */
+ void statusUpdate(SchedulerDriver driver, TaskStatus status);
+
+ /**
+ * Invoked when an executor sends a message. These messages are best
+ * effort; do not expect a framework message to be retransmitted in
+ * any reliable fashion.
+ */
+ void frameworkMessage(SchedulerDriver driver,
+ SlaveID slaveId,
+ ExecutorID executorId,
+ byte[] data);
+
+ /**
+ * Invoked when a slave has been determined unreachable (e.g.,
+ * machine failure, network partition). Most frameworks will need to
+ * reschedule any tasks launched on this slave on a new slave.
+ */
+ void slaveLost(SchedulerDriver driver, SlaveID slaveId);
+
+ /**
+ * Invoked when there is an unrecoverable error in the scheduler or
+ * scheduler driver. The driver will be aborted BEFORE invoking this
+ * callback. This function is deprecated and will probably be
+ * removed in a subsequent release.
+ */
+ void error(SchedulerDriver driver, int code, String message);
}
Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/SchedulerDriver.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/SchedulerDriver.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/SchedulerDriver.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/SchedulerDriver.java Wed Jan 25 17:19:27 2012
@@ -24,20 +24,107 @@ import java.util.Collection;
import java.util.Map;
+/**
+ * Abstract interface for connecting a scheduler to Mesos. This
+ * interface is used both to manage the scheduler's lifecycle (start
+ * it, stop it, or wait for it to finish) and to interact with Mesos
+ * (e.g., launch tasks, kill tasks, etc.).
+ */
public interface SchedulerDriver {
- // Lifecycle methods.
- public Status start();
- public Status stop();
- public Status stop(boolean failover);
- public Status abort();
- public Status join();
- public Status run();
-
- // Communication methods.
- public Status requestResources(Collection<ResourceRequest> requests);
- public Status launchTasks(OfferID offerId, Collection<TaskDescription> tasks, Filters filters);
- public Status launchTasks(OfferID offerId, Collection<TaskDescription> tasks);
- public Status killTask(TaskID taskId);
- public Status reviveOffers();
- public Status sendFrameworkMessage(SlaveID slaveId, ExecutorID executorId, byte[] data);
-};
+ /**
+ * Starts the scheduler driver. This needs to be called before any
+ * other driver calls are made.
+ */
+ Status start();
+
+ /**
+ * Stops the scheduler driver. If the 'failover' flag is set to
+ * false then it is expected that this framework will never
+ * reconnect to Mesos and all of its executors and tasks can be
+ * terminated. Otherwise, all executors and tasks will remain
+ * running (for some master specified failover timeout) allowing the
+ * scheduler to reconnect (possibly in the same process, or from a
+ * different process, for example, on a different machine).
+ */
+ Status stop(boolean failover);
+
+ /**
+ * Stops the scheduler driver assuming no failover.
+ */
+ Status stop();
+
+ /**
+ * Aborts the driver so that no more callbacks can be made to the
+ * scheduler. The semantics of abort and stop have deliberately been
+ * separated so that code can detect an aborted driver (i.e., via
+ * the return status of {@link #join}, see below), and instantiate
+ * and start another driver if desired (from within the same
+ * process).
+ */
+ Status abort();
+
+ /**
+ * Waits for the driver to be stopped or aborted, possibly
+ * _blocking_ the current thread indefinitely. The return status of
+ * this function can be used to determine if the driver was aborted
+ * (see mesos.proto for a description of Status).
+ */
+ Status join();
+
+ /**
+ * Starts and immediately joins (i.e., blocks on) the driver.
+ */
+ Status run();
+
+ /**
+ * Requests resources from Mesos (see mesos.proto for a description
+ * of ResourceRequest and how, for example, to request resources
+ * from specific slaves). Any resources available are offered to the
+ * framework via {@link Scheduler#resourceOffers} callback,
+ * asynchronously.
+ */
+ Status requestResources(Collection<ResourceRequest> requests);
+
+ /**
+ * Launches the given set of tasks. Note that the current mechanism
+ * of rejecting resources is to invoke this with an empty collection
+ * of tasks. A framework can also specify filters on all resources
+ * unused (see mesos.proto for a description of Filters). Note that
+ * currently tasks can only be launched per offer. In the future,
+ * frameworks will be allowed to aggregate offers (resources) to
+ * launch their tasks.
+ */
+ Status launchTasks(OfferID offerId,
+ Collection<TaskDescription> tasks,
+ Filters filters);
+
+ /**
+ * Launches the given set of tasks. See above for details.
+ */
+ Status launchTasks(OfferID offerId,
+ Collection<TaskDescription> tasks);
+
+ /**
+ * Kills the specified task. Note that attempting to kill a task is
+ * currently not reliable. If, for example, a scheduler fails over
+ * while it was attempting to kill a task it will need to retry in
+ * the future (these semantics may be changed in the future).
+ */
+ Status killTask(TaskID taskId);
+
+ /**
+ * Removes all filters, previously set by the framework (via {@link
+ * #launchTasks}). This enables the framework to receive offers
+ * from those filtered slaves.
+ */
+ Status reviveOffers();
+
+ /**
+ * Sends a message from the framework to one of its executors. These
+ * messages are best effort; do not expect a framework message to be
+ * retransmitted in any reliable fashion.
+ */
+ Status sendFrameworkMessage(SlaveID slaveId,
+ ExecutorID executorId,
+ byte[] data);
+}
Modified: incubator/mesos/trunk/src/python/src/mesos.py
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/python/src/mesos.py?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/python/src/mesos.py (original)
+++ incubator/mesos/trunk/src/python/src/mesos.py Wed Jan 25 17:19:27 2012
@@ -14,12 +14,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+# See include/mesos/scheduler.hpp, include/mesos/executor.hpp and
+# include/mesos/mesos.proto for more information documenting this
+# interface.
+
import sys
import _mesos
-
-# Base class for Mesos schedulers. Users' schedulers should extend this class
-# to get default implementations of methods they don't override.
+# Base class for Mesos schedulers. Users' schedulers should extend
+# this class to get default implementations of methods they don't
+# override.
class Scheduler:
def registered(self, driver, frameworkId): pass
def resourceOffers(self, driver, offers): pass
@@ -28,14 +32,14 @@ class Scheduler:
def frameworkMessage(self, driver, message): pass
def slaveLost(self, driver, slaveId): pass
- # Default implementation of error() prints to stderr because we can't
- # make error() an abstract method in Python
+ # Default implementation of error() prints to stderr because we
+ # can't make error() an abstract method in Python.
def error(self, driver, code, message):
print >> sys.stderr, "Error from Mesos: %s (code: %d)" % (message, code)
-# Interface for Mesos scheduler drivers. Users may wish to extend this class
-# in mock objects for tests.
+# Interface for Mesos scheduler drivers. Users may wish to extend this
+# class in mock objects for tests.
class SchedulerDriver:
def start(self): pass
def stop(self, failover=False): pass
@@ -49,8 +53,8 @@ class SchedulerDriver:
def sendFrameworkMessage(self, slaveId, executorId, data): pass
-# Base class for Mesos executors. Users' executors should extend this class
-# to get default implementations of methods they don't override.
+# Base class for Mesos executors. Users' executors should extend this
+# class to get default implementations of methods they don't override.
class Executor:
def init(self, driver, args): pass
def launchTask(self, driver, task): pass
@@ -58,14 +62,14 @@ class Executor:
def frameworkMessage(self, driver, message): pass
def shutdown(self, driver): pass
- # Default implementation of error() prints to stderr because we can't
- # make error() an abstract method in Python
+ # Default implementation of error() prints to stderr because we
+ # can't make error() an abstract method in Python.
def error(self, driver, code, message):
print >> sys.stderr, "Error from Mesos: %s (code: %d)" % (message, code)
-# Interface for Mesos executor drivers. Users may wish to extend this class
-# in mock objects for tests.
+# Interface for Mesos executor drivers. Users may wish to extend this
+# class in mock objects for tests.
class ExecutorDriver:
def start(self): pass
def stop(self): pass