You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by be...@apache.org on 2012/01/25 18:19:28 UTC

svn commit: r1235838 - in /incubator/mesos/trunk: include/mesos/ src/ src/java/src/org/apache/mesos/ src/python/src/

Author: benh
Date: Wed Jan 25 17:19:27 2012
New Revision: 1235838

URL: http://svn.apache.org/viewvc?rev=1235838&view=rev
Log:
Added some preliminary documentation.

Modified:
    incubator/mesos/trunk/include/mesos/executor.hpp
    incubator/mesos/trunk/include/mesos/mesos.proto
    incubator/mesos/trunk/include/mesos/scheduler.hpp
    incubator/mesos/trunk/src/Makefile.am
    incubator/mesos/trunk/src/java/src/org/apache/mesos/Executor.java
    incubator/mesos/trunk/src/java/src/org/apache/mesos/ExecutorDriver.java
    incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosExecutorDriver.java
    incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosSchedulerDriver.java
    incubator/mesos/trunk/src/java/src/org/apache/mesos/Scheduler.java
    incubator/mesos/trunk/src/java/src/org/apache/mesos/SchedulerDriver.java
    incubator/mesos/trunk/src/python/src/mesos.py

Modified: incubator/mesos/trunk/include/mesos/executor.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/include/mesos/executor.hpp?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/include/mesos/executor.hpp (original)
+++ incubator/mesos/trunk/include/mesos/executor.hpp Wed Jan 25 17:19:27 2012
@@ -23,34 +23,112 @@
 
 #include <mesos/mesos.hpp>
 
+/**
+ * Mesos executor interface and executor driver. An executor is
+ * responsible for launching tasks in a framework specific way (i.e.,
+ * creating new threads, new processes, etc). One or more executors
+ * from the same framework may run concurrently on the same
+ * machine. Note that we use the term "executor" fairly loosely to
+ * refer to the code that implements the Executor interface (see
+ * below) as well as the program that is responsible for instantiating
+ * a new MesosExecutorDriver (also below). In fact, while a Mesos
+ * slave is responsible for (forking and) executing the "executor",
+ * there is no reason why whatever the slave executed might not itself
+ * execute another program which actually instantiates and
+ * runs the MesosExecutorDriver. The only contract with the slave is
+ * that the program that it invokes does not exit until the "executor"
+ * has completed. Thus, what the slave executes may be nothing more
+ * than a script which actually executes (or forks and waits) the
+ * "real" executor.
+ *
+ * IF YOU FIND YOURSELF MODIFYING COMMENTS HERE PLEASE CONSIDER MAKING
+ * THE SAME MODIFICATIONS FOR OTHER LANGUAGE BINDINGS (e.g., Java:
+ * src/java/src/org/apache/mesos, Python: src/python/src, etc.).
+ */
 
 namespace mesos {
 
+// A few forward declarations.
 class ExecutorDriver;
 
-namespace internal { class ExecutorProcess; }
-
+namespace internal {
+class ExecutorProcess;
+}
 
 /**
- * Callback interface to be implemented by frameworks' executors.
+ * Callback interface to be implemented by frameworks' executors. Note
+ * that only one callback will be invoked at a time, so it is not
+ * recommended that you block within a callback because it may cause a
+ * deadlock.
+ *
+ * Each callback includes a pointer to the executor driver that was
+ * used to run this executor. The pointer will not change for the
+ * duration of an executor (i.e., from the point you do
+ * ExecutorDriver::start() to the point that ExecutorDriver::join()
+ * returns). This is intended for convenience so that an executor
+ * doesn't need to store a pointer to the driver itself.
  */
 class Executor
 {
 public:
+  /**
+   * Empty virtual destructor (necessary to instantiate subclasses).
+   */
   virtual ~Executor() {}
 
+  /**
+   * Invoked once the executor driver has been able to successfully
+   * connect with Mesos. See mesos.proto for a description of the
+   * ExecutorArgs argument. In particular, a scheduler can pass some
+   * data to its executors through the ExecutorInfo::data field which
+   * gets copied into the ExecutorArgs::data field. (TODO(benh): This
+   * callback will likely become two callbacks,
+   * registered/reregistered, once executors can outlive slaves.)
+   */
   virtual void init(ExecutorDriver* driver, const ExecutorArgs& args) = 0;
 
+  /**
+   * Invoked when a task has been launched on this executor (initiated
+   * via Scheduler::launchTasks). Note that this task can be realized
+   * with a thread, a process, or some simple computation, however, no
+   * other callbacks will be invoked on this executor until this
+   * callback has returned.
+   */
   virtual void launchTask(ExecutorDriver* driver,
                           const TaskDescription& task) = 0;
 
+  /**
+   * Invoked when a task running within this executor has been killed
+   * (via SchedulerDriver::killTask). Note that no status update will
+   * be sent on behalf of the executor, the executor is responsible
+   * for creating a new TaskStatus (i.e., with TASK_KILLED) and
+   * invoking ExecutorDriver::sendStatusUpdate.
+   */
   virtual void killTask(ExecutorDriver* driver, const TaskID& taskId) = 0;
 
+  /**
+   * Invoked when a framework message has arrived for this
+   * executor. These messages are best effort; do not expect a
+   * framework message to be retransmitted in any reliable fashion.
+   */
   virtual void frameworkMessage(ExecutorDriver* driver,
 				const std::string& data) = 0;
 
+  /**
+   * Invoked when the executor should terminate all of its currently
+   * running tasks. Note that after Mesos has determined that an
+   * executor has terminated, any tasks that the executor did not send
+   * terminal status updates for (e.g., TASK_KILLED, TASK_FINISHED,
+   * TASK_FAILED, etc.) will have a TASK_LOST status update created.
+   */
   virtual void shutdown(ExecutorDriver* driver) = 0;
 
+  /**
+   * Invoked when a fatal error has occurred with the executor and/or
+   * executor driver. The driver will be aborted BEFORE invoking this
+   * callback. This function is deprecated and will probably be
+   * removed in a subsequent release.
+   */
   virtual void error(ExecutorDriver* driver,
                      int code,
                      const std::string& message) = 0;
@@ -58,51 +136,112 @@ public:
 
 
 /**
- * Abstract interface for driving an executor connected to Mesos.
- * This interface is used both to start the executor running (and
- * communicating with the slave) and to send information from the executor
- * to Mesos (such as status updates). Concrete implementations of
- * ExecutorDriver will take a Executor as a parameter in order to make
- * callbacks into it on various events.
+ * Abstract interface for connecting an executor to Mesos. This
+ * interface is used both to manage the executor's lifecycle (start
+ * it, stop it, or wait for it to finish) and to interact with Mesos
+ * (e.g., send status updates, send framework messages, etc.). See
+ * MesosExecutorDriver below for a concrete example of an
+ * ExecutorDriver.
  */
 class ExecutorDriver
 {
 public:
+  /**
+   * Empty virtual destructor (necessary to instantiate subclasses).
+   */
   virtual ~ExecutorDriver() {}
 
-  // Lifecycle methods.
+  /**
+   * Starts the executor driver. This needs to be called before any
+   * other driver calls are made.
+   */
   virtual Status start() = 0;
+
+  /**
+   * Stops the executor driver.
+   */
   virtual Status stop() = 0;
+
+  /**
+   * Aborts the driver so that no more callbacks can be made to the
+   * executor. The semantics of abort and stop have deliberately been
+   * separated so that code can detect an aborted driver (i.e., via
+   * the return status of ExecutorDriver::join, see below), and
+   * instantiate and start another driver if desired (from within the
+   * same process ... although this functionality is currently not
+   * supported for executors).
+   */
   virtual Status abort() = 0;
+
+  /**
+   * Waits for the driver to be stopped or aborted, possibly
+   * _blocking_ the current thread indefinitely. The return status of
+   * this function can be used to determine if the driver was aborted
+   * (see mesos.proto for a description of Status).
+   */
   virtual Status join() = 0;
-  virtual Status run() = 0; // Start and then join driver.
 
-  // Communication methods from executor to Mesos.
+  /**
+   * Starts and immediately joins (i.e., blocks on) the driver.
+   */
+  virtual Status run() = 0;
+
+  /**
+   * Sends a status update to the framework scheduler, retrying as
+   * necessary until an acknowledgement has been received or the
+   * executor is terminated (in which case, a TASK_LOST status update
+   * will be sent). See Scheduler::statusUpdate for more information
+   * about status update acknowledgements.
+   */
   virtual Status sendStatusUpdate(const TaskStatus& status) = 0;
 
+  /**
+   * Sends a message to the framework scheduler. These messages are
+   * best effort; do not expect a framework message to be
+   * retransmitted in any reliable fashion.
+   */
   virtual Status sendFrameworkMessage(const std::string& data) = 0;
 };
 
 
 /**
- * Concrete implementation of ExecutorDriver that communicates with a
- * Mesos slave. The slave's location is read from environment variables
- * set by it when it execs the user's executor script; users only need
- * to create the MesosExecutorDriver and call run() on it.
+ * Concrete implementation of an ExecutorDriver that connects an
+ * Executor with a Mesos slave. The MesosExecutorDriver is thread-safe.
+ *
+ * The driver is responsible for invoking the Executor callbacks as it
+ * communicates with the Mesos slave.
+ *
+ * Note that blocking on the MesosExecutorDriver (e.g., via
+ * MesosExecutorDriver::join) doesn't affect the executor callbacks in
+ * any way because they are handled by a different thread.
+ *
+ * See src/examples/test_executor.cpp for an example of using the
+ * MesosExecutorDriver.
  */
 class MesosExecutorDriver : public ExecutorDriver
 {
 public:
+  /**
+   * Creates a new driver that uses the specified Executor.
+   */
   MesosExecutorDriver(Executor* executor);
+
+  /**
+   * This destructor will block indefinitely if
+   * MesosExecutorDriver::start was invoked successfully (possibly via
+   * MesosExecutorDriver::run) and MesosExecutorDriver::stop has not
+   * been invoked.
+   */
   virtual ~MesosExecutorDriver();
 
-  // Lifecycle methods
+  /**
+   * See ExecutorDriver for descriptions of these.
+   */
   virtual Status start();
   virtual Status stop();
   virtual Status abort();
   virtual Status join();
-  virtual Status run(); // Start and then join driver
-
+  virtual Status run();
   virtual Status sendStatusUpdate(const TaskStatus& status);
   virtual Status sendFrameworkMessage(const std::string& data);
 
@@ -111,13 +250,13 @@ private:
 
   Executor* executor;
 
-  // Libprocess process for communicating with slave
+  // Libprocess process for communicating with slave.
   internal::ExecutorProcess* process;
 
-  // Mutex to enforce all non-callbacks are execute serially
+  // Mutex to enforce all non-callbacks are executed serially.
   pthread_mutex_t mutex;
 
-  // Condition variable for waiting until driver terminates
+  // Condition variable for waiting until driver terminates.
   pthread_cond_t cond;
 
   enum State {

Modified: incubator/mesos/trunk/include/mesos/mesos.proto
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/include/mesos/mesos.proto?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/include/mesos/mesos.proto (original)
+++ incubator/mesos/trunk/include/mesos/mesos.proto Wed Jan 25 17:19:27 2012
@@ -22,44 +22,112 @@ option java_package = "org.apache.mesos"
 option java_outer_classname = "Protos";
 
 
+/**
+ * Status is used to indicate the state of the scheduler and executor
+ * driver after function calls.
+ */
+enum Status {
+  OK = 0;
+
+  DRIVER_NOT_RUNNING = 1;
+  DRIVER_ALREADY_RUNNING = 2;
+  DRIVER_ABORTED = 3;
+  DRIVER_STOPPED = 4;
+}
+
+
+/**
+ * A unique ID assigned to a framework. A framework can reuse this ID
+ * in order to do failover (see MesosSchedulerDriver).
+ */
 message FrameworkID {
   required string value = 1;
 }
 
 
+/**
+ * A unique ID assigned to an offer.
+ */
 message OfferID {
   required string value = 1;
 }
 
 
+/**
+ * A unique ID assigned to a slave. Currently, a slave gets a new ID
+ * whenever it (re)registers with Mesos. Framework writers shouldn't
+ * assume any binding between a slave ID and a hostname.
+ */
 message SlaveID {
   required string value = 1;
 }
 
 
+/**
+ * A framework generated ID to distinguish a task. The ID must remain
+ * unique while the task is active. However, a framework can reuse an
+ * ID _only_ if a previous task with the same ID has reached a
+ * terminal state (e.g., TASK_FINISHED, TASK_LOST, TASK_KILLED, etc.).
+ */
 message TaskID {
   required string value = 1;
 }
 
 
+/**
+ * A framework generated ID to distinguish an executor. Only one
+ * executor with the same ID can be active on the same slave at a
+ * time.
+ */
 message ExecutorID {
   required string value = 1;
 }
 
 
-message Param {
-  required string key = 1;
-  required string value = 2;
+/**
+ * Describes a framework, including its default executor.
+ */
+message FrameworkInfo {
+  required string user = 1;
+  required string name = 2;
+  required ExecutorInfo executor = 3;
 }
 
 
-// TODO(benh): Eliminate this once some notion of a protocol buffer
-// ResourceOffer object makes it's way to the Mesos interface.
-message Params {
-  repeated Param param = 1;
+/**
+ * Describes information about an executor. The 'params' field can be
+ * used to set environment variables for the executor. The 'data'
+ * field can be used to pass arbitrary bytes to an executor (see
+ * ExecutorArgs and Executor::init).
+ */
+message ExecutorInfo {
+  required ExecutorID executor_id = 1;
+  required string uri = 2;
+  repeated Resource resources = 5;
+  optional Params params = 3;
+  optional bytes data = 4;
+}
+
+
+/**
+ * Describes a slave. The 'webui_hostname' and 'webui_port' are
+ * provided in the event a host has different private and public
+ * hostnames (e.g., Amazon EC2).
+ */
+message SlaveInfo {
+  required string hostname = 1;
+  // TODO(benh,andyk): Send bound ports, not just hostname.
+  required string webui_hostname = 2;
+  optional int32 webui_port = 4 [default = 8081];
+  repeated Resource resources = 3;
+  repeated Attribute attributes = 5;
 }
 
 
+/**
+ * Describes an Attribute or Resource "value". A value is described
+ * using the standard protocol buffer "union" trick.
+ */
 message Value {
   enum Type {
     SCALAR = 0;
@@ -97,7 +165,11 @@ message Value {
 }
 
 
-// Attribute doesn't support arithmetic operations.
+/**
+ * Describes an attribute that can be set on a machine. For now,
+ * attributes and resources share the same "value" type, but this may
+ * change in the future and attributes may only be string based.
+ */
 message Attribute {
   required string name = 1;
   required Value.Type type = 2;
@@ -107,8 +179,15 @@ message Attribute {
 }
 
 
-// TODO(benh): Add better support for resources.
-// Resource support arithmetic operations.
+/**
+ * Describes a resource on a machine. A resource can take on one of
+ * three types: scalar (double), a list of finite and discrete ranges
+ * (e.g., [1-10, 20-30]), or a set of items. A resource is described
+ * using the standard protocol buffer "union" trick.
+ *
+ * TODO(benh): Add better support for "expected" resources (e.g.,
+ * cpus, memory, disk, network).
+ */
 message Resource {
   required string name = 1;
   required Value.Type type = 2;
@@ -118,15 +197,41 @@ message Resource {
 }
 
 
-message ExecutorArgs {
-  required FrameworkID framework_id = 1;
-  required ExecutorID executor_id = 2;
+/**
+ * Describes a request for resources that can be used by a framework
+ * to proactively influence the allocator.  If 'slave_id' is provided
+ * then this request is assumed to only apply to resources on that
+ * slave.
+ */
+message ResourceRequest {
+  optional SlaveID slave_id = 1;
+  repeated Resource resources = 2;
+}
+
+
+/**
+ * Describes some resources available on a slave. An offer only
+ * contains resources from a single slave.
+ */
+message Offer {
+  required OfferID id = 1;
+  required FrameworkID framework_id = 2;
   required SlaveID slave_id = 3;
   required string hostname = 4;
-  optional bytes data = 5;
-};
+  repeated Resource resources = 5;
+  repeated Attribute attributes = 7;
+  repeated ExecutorID executor_ids = 6;
+}
 
 
+/**
+ * Describes a task. Passed from the scheduler all the way to an
+ * executor (see SchedulerDriver::launchTasks and
+ * Executor::launchTask). If no executor is specified then the default
+ * executor is assumed (see FrameworkInfo). Otherwise, a different
+ * executor can be used to launch this task, and subsequent tasks
+ * meant for the same executor can reuse the same ExecutorInfo struct.
+ */
 message TaskDescription {
   required string name = 1;
   required TaskID task_id = 2;
@@ -137,16 +242,26 @@ message TaskDescription {
 }
 
 
+/**
+ * Describes possible task states. IMPORTANT: Mesos assumes tasks that
+ * enter terminal states (see below) imply the task is no longer
+ * running and thus cleans up anything associated with the task
+ * (ultimately offering any resources being consumed by that task to
+ * another task).
+ */
 enum TaskState {
   TASK_STARTING = 0;
   TASK_RUNNING = 1;
-  TASK_FINISHED = 2;
-  TASK_FAILED = 3;
-  TASK_KILLED = 4;
-  TASK_LOST = 5;
+  TASK_FINISHED = 2; // TERMINAL.
+  TASK_FAILED = 3; // TERMINAL.
+  TASK_KILLED = 4; // TERMINAL.
+  TASK_LOST = 5; // TERMINAL.
 }
 
 
+/**
+ * Describes the current status of a task.
+ */
 message TaskStatus {
   required TaskID task_id = 1;
   required TaskState state = 2;
@@ -155,60 +270,40 @@ message TaskStatus {
 }
 
 
-message ExecutorInfo {
-  required ExecutorID executor_id = 1;
-  required string uri = 2;
-  repeated Resource resources = 5;
-  optional Params params = 3;
-  optional bytes data = 4;
-}
-
-
-message FrameworkInfo {
-  required string user = 1;
-  required string name = 2;
-  required ExecutorInfo executor = 3;
-}
-
-
-message SlaveInfo {
-  // TODO(benh,andyk): Send bound ports, not just hostname.
-  required string hostname = 1;
-  required string webui_hostname = 2;
-  optional int32 webui_port = 4 [default = 8081];
-  repeated Resource resources = 3;
-  repeated Attribute attributes = 5;
-}
-
-
-message ResourceRequest {
-  optional SlaveID slave_id = 1;
-  repeated Resource resources = 2;
+/**
+ * Describes possible filters that can be applied to unused resources
+ * (see SchedulerDriver::launchTasks) to influence the allocator.
+ */
+message Filters {
+  // Time to consider unused resources refused.
+  optional double refuse_seconds = 1;
 }
 
 
-message Offer {
-  required OfferID id = 1;
-  required FrameworkID framework_id = 2;
+/**
+ * Passed to Executor::init.
+ */
+message ExecutorArgs {
+  required FrameworkID framework_id = 1;
+  required ExecutorID executor_id = 2;
   required SlaveID slave_id = 3;
   required string hostname = 4;
-  repeated Resource resources = 5;
-  repeated ExecutorID executor_ids = 6;
-  repeated Attribute attributes = 7;
+  optional bytes data = 5;
 }
 
 
-message Filters {
-  // Time to consider unused resources refused.
-  optional double refuse_seconds = 1;
+/**
+ * A generic (key, value) pair used in various places for parameters.
+ */
+message Param {
+  required string key = 1;
+  required string value = 2;
 }
-
-
-enum Status {
-  OK = 0;
-
-  DRIVER_NOT_RUNNING = 1;
-  DRIVER_ALREADY_RUNNING = 2;
-  DRIVER_ABORTED = 3;
-  DRIVER_STOPPED = 4;
+ 
+ 
+/**
+ * Collection of Param.
+ */
+message Params {
+  repeated Param param = 1;
 }

Modified: incubator/mesos/trunk/include/mesos/scheduler.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/include/mesos/scheduler.hpp?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/include/mesos/scheduler.hpp (original)
+++ incubator/mesos/trunk/include/mesos/scheduler.hpp Wed Jan 25 17:19:27 2012
@@ -25,9 +25,18 @@
 
 #include <mesos/mesos.hpp>
 
+/**
+ * Mesos scheduler interface and scheduler driver. A scheduler is used
+ * to interact with Mesos in order to run distributed computations.
+ *
+ * IF YOU FIND YOURSELF MODIFYING COMMENTS HERE PLEASE CONSIDER MAKING
+ * THE SAME MODIFICATIONS FOR OTHER LANGUAGE BINDINGS (e.g., Java:
+ * src/java/src/org/apache/mesos, Python: src/python/src, etc.).
+*/
 
 namespace mesos {
 
+// A few forward declarations.
 class SchedulerDriver;
 
 namespace internal {
@@ -38,35 +47,101 @@ class Configuration;
 
 
 /**
- * Callback interface to be implemented by new frameworks' schedulers.
+ * Callback interface to be implemented by frameworks'
+ * schedulers. Note that only one callback will be invoked at a time,
+ * so it is not recommended that you block within a callback because
+ * it may cause a deadlock.
+ *
+ * Each callback includes a pointer to the scheduler driver that was
+ * used to run this scheduler. The pointer will not change for the
+ * duration of a scheduler (i.e., from the point you do
+ * SchedulerDriver::start() to the point that SchedulerDriver::join()
+ * returns). This is intended for convenience so that a scheduler
+ * doesn't need to store a pointer to the driver itself.
  */
 class Scheduler
 {
 public:
+  /**
+   * Empty virtual destructor (necessary to instantiate subclasses).
+   */
   virtual ~Scheduler() {}
 
-  // Callbacks for various Mesos events.
-
+  /**
+   * Invoked when the scheduler successfully registers with a Mesos
+   * master. A unique ID (generated by the master) used for
+   * distinguishing this framework from others is provided as an
+   * argument.
+   */
   virtual void registered(SchedulerDriver* driver,
                           const FrameworkID& frameworkId) = 0;
 
+  /**
+   * Invoked when resources have been offered to this framework. A
+   * single offer will only contain resources from a single slave.
+   * Resources associated with an offer will not be re-offered to
+   * _this_ framework until either (a) this framework has rejected
+   * those resources (see SchedulerDriver::launchTasks) or (b) those
+   * resources have been rescinded (see Scheduler::offerRescinded).
+   * Note that resources may be concurrently offered to more than one
+   * framework at a time (depending on the allocator being used). In
+   * that case, the first framework to launch tasks using those
+   * resources will be able to use them while the other frameworks
+   * will have those resources rescinded (or if a framework has
+   * already launched tasks with those resources then those tasks will
+   * fail with a TASK_LOST status and a message saying as much).
+   */
   virtual void resourceOffers(SchedulerDriver* driver,
                               const std::vector<Offer>& offers) = 0;
 
+  /**
+   * Invoked when an offer is no longer valid (e.g., the slave was
+   * lost or another framework used resources in the offer). If for
+   * whatever reason an offer is never rescinded (e.g., dropped
+   * message, failing over framework, etc.), a framework that attempts
+   * to launch tasks using an invalid offer will receive TASK_LOST
+   * status updates for those tasks (see Scheduler::resourceOffers).
+   */
   virtual void offerRescinded(SchedulerDriver* driver,
                               const OfferID& offerId) = 0;
 
+  /**
+   * Invoked when the status of a task has changed (e.g., a slave is
+   * lost and so the task is lost, a task finishes and an executor
+   * sends a status update saying so, etc). Note that returning from
+   * this callback _acknowledges_ receipt of this status update! If
+   * for whatever reason the scheduler aborts during this callback (or
+   * the process exits) another status update will be delivered (note,
+   * however, that this is currently not true if the slave sending the
+   * status update is lost/fails during that time).
+   */
   virtual void statusUpdate(SchedulerDriver* driver,
                             const TaskStatus& status) = 0;
 
+  /**
+   * Invoked when an executor sends a message. These messages are best
+   * effort; do not expect a framework message to be retransmitted in
+   * any reliable fashion.
+   */
   virtual void frameworkMessage(SchedulerDriver* driver,
                                 const SlaveID& slaveId,
                                 const ExecutorID& executorId,
                                 const std::string& data) = 0;
 
+  /**
+   * Invoked when a slave has been determined unreachable (e.g.,
+   * machine failure, network partition). Most frameworks will need to
+   * reschedule any tasks launched on this slave on a new slave.
+   */
   virtual void slaveLost(SchedulerDriver* driver,
                          const SlaveID& slaveId) = 0;
 
+  /**
+   * Invoked when there is an unrecoverable error in the scheduler or
+   * scheduler driver. The driver will be aborted BEFORE invoking this
+   * callback. This function is deprecated and will probably be
+   * removed in a subsequent release.
+   */
   virtual void error(SchedulerDriver* driver,
                      int code,
                      const std::string& message) = 0;
@@ -74,43 +149,102 @@ public:
 
 
 /**
- * Abstract interface for driving a scheduler connected to Mesos.
- * This interface is used both to manage the scheduler's lifecycle (start it,
- * stop it, or wait for it to finish) and to send commands from the user
- * framework to Mesos (such as replies to offers). Concrete implementations
- * of SchedulerDriver will take a Scheduler as a parameter in order to make
- * callbacks into it on various events.
+ * Abstract interface for connecting a scheduler to Mesos. This
+ * interface is used both to manage the scheduler's lifecycle (start
+ * it, stop it, or wait for it to finish) and to interact with Mesos
+ * (e.g., launch tasks, kill tasks, etc.). See MesosSchedulerDriver
+ * below for a concrete example of a SchedulerDriver.
  */
 class SchedulerDriver
 {
 public:
+  /**
+   * Empty virtual destructor (necessary to instantiate subclasses).
+   */
   virtual ~SchedulerDriver() {}
 
-  // Lifecycle methods.
+  /**
+   * Starts the scheduler driver. This needs to be called before any
+   * other driver calls are made.
+   */
   virtual Status start() = 0;
+
+  /**
+   * Stops the scheduler driver. If the 'failover' flag is set to
+   * false then it is expected that this framework will never
+   * reconnect to Mesos and all of its executors and tasks can be
+   * terminated. Otherwise, all executors and tasks will remain
+   * running (for some master specified failover timeout) allowing the
+   * scheduler to reconnect (possibly in the same process, or from a
+   * different process, for example, on a different machine).
+   */
   virtual Status stop(bool failover = false) = 0;
-  // Puts driver into ABORTED state after which no more callbacks
-  // can be made to the scheduler. Also, the master is signalled
-  // that the driver is inactived. The only call a scheduler
-  // can make after abort is stop(), which can un-register the
-  // framework (if requested).
+
+  /**
+   * Aborts the driver so that no more callbacks can be made to the
+   * scheduler. The semantics of abort and stop have deliberately been
+   * separated so that code can detect an aborted driver (i.e., via
+   * the return status of SchedulerDriver::join, see below), and
+   * instantiate and start another driver if desired (from within the
+   * same process).
+   */
   virtual Status abort() = 0;
+
+  /**
+   * Waits for the driver to be stopped or aborted, possibly
+   * _blocking_ the current thread indefinitely. The return status of
+   * this function can be used to determine if the driver was aborted
+   * (see mesos.proto for a description of Status).
+   */
   virtual Status join() = 0;
-  virtual Status run() = 0; // Start and then join driver.
 
-  // Communication methods.
+  /**
+   * Starts and immediately joins (i.e., blocks on) the driver.
+   */
+  virtual Status run() = 0;
 
+  /**
+   * Requests resources from Mesos (see mesos.proto for a description
+   * of ResourceRequest and how, for example, to request resources
+   * from specific slaves). Any resources available are offered to the
+   * framework via Scheduler::resourceOffers callback, asynchronously.
+   */
   virtual Status requestResources(
       const std::vector<ResourceRequest>& requests) = 0;
 
+  /**
+   * Launches the given set of tasks. Note that the current mechanism
+   * of rejecting resources is to invoke this with an empty collection
+   * of tasks. A framework can also specify filters on all resources
+   * unused (see mesos.proto for a description of Filters). Note that
+   * currently tasks can only be launched per offer. In the future,
+   * frameworks will be allowed to aggregate offers (resources) to
+   * launch their tasks.
+   */
   virtual Status launchTasks(const OfferID& offerId,
                              const std::vector<TaskDescription>& tasks,
                              const Filters& filters = Filters()) = 0;
 
+  /**
+   * Kills the specified task. Note that attempting to kill a task is
+   * currently not reliable. If, for example, a scheduler fails over
+   * while it was attempting to kill a task it will need to retry in
+   * the future (these semantics may be changed in the future).
+   */
   virtual Status killTask(const TaskID& taskId) = 0;
 
+  /**
+   * Removes all filters previously set by the framework (via
+   * launchTasks()). This enables the framework to receive offers from
+   * those filtered slaves.
+   */
   virtual Status reviveOffers() = 0;
 
+  /**
+   * Sends a message from the framework to one of its executors. These
+   * messages are best effort; do not expect a framework message to be
+   * retransmitted in any reliable fashion.
+   */
   virtual Status sendFrameworkMessage(const SlaveID& slaveId,
                                       const ExecutorID& executorId,
                                       const std::string& data) = 0;
@@ -118,21 +252,37 @@ public:
 
 
 /**
- * Concrete implementation of SchedulerDriver that communicates with
- * a Mesos master.
+ * Concrete implementation of a SchedulerDriver that connects a
+ * Scheduler with a Mesos master. The MesosSchedulerDriver is
+ * thread-safe.
+ *
+ * Note that scheduler failover is supported in Mesos. After a
+ * scheduler is registered with Mesos it may failover (to a new
+ * process on the same machine or across multiple machines) by
+ * creating a new driver with the ID given to it in
+ * Scheduler::registered.
+ *
+ * The driver is responsible for invoking the Scheduler callbacks as
+ * it communicates with the Mesos master.
+ *
+ * Note that blocking on the MesosSchedulerDriver (e.g., via
+ * MesosSchedulerDriver::join) doesn't affect the scheduler callbacks
+ * in any way because they are handled by a different thread.
+ *
+ * See src/examples/test_framework.cpp for an example of using the
+ * MesosSchedulerDriver.
  */
 class MesosSchedulerDriver : public SchedulerDriver
 {
 public:
   /**
-   * Create a scheduler driver with a given Mesos master URL.
-   * Additional Mesos config options are read from the environment, as well
-   * as any config files found through it.
+   * Creates a new scheduler driver that connects to a Mesos master
+   * through the specified URL. Optionally providing an existing
+   * framework ID can be used to failover a framework.
    *
-   * @param sched scheduler to make callbacks into
-   * @param url Mesos master URL
-   * @param frameworkId optional framework ID for registering
-   *        redundant schedulers for the same framework
+   * Any Mesos configuration options are read from environment
+   * variables, as well as any configuration files found through the
+   * environment variables.
    */
   MesosSchedulerDriver(Scheduler* sched,
                        const std::string& frameworkName,
@@ -141,15 +291,13 @@ public:
                        const FrameworkID& frameworkId = FrameworkID());
 
   /**
-   * Create a scheduler driver with a configuration, which the master URL
-   * and possibly other options are read from.
-   * Additional Mesos config options are read from the environment, as well
-   * as any config files given through conf or found in the environment.
+   * Creates a new scheduler driver that connects to a Mesos master
+   * through a "url" configuration option specified in the 'params'
+   * argument. Optionally providing an existing framework ID can be
+   * used to failover a framework.
    *
-   * @param sched scheduler to make callbacks into
-   * @param params Map containing configuration options
-   * @param frameworkId optional framework ID for registering
-   *        redundant schedulers for the same framework
+   * Additional Mesos config options are obtained from the 'params'
+   * argument.
    */
   MesosSchedulerDriver(Scheduler* sched,
                        const std::string& frameworkName,
@@ -158,19 +306,10 @@ public:
                        const FrameworkID& frameworkId = FrameworkID());
 
   /**
-   * Create a scheduler driver with a config read from command-line arguments.
-   * Additional Mesos config options are read from the environment, as well
-   * as any config files given through conf or found in the environment.
-   *
-   * This constructor is not available through SWIG since it's difficult
-   * for it to properly map arrays to an argc/argv pair.
-   *
-   * @param sched scheduler to make callbacks into
-   * @param argc argument count
-   * @param argv argument values (argument 0 is expected to be program name
-   *             and will not be looked at for options)
-   * @param frameworkId optional framework ID for registering
-   *        redundant schedulers for the same framework
+   * Creates a new scheduler driver that connects to a Mesos master
+   * through a "url" configuration option specified on the
+   * command-line (via 'argc' and 'argv'). Optionally providing an
+   * existing framework ID can be used to failover a framework.
    */
   MesosSchedulerDriver(Scheduler* sched,
                        const std::string& frameworkName,
@@ -179,40 +318,44 @@ public:
                        char** argv,
                        const FrameworkID& frameworkId = FrameworkID());
 
+  /**
+   * This destructor will block indefinitely if
+   * MesosSchedulerDriver::start was invoked successfully (possibly
+   * via MesosSchedulerDriver::run) and MesosSchedulerDriver::stop has
+   * not been invoked.
+   */
   virtual ~MesosSchedulerDriver();
 
-  // Lifecycle methods.
+  /**
+   * See SchedulerDriver for descriptions of these.
+   */
   virtual Status start();
   virtual Status stop(bool failover = false);
   virtual Status abort();
   virtual Status join();
-  virtual Status run(); // Start and then join driver.
-
-  // Communication methods.
+  virtual Status run();
   virtual Status requestResources(
       const std::vector<ResourceRequest>& requests);
-
-  virtual Status launchTasks(const OfferID& offerId,
-                             const std::vector<TaskDescription>& tasks,
-                             const Filters& filters = Filters());
-
+  virtual Status launchTasks(
+      const OfferID& offerId,
+      const std::vector<TaskDescription>& tasks,
+      const Filters& filters = Filters());
   virtual Status killTask(const TaskID& taskId);
-
   virtual Status reviveOffers();
-
-  virtual Status sendFrameworkMessage(const SlaveID& slaveId,
-                                      const ExecutorID& executorId,
-                                      const std::string& data);
+  virtual Status sendFrameworkMessage(
+      const SlaveID& slaveId,
+      const ExecutorID& executorId,
+      const std::string& data);
 
 private:
-  // Initialization method used by constructors
+  // Initialization method used by constructors.
   void init(Scheduler* sched,
             internal::Configuration* conf,
             const FrameworkID& frameworkId,
             const std::string& frameworkName,
             const ExecutorInfo& executorInfo);
 
-  // Internal utility method to report an error to the scheduler
+  // Internal utility method to report an error to the scheduler.
   void error(int code, const std::string& message);
 
   Scheduler* sched;
@@ -221,7 +364,7 @@ private:
   std::string frameworkName;
   ExecutorInfo executorInfo;
 
-  // Libprocess process for communicating with master
+  // Libprocess process for communicating with master.
   internal::SchedulerProcess* process;
 
   // Coordination between masters
@@ -231,10 +374,10 @@ private:
   // TODO(benh|matei): Does this still need to be a pointer?
   internal::Configuration* conf;
 
-  // Mutex to enforce all non-callbacks are execute serially
+  // Mutex to enforce all non-callbacks are executed serially.
   pthread_mutex_t mutex;
 
-  // Condition variable for waiting until driver terminates
+  // Condition variable for waiting until driver terminates.
   pthread_cond_t cond;
 
   enum State {
@@ -248,7 +391,6 @@ private:
   State state;
 };
 
-
 } // namespace mesos {
 
 #endif // __MESOS_SCHEDULER_HPP__

Modified: incubator/mesos/trunk/src/Makefile.am
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/Makefile.am?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/Makefile.am (original)
+++ incubator/mesos/trunk/src/Makefile.am Wed Jan 25 17:19:27 2012
@@ -261,7 +261,7 @@ libmesos_la_LIBADD = libmesos_no_third_p
 # For non-convenience libraries we need to link them in to make the shared
 # library each time. (Currently, we don't support platforms where this is not
 # possible.)
-libmesos_la_LIBADD += $(top_builddir)/$(PROTOBUF)/src/.libs/libprotobuf.a
+libmesos_la_LIBADD += $(top_builddir)/$(PROTOBUF)/src/.libs/libprotobuf.la
 libmesos_la_LIBADD += $(top_builddir)/$(GLOG)/.libs/libglog.la
 libmesos_la_LIBADD += $(top_builddir)/$(LIBPROCESS)/libprocess.a
 libmesos_la_LIBADD += $(top_builddir)/$(LIBEV)/.libs/libev.la

Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/Executor.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/Executor.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/Executor.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/Executor.java Wed Jan 25 17:19:27 2012
@@ -23,12 +23,70 @@ import org.apache.mesos.Protos.*;
 
 /**
  * Callback interface to be implemented by frameworks' executors.
+ * Note that only one callback will be invoked at a time, so it is not
+ * recommended that you block within a callback because it may cause a
+ * deadlock.
+ *
+ * Each callback includes a reference to the executor driver that was
+ * used to run this executor. The reference will not change for the
+ * duration of an executor (i.e., from the point you do {@link
+ * ExecutorDriver#start} to the point that {@link ExecutorDriver#join}
+ * returns). This is intended for convenience so that an executor
+ * doesn't need to store a reference to the driver itself.
  */
 public interface Executor {
-  public void init(ExecutorDriver driver, ExecutorArgs args);
-  public void launchTask(ExecutorDriver driver, TaskDescription task);
-  public void killTask(ExecutorDriver driver, TaskID taskId);
-  public void frameworkMessage(ExecutorDriver driver, byte[] data);
-  public void shutdown(ExecutorDriver driver);
-  public void error(ExecutorDriver driver, int code, String message);
+  /**
+   * Invoked once the executor driver has been able to successfully
+   * connect with Mesos. See mesos.proto for a description of the
+   * ExecutorArgs argument. In particular, a scheduler can pass some
+   * data to its executors through the {@link ExecutorInfo#data}
+   * field which gets copied into the {@link ExecutorArgs#data}
+   * field. (TODO(benh): This callback will likely become two
+   * callbacks, registered/reregistered, once executors can outlive
+   * slaves.)
+   */
+  void init(ExecutorDriver driver, ExecutorArgs args);
+
+  /**
+   * Invoked when a task has been launched on this executor (initiated
+   * via {@link SchedulerDriver#launchTasks}). Note that this task can be
+   * realized with a thread, a process, or some simple computation,
+   * however, no other callbacks will be invoked on this executor
+   * until this callback has returned.
+   */
+  void launchTask(ExecutorDriver driver, TaskDescription task);
+
+  /**
+   * Invoked when a task running within this executor has been killed
+   * (via {@link SchedulerDriver#killTask}). Note that no status
+   * update will be sent on behalf of the executor, the executor is
+   * responsible for creating a new TaskStatus (i.e., with
+   * TASK_KILLED) and invoking {@link
+   * ExecutorDriver#sendStatusUpdate}.
+   */
+  void killTask(ExecutorDriver driver, TaskID taskId);
+
+  /**
+   * Invoked when a framework message has arrived for this
+   * executor. These messages are best effort; do not expect a
+   * framework message to be retransmitted in any reliable fashion.
+   */
+  void frameworkMessage(ExecutorDriver driver, byte[] data);
+
+  /**
+   * Invoked when the executor should terminate all of its currently
+   * running tasks. Note that after Mesos has determined that an
+   * executor has terminated any tasks that the executor did not send
+   * terminal status updates for (e.g., TASK_KILLED, TASK_FINISHED,
+   * TASK_FAILED, etc) a TASK_LOST status update will be created.
+   */
+  void shutdown(ExecutorDriver driver);
+
+  /**
+   * Invoked when a fatal error has occurred with the executor and/or
+   * executor driver. The driver will be aborted BEFORE invoking this
+   * callback. This function is deprecated and will probably be
+   * removed in a subsequent release.
+   */
+  void error(ExecutorDriver driver, int code, String message);
 }

Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/ExecutorDriver.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/ExecutorDriver.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/ExecutorDriver.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/ExecutorDriver.java Wed Jan 25 17:19:27 2012
@@ -22,22 +22,60 @@ import org.apache.mesos.Protos.*;
 
 
 /**
- * Abstract interface for driving an executor connected to Mesos.
- * This interface is used both to start the executor running (and
- * communicating with the slave) and to send information from the executor
- * to Nexus (such as status updates). Concrete implementations of
- * ExecutorDriver will take a Executor as a parameter in order to make
- * callbacks into it on various events.
+ * Abstract interface for connecting an executor to Mesos. This
+ * interface is used both to manage the executor's lifecycle (start
+ * it, stop it, or wait for it to finish) and to interact with Mesos
+ * (e.g., send status updates, send framework messages, etc.).
  */
 public interface ExecutorDriver {
-  // Lifecycle methods.
+  /**
+   * Starts the executor driver. This needs to be called before any
+   * other driver calls are made.
+   */
   public Status start();
+
+  /**
+   * Stops the executor driver.
+   */
   public Status stop();
+
+  /**
+   * Aborts the driver so that no more callbacks can be made to the
+   * executor. The semantics of abort and stop have deliberately been
+   * separated so that code can detect an aborted driver (i.e., via
+   * the return status of {@link #join}, see below),
+   * and instantiate and start another driver if desired (from within
+   * the same process ... although this functionality is currently not
+   * supported for executors).
+   */
   public Status abort();
+  
+  /**
+   * Waits for the driver to be stopped or aborted, possibly
+   * _blocking_ the current thread indefinitely. The return status of
+   * this function can be used to determine if the driver was aborted
+   * (see mesos.proto for a description of Status).
+   */
   public Status join();
+  
+  /**
+   * Starts and immediately joins (i.e., blocks on) the driver.
+   */
   public Status run();
-
-  // Communication methods.
+ 
+  /**
+   * Sends a status update to the framework scheduler, retrying as
+   * necessary until an acknowledgement has been received or the
+   * executor is terminated (in which case, a TASK_LOST status update
+   * will be sent). See {@link Scheduler#statusUpdate} for more
+   * information about status update acknowledgements.
+   */
   public Status sendStatusUpdate(TaskStatus status);
+  
+  /**
+   * Sends a message to the framework scheduler. These messages are
+   * best effort; do not expect a framework message to be
+   * retransmitted in any reliable fashion.
+   */
   public Status sendFrameworkMessage(byte[] data);
 }

Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosExecutorDriver.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosExecutorDriver.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosExecutorDriver.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosExecutorDriver.java Wed Jan 25 17:19:27 2012
@@ -22,23 +22,37 @@ import org.apache.mesos.Protos.*;
 
 
 /**
- * Concrete implementation of ExecutorDriver that communicates with a
- * Mesos slave. The slave's location is read from environment variables
- * set by it when it execs the user's executor script; users only need
- * to create the MesosExecutorDriver and call run() on it.
+ * Concrete implementation of an ExecutorDriver that connects an
+ * Executor with a Mesos slave. The MesosExecutorDriver is
+ * thread-safe.
+ *
+ * The driver is responsible for invoking the Executor callbacks as it
+ * communicates with the Mesos slave.
+ *
+ * Note that blocking on the MesosExecutorDriver (e.g., via {@link
+ * #join}) doesn't affect the executor callbacks in any way because
+ * they are handled by a different thread.
+ *
+ * See src/examples/java/TestExecutor.java for an example of using the
+ * MesosExecutorDriver.
  */
 public class MesosExecutorDriver implements ExecutorDriver {
   static {
     System.loadLibrary("mesos");
   }
 
+  /**
+   * Creates a new driver that uses the specified Executor.
+   */
   public MesosExecutorDriver(Executor exec) {
     this.exec = exec;
 
     initialize();
   }
 
-  // Lifecycle methods.
+  /**
+   * See ExecutorDriver for descriptions of these.
+   */
   public native Status start();
   public native Status stop();
   public native Status abort();
@@ -60,4 +74,3 @@ public class MesosExecutorDriver impleme
   private long __exec;
   private long __driver;
 }
-

Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosSchedulerDriver.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosSchedulerDriver.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosSchedulerDriver.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/MesosSchedulerDriver.java Wed Jan 25 17:19:27 2012
@@ -26,14 +26,40 @@ import java.util.Map;
 
 
 /**
- * Concrete implementation of SchedulerDriver that communicates with
- * a Mesos master.
+ * Concrete implementation of a SchedulerDriver that connects a
+ * Scheduler with a Mesos master. The MesosSchedulerDriver is
+ * thread-safe.
+ *
+ * Note that scheduler failover is supported in Mesos. After a
+ * scheduler is registered with Mesos it may failover (to a new
+ * process on the same machine or across multiple machines) by
+ * creating a new driver with the ID given to it in {@link
+ * Scheduler#registered}.
+ *
+ * The driver is responsible for invoking the Scheduler callbacks as
+ * it communicates with the Mesos master.
+ *
+ * Note that blocking on the MesosSchedulerDriver (e.g., via {@link
+ * #join}) doesn't affect the scheduler callbacks in any way because
+ * they are handled by a different thread.
+ *
+ * See src/examples/java/TestFramework.java for an example of using
+ * the MesosSchedulerDriver.
  */
 public class MesosSchedulerDriver implements SchedulerDriver {
   static {
     System.loadLibrary("mesos");
   }
 
+  /**
+   * Creates a new scheduler driver that connects to a Mesos master
+   * through the specified URL. Optionally providing an existing
+   * framework ID can be used to failover a framework.
+   *
+   * Any Mesos configuration options are read from environment
+   * variables, as well as any configuration files found through the
+   * environment variables.
+   */
   public MesosSchedulerDriver(Scheduler sched,
                               String frameworkName,
                               ExecutorInfo executorInfo,
@@ -44,7 +70,7 @@ public class MesosSchedulerDriver implem
     }
 
     if (frameworkName == null) {
-      throw new NullPointerException("Not expecting a null scheduler");
+      throw new NullPointerException("Not expecting a null framework name");
     }
 
     this.sched = sched;
@@ -55,10 +81,23 @@ public class MesosSchedulerDriver implem
     initialize();
   }
 
-  public MesosSchedulerDriver(Scheduler sched, String frameworkName, ExecutorInfo executorInfo, String url) {
-    this(sched, frameworkName, executorInfo, url, FrameworkID.newBuilder().setValue("").build());
+  /**
+   * Creates a new scheduler driver. See above for details.
+   */
+  public MesosSchedulerDriver(Scheduler sched,
+                              String frameworkName,
+                              ExecutorInfo executorInfo,
+                              String url) {
+    this(sched,
+         frameworkName,
+         executorInfo,
+         url,
+         FrameworkID.newBuilder().setValue("").build());
   }
 
+  /**
+   * See SchedulerDriver for descriptions of these.
+   */
   public native Status start();
   public native Status stop(boolean failover);
   public Status stop() {
@@ -74,17 +113,22 @@ public class MesosSchedulerDriver implem
 
   public native Status requestResources(Collection<ResourceRequest> requests);
 
-  public Status launchTasks(OfferID offerId, Collection<TaskDescription> tasks) {
+  public Status launchTasks(OfferID offerId,
+                            Collection<TaskDescription> tasks) {
     return launchTasks(offerId, tasks, Filters.newBuilder().build());
   }
 
-  public native Status launchTasks(OfferID offerId, Collection<TaskDescription> tasks, Filters filters);
+  public native Status launchTasks(OfferID offerId,
+                                   Collection<TaskDescription> tasks,
+                                   Filters filters);
 
   public native Status killTask(TaskID taskId);
 
   public native Status reviveOffers();
 
-  public native Status sendFrameworkMessage(SlaveID slaveId, ExecutorID executorId, byte[] data);
+  public native Status sendFrameworkMessage(SlaveID slaveId,
+                                            ExecutorID executorId,
+                                            byte[] data);
 
   protected native void initialize();
   protected native void finalize();
@@ -97,4 +141,4 @@ public class MesosSchedulerDriver implem
 
   private long __sched;
   private long __driver;
-};
+}

Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/Scheduler.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/Scheduler.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/Scheduler.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/Scheduler.java Wed Jan 25 17:19:27 2012
@@ -24,14 +24,89 @@ import java.util.List;
 
 
 /**
- * Callback interface to be implemented by frameworks' schedulers.
+ * Callback interface to be implemented by frameworks'
+ * schedulers. Note that only one callback will be invoked at a time,
+ * so it is not recommended that you block within a callback because
+ * it may cause a deadlock.
+ *
+ * Each callback includes a reference to the scheduler driver that was
+ * used to run this scheduler. The reference will not change for the
+ * duration of a scheduler (i.e., from the point you do {@link
+ * SchedulerDriver#start} to the point that {@link
+ * SchedulerDriver#join} returns). This is intended for convenience so
+ * that a scheduler doesn't need to store a reference to the driver
+ * itself.
  */
 public interface Scheduler {
-  public void registered(SchedulerDriver driver, FrameworkID frameworkId);
-  public void resourceOffers(SchedulerDriver driver, List<Offer> offers);
-  public void offerRescinded(SchedulerDriver driver, OfferID offerId);
-  public void statusUpdate(SchedulerDriver driver, TaskStatus status);
-  public void frameworkMessage(SchedulerDriver driver, SlaveID slaveId, ExecutorID executorId, byte[] data);
-  public void slaveLost(SchedulerDriver driver, SlaveID slaveId);
-  public void error(SchedulerDriver driver, int code, String message);
+  /**
+   * Invoked when the scheduler successfully registers with a Mesos
+   * master. A unique ID (generated by the master) used for
+   * distinguishing this framework from others is provided as an
+   * argument.
+   */
+  void registered(SchedulerDriver driver, FrameworkID frameworkId);
+
+  /**
+   * Invoked when resources have been offered to this framework. A
+   * single offer will only contain resources from a single slave.
+   * Resources associated with an offer will not be re-offered to
+   * _this_ framework until either (a) this framework has rejected
+   * those resources (see {@link SchedulerDriver#launchTasks}) or (b)
+   * those resources have been rescinded (see {@link #offerRescinded}).
+   * Note that resources may be concurrently offered to more than one
+   * framework at a time (depending on the allocator being used). In
+   * that case, the first framework to launch tasks using those
+   * resources will be able to use them while the other frameworks
+   * will have those resources rescinded (or if a framework has
+   * already launched tasks with those resources then those tasks will
+   * fail with a TASK_LOST status and a message saying as much).
+   */
+  void resourceOffers(SchedulerDriver driver, List<Offer> offers);
+
+  /**
+   * Invoked when an offer is no longer valid (e.g., the slave was
+   * lost or another framework used resources in the offer). If for
+   * whatever reason an offer is never rescinded (e.g., dropped
+   * message, failing over framework, etc.), a framework that attempts
+   * to launch tasks using an invalid offer will receive TASK_LOST
+   * status updates for those tasks (see {@link #resourceOffers}).
+   */
+  void offerRescinded(SchedulerDriver driver, OfferID offerId);
+
+  /**
+   * Invoked when the status of a task has changed (e.g., a slave is
+   * lost and so the task is lost, a task finishes and an executor
+   * sends a status update saying so, etc). Note that returning from
+   * this callback _acknowledges_ receipt of this status update! If
+   * for whatever reason the scheduler aborts during this callback (or
+   * the process exits) another status update will be delivered (note,
+   * however, that this is currently not true if the slave sending the
+   * status update is lost/fails during that time).
+   */
+  void statusUpdate(SchedulerDriver driver, TaskStatus status);
+
+  /**
+   * Invoked when an executor sends a message. These messages are best
+   * effort; do not expect a framework message to be retransmitted in
+   * any reliable fashion.
+   */
+  void frameworkMessage(SchedulerDriver driver,
+                        SlaveID slaveId,
+                        ExecutorID executorId,
+                        byte[] data);
+
+  /**
+   * Invoked when a slave has been determined unreachable (e.g.,
+   * machine failure, network partition). Most frameworks will need to
+   * reschedule any tasks launched on this slave on a new slave.
+   */
+  void slaveLost(SchedulerDriver driver, SlaveID slaveId);
+
+  /**
+   * Invoked when there is an unrecoverable error in the scheduler or
+   * scheduler driver. The driver will be aborted BEFORE invoking this
+   * callback. This function is deprecated and will probably be
+   * removed in a subsequent release.
+   */
+  void error(SchedulerDriver driver, int code, String message);
 }

Modified: incubator/mesos/trunk/src/java/src/org/apache/mesos/SchedulerDriver.java
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/java/src/org/apache/mesos/SchedulerDriver.java?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/java/src/org/apache/mesos/SchedulerDriver.java (original)
+++ incubator/mesos/trunk/src/java/src/org/apache/mesos/SchedulerDriver.java Wed Jan 25 17:19:27 2012
@@ -24,20 +24,107 @@ import java.util.Collection;
 import java.util.Map;
 
 
+/**
+ * Abstract interface for connecting a scheduler to Mesos. This
+ * interface is used both to manage the scheduler's lifecycle (start
+ * it, stop it, or wait for it to finish) and to interact with Mesos
+ * (e.g., launch tasks, kill tasks, etc.).
+ */
 public interface SchedulerDriver {
-  // Lifecycle methods.
-  public Status start();
-  public Status stop();
-  public Status stop(boolean failover);
-  public Status abort();
-  public Status join();
-  public Status run();
-
-  // Communication methods.
-  public Status requestResources(Collection<ResourceRequest> requests);
-  public Status launchTasks(OfferID offerId, Collection<TaskDescription> tasks, Filters filters);
-  public Status launchTasks(OfferID offerId, Collection<TaskDescription> tasks);
-  public Status killTask(TaskID taskId);
-  public Status reviveOffers();
-  public Status sendFrameworkMessage(SlaveID slaveId, ExecutorID executorId, byte[] data);
-};
+  /**
+   * Starts the scheduler driver. This needs to be called before any
+   * other driver calls are made.
+   */
+  Status start();
+
+  /**
+   * Stops the scheduler driver. If the 'failover' flag is set to
+   * false then it is expected that this framework will never
+   * reconnect to Mesos and all of its executors and tasks can be
+   * terminated. Otherwise, all executors and tasks will remain
+   * running (for some master specified failover timeout) allowing the
+   * scheduler to reconnect (possibly in the same process, or from a
+   * different process, for example, on a different machine).
+   */
+  Status stop(boolean failover);
+
+  /**
+   * Stops the scheduler driver assuming no failover.
+   */
+  Status stop();
+
+  /**
+   * Aborts the driver so that no more callbacks can be made to the
+   * scheduler. The semantics of abort and stop have deliberately been
+   * separated so that code can detect an aborted driver (i.e., via
+   * the return status of {@link #join}, see below), and instantiate
+   * and start another driver if desired (from within the same
+   * process).
+   */
+  Status abort();
+
+  /**
+   * Waits for the driver to be stopped or aborted, possibly
+   * _blocking_ the current thread indefinitely. The return status of
+   * this function can be used to determine if the driver was aborted
+   * (see mesos.proto for a description of Status).
+   */
+  Status join();
+
+  /**
+   * Starts and immediately joins (i.e., blocks on) the driver.
+   */
+  Status run();
+
+  /**
+   * Requests resources from Mesos (see mesos.proto for a description
+   * of ResourceRequest and how, for example, to request resources
+   * from specific slaves). Any resources available are offered to the
+   * framework via {@link Scheduler#resourceOffers} callback,
+   * asynchronously.
+   */
+  Status requestResources(Collection<ResourceRequest> requests);
+
+  /**
+   * Launches the given set of tasks. Note that the current mechanism
+   * of rejecting resources is to invoke this with an empty collection
+   * of tasks. A framework can also specify filters on all resources
+   * unused (see mesos.proto for a description of Filters). Note that
+   * currently tasks can only be launched per offer. In the future,
+   * frameworks will be allowed to aggregate offers (resources) to
+   * launch their tasks.
+   */
+  Status launchTasks(OfferID offerId,
+                     Collection<TaskDescription> tasks,
+                     Filters filters);
+
+  /**
+   * Launches the given set of tasks. See above for details.
+   */
+  Status launchTasks(OfferID offerId,
+                     Collection<TaskDescription> tasks);
+
+  /**
+   * Kills the specified task. Note that attempting to kill a task is
+   * currently not reliable. If, for example, a scheduler fails over
+   * while it was attempting to kill a task it will need to retry in
+   * the future (these semantics may be changed in the future).
+   */
+  Status killTask(TaskID taskId);
+
+  /**
+   * Removes all filters, previously set by the framework (via {@link
+   * #launchTasks}). This enables the framework to receive offers
+   * from those filtered slaves.
+   */
+  Status reviveOffers();
+
+  /**
+   * Sends a message from the framework to one of its executors. These
+   * messages are best effort; do not expect a framework message to be
+   * retransmitted in any reliable fashion.
+   */
+  Status sendFrameworkMessage(SlaveID slaveId,
+                              ExecutorID executorId,
+                              byte[] data);
+}

Modified: incubator/mesos/trunk/src/python/src/mesos.py
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/python/src/mesos.py?rev=1235838&r1=1235837&r2=1235838&view=diff
==============================================================================
--- incubator/mesos/trunk/src/python/src/mesos.py (original)
+++ incubator/mesos/trunk/src/python/src/mesos.py Wed Jan 25 17:19:27 2012
@@ -14,12 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# See include/mesos/scheduler.hpp, include/mesos/executor.hpp and
+# include/mesos/mesos.proto for more information documenting this
+# interface.
+
 import sys
 import _mesos
 
-
-# Base class for Mesos schedulers. Users' schedulers should extend this class
-# to get default implementations of methods they don't override.
+# Base class for Mesos schedulers. Users' schedulers should extend
+# this class to get default implementations of methods they don't
+# override.
 class Scheduler:
   def registered(self, driver, frameworkId): pass
   def resourceOffers(self, driver, offers): pass
@@ -28,14 +32,14 @@ class Scheduler:
   def frameworkMessage(self, driver, message): pass
   def slaveLost(self, driver, slaveId): pass
 
-  # Default implementation of error() prints to stderr because we can't
-  # make error() an abstract method in Python
+  # Default implementation of error() prints to stderr because we
+  # can't make error() an abstract method in Python.
   def error(self, driver, code, message):
     print >> sys.stderr, "Error from Mesos: %s (code: %d)" % (message, code)
 
 
-# Interface for Mesos scheduler drivers. Users may wish to extend this class
-# in mock objects for tests.
+# Interface for Mesos scheduler drivers. Users may wish to extend this
+# class in mock objects for tests.
 class SchedulerDriver:
   def start(self): pass
   def stop(self, failover=False): pass
@@ -49,8 +53,8 @@ class SchedulerDriver:
   def sendFrameworkMessage(self, slaveId, executorId, data): pass
 
 
-# Base class for Mesos executors. Users' executors should extend this class
-# to get default implementations of methods they don't override.
+# Base class for Mesos executors. Users' executors should extend this
+# class to get default implementations of methods they don't override.
 class Executor:
   def init(self, driver, args): pass
   def launchTask(self, driver, task): pass
@@ -58,14 +62,14 @@ class Executor:
   def frameworkMessage(self, driver, message): pass
   def shutdown(self, driver): pass
 
-  # Default implementation of error() prints to stderr because we can't
-  # make error() an abstract method in Python
+  # Default implementation of error() prints to stderr because we
+  # can't make error() an abstract method in Python.
   def error(self, driver, code, message):
     print >> sys.stderr, "Error from Mesos: %s (code: %d)" % (message, code)
 
 
-# Interface for Mesos executor drivers. Users may wish to extend this class
-# in mock objects for tests.
+# Interface for Mesos executor drivers. Users may wish to extend this
+# class in mock objects for tests.
 class ExecutorDriver:
   def start(self): pass
   def stop(self): pass