You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by tr...@apache.org on 2018/08/10 09:57:38 UTC

[flink] 04/04: [FLINK-9795][mesos, docs] Update Mesos documentation

This is an automated email from the ASF dual-hosted git repository.

trohrmann pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git

commit a442eb6c0388558c6fb2e5e616cd1cd15038b95c
Author: gyao <ga...@data-artisans.com>
AuthorDate: Thu Aug 9 13:27:01 2018 +0200

    [FLINK-9795][mesos, docs] Update Mesos documentation
    
    [FLINK-9795][mesos, docs] Remove unnecessary remark about task reconciliation.
    
    The config key high-availability.zookeeper.path.mesos-workers already has a
    default value. Even without explicitly setting the key, the task reconciliation
    will work. Moreover, if there would not be a default key, the code would throw an NPE. So
    either way, the remark is only confusing the reader.
    
    [FLINK-9795][mesos, docs] Remove configuration keys from Mesos Setup page.
    
    - Remove the Mesos specific configuration keys from the Mesos Setup page because
    they duplicate what is already on the configuration page.
    - Add missing descriptions for some of the keys that are under the Mesos section of the configuration
    page.
    - Improve formatting of the descriptions.
    
    [FLINK-9795][mesos, docs] Document which config options are only used in legacy mode.
    
    [FLINK-9795][mesos, docs] Document that mesos.initial-tasks is only needed in legacy mode.
    
    [FLINK-9795][mesos, docs] Clarify necessity of Marathon in documentation.
    
    [FLINK-9795][mesos, docs] Rewrite "Flink's JobManager and Web Interface" section.
    
    [FLINK-9795][mesos, docs] Add missing period at the end of sentence.
    
    This closes #6533.
---
 .../high_availability_zookeeper_configuration.html |  2 +-
 docs/_includes/generated/mesos_configuration.html  |  8 +--
 .../mesos_task_manager_configuration.html          |  8 +--
 docs/ops/deployment/mesos.md                       | 83 ++++------------------
 .../configuration/HighAvailabilityOptions.java     |  5 +-
 .../flink/mesos/configuration/MesosOptions.java    | 31 +++++---
 .../MesosTaskManagerParameters.java                | 22 ++++--
 7 files changed, 68 insertions(+), 91 deletions(-)

diff --git a/docs/_includes/generated/high_availability_zookeeper_configuration.html b/docs/_includes/generated/high_availability_zookeeper_configuration.html
index a49d160..6577878 100644
--- a/docs/_includes/generated/high_availability_zookeeper_configuration.html
+++ b/docs/_includes/generated/high_availability_zookeeper_configuration.html
@@ -60,7 +60,7 @@
         <tr>
             <td><h5>high-availability.zookeeper.path.mesos-workers</h5></td>
             <td style="word-wrap: break-word;">"/mesos-workers"</td>
-            <td>ZooKeeper root path (ZNode) for Mesos workers.</td>
+            <td>The ZooKeeper root path for persisting the Mesos worker information.</td>
         </tr>
         <tr>
             <td><h5>high-availability.zookeeper.path.root</h5></td>
diff --git a/docs/_includes/generated/mesos_configuration.html b/docs/_includes/generated/mesos_configuration.html
index c514c86..54e92e5 100644
--- a/docs/_includes/generated/mesos_configuration.html
+++ b/docs/_includes/generated/mesos_configuration.html
@@ -15,17 +15,17 @@
         <tr>
             <td><h5>mesos.initial-tasks</h5></td>
             <td style="word-wrap: break-word;">0</td>
-            <td>The initial workers to bring up when the master starts</td>
+            <td>The initial workers to bring up when the master starts. This option is ignored unless Flink is in <a href="#legacy">legacy mode</a>.</td>
         </tr>
         <tr>
             <td><h5>mesos.master</h5></td>
             <td style="word-wrap: break-word;">(none)</td>
-            <td>The Mesos master URL. The value should be in one of the following forms: "host:port", "zk://host1:port1,host2:port2,.../path", "zk://username:password@host1:port1,host2:port2,.../path" or "file:///path/to/file"</td>
+            <td>The Mesos master URL. The value should be in one of the following forms: <ul><li>host:port</li><li>zk://host1:port1,host2:port2,.../path</li><li>zk://username:password@host1:port1,host2:port2,.../path</li><li>file:///path/to/file</li></ul></td>
         </tr>
         <tr>
             <td><h5>mesos.maximum-failed-tasks</h5></td>
             <td style="word-wrap: break-word;">-1</td>
-            <td>The maximum number of failed workers before the cluster fails. May be set to -1 to disable this feature</td>
+            <td>The maximum number of failed workers before the cluster fails. May be set to -1 to disable this feature. This option is ignored unless Flink is in <a href="#legacy">legacy mode</a>.</td>
         </tr>
         <tr>
             <td><h5>mesos.resourcemanager.artifactserver.port</h5></td>
@@ -65,7 +65,7 @@
         <tr>
             <td><h5>mesos.resourcemanager.tasks.port-assignments</h5></td>
             <td style="word-wrap: break-word;">(none)</td>
-            <td>Comma-separated list of configuration keys which represent a configurable port.All port keys will dynamically get a port assigned through Mesos.</td>
+            <td>Comma-separated list of configuration keys which represent a configurable port. All port keys will dynamically get a port assigned through Mesos.</td>
         </tr>
     </tbody>
 </table>
diff --git a/docs/_includes/generated/mesos_task_manager_configuration.html b/docs/_includes/generated/mesos_task_manager_configuration.html
index 0af844d..1e67f84 100644
--- a/docs/_includes/generated/mesos_task_manager_configuration.html
+++ b/docs/_includes/generated/mesos_task_manager_configuration.html
@@ -10,12 +10,12 @@
         <tr>
             <td><h5>mesos.constraints.hard.hostattribute</h5></td>
             <td style="word-wrap: break-word;">(none)</td>
-            <td>Constraints for task placement on mesos.</td>
+            <td>Constraints for task placement on Mesos based on agent attributes. Takes a comma-separated list of key:value pairs corresponding to the attributes exposed by the target mesos agents. Example: az:eu-west-1a,series:t2</td>
         </tr>
         <tr>
             <td><h5>mesos.resourcemanager.tasks.bootstrap-cmd</h5></td>
             <td style="word-wrap: break-word;">(none)</td>
-            <td></td>
+            <td>A command which is executed before the TaskManager is started.</td>
         </tr>
         <tr>
             <td><h5>mesos.resourcemanager.tasks.container.docker.force-pull-image</h5></td>
@@ -50,12 +50,12 @@
         <tr>
             <td><h5>mesos.resourcemanager.tasks.gpus</h5></td>
             <td style="word-wrap: break-word;">0</td>
-            <td></td>
+            <td>GPUs to assign to the Mesos workers.</td>
         </tr>
         <tr>
             <td><h5>mesos.resourcemanager.tasks.hostname</h5></td>
             <td style="word-wrap: break-word;">(none)</td>
-            <td></td>
+            <td>Optional value to define the TaskManager’s hostname. The pattern _TASK_ is replaced by the actual id of the Mesos task. This can be used to configure the TaskManager to use Mesos DNS (e.g. _TASK_.flink-service.mesos) for name lookups.</td>
         </tr>
         <tr>
             <td><h5>mesos.resourcemanager.tasks.mem</h5></td>
diff --git a/docs/ops/deployment/mesos.md b/docs/ops/deployment/mesos.md
index aca6f23..1ff8afa 100644
--- a/docs/ops/deployment/mesos.md
+++ b/docs/ops/deployment/mesos.md
@@ -59,13 +59,11 @@ or configuration files. For instance, in non-containerized environments, the
 artifact server will provide the Flink binaries. What files will be served
 depends on the configuration overlay used.
 
-### Flink's JobManager and Web Interface
+### Flink's Dispatcher and Web Interface
 
-The Mesos scheduler currently resides with the JobManager but will be started
-independently of the JobManager in future versions (see
-[FLIP-6](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=65147077)). The
-proposed changes will also add a Dispatcher component which will be the central
-point for job submission and monitoring.
+The Dispatcher and the web interface provide a central point for monitoring,
+job submission, and other client interaction with the cluster
+(see [FLIP-6](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=65147077)).
 
 ### Startup script and configuration overlays
 
@@ -139,7 +137,7 @@ More information about the deployment scripts can be found [here](http://mesos.a
 
 ### Installing Marathon
 
-Optionally, you may also [install Marathon](https://mesosphere.github.io/marathon/docs/) which will be necessary to run Flink in high availability (HA) mode.
+Optionally, you may also [install Marathon](https://mesosphere.github.io/marathon/docs/) which enables you to run Flink in [high availability (HA) mode](#high-availability).
 
 ### Pre-installing Flink vs Docker/Mesos containers
 
@@ -171,8 +169,6 @@ which manage the Flink processes in a Mesos cluster:
    It is automatically launched by the Mesos worker node to bring up a new TaskManager.
 
 In order to run the `mesos-appmaster.sh` script you have to define `mesos.master` in the `flink-conf.yaml` or pass it via `-Dmesos.master=...` to the Java process.
-Additionally, you should define the number of task managers which are started by Mesos via `mesos.initial-tasks`.
-This value can also be defined in the `flink-conf.yaml` or passed as a Java property.
 
 When executing `mesos-appmaster.sh`, it will create a job manager on the machine where you executed the script.
 In contrast to that, the task managers will be run as Mesos tasks in the Mesos cluster.
@@ -188,19 +184,21 @@ For example:
         -Djobmanager.heap.mb=1024 \
         -Djobmanager.rpc.port=6123 \
         -Drest.port=8081 \
-        -Dmesos.initial-tasks=10 \
         -Dmesos.resourcemanager.tasks.mem=4096 \
         -Dtaskmanager.heap.mb=3500 \
         -Dtaskmanager.numberOfTaskSlots=2 \
         -Dparallelism.default=10
 
+<div class="alert alert-info">
+  <strong>Note:</strong> If Flink is in <a href="{{ site.baseurl }}/ops/config.html#legacy">legacy mode</a>,
+  you should additionally define the number of task managers that are started by Mesos via
+  <a href="{{ site.baseurl }}/ops/config.html#mesos-initial-tasks"><code>mesos.initial-tasks</code></a>.
+</div>
 
 ### High Availability
 
 You will need to run a service like Marathon or Apache Aurora which takes care of restarting the Flink master process in case of node or process failures.
-In addition, Zookeeper needs to be configured like described in the [High Availability section of the Flink docs]({{ site.baseurl }}/ops/jobmanager_high_availability.html)
-
-For the reconciliation of tasks to work correctly, please also set `high-availability.zookeeper.path.mesos-workers` to a valid Zookeeper path.
+In addition, Zookeeper needs to be configured like described in the [High Availability section of the Flink docs]({{ site.baseurl }}/ops/jobmanager_high_availability.html).
 
 #### Marathon
 
@@ -211,7 +209,7 @@ Here is an example configuration for Marathon:
 
     {
         "id": "flink",
-        "cmd": "$FLINK_HOME/bin/mesos-appmaster.sh -Djobmanager.heap.mb=1024 -Djobmanager.rpc.port=6123 -Drest.port=8081 -Dmesos.initial-tasks=1 -Dmesos.resourcemanager.tasks.mem=1024 -Dtaskmanager.heap.mb=1024 -Dtaskmanager.numberOfTaskSlots=2 -Dparallelism.default=2 -Dmesos.resourcemanager.tasks.cpus=1",
+        "cmd": "$FLINK_HOME/bin/mesos-appmaster.sh -Djobmanager.heap.mb=1024 -Djobmanager.rpc.port=6123 -Drest.port=8081 -Dmesos.resourcemanager.tasks.mem=1024 -Dtaskmanager.heap.mb=1024 -Dtaskmanager.numberOfTaskSlots=2 -Dparallelism.default=2 -Dmesos.resourcemanager.tasks.cpus=1",
         "cpus": 1.0,
         "mem": 1024
     }
@@ -220,60 +218,7 @@ When running Flink with Marathon, the whole Flink cluster including the job mana
 
 ### Configuration parameters
 
-`mesos.initial-tasks`: The initial workers to bring up when the master starts (**DEFAULT**: The number of workers specified at cluster startup).
-
-`mesos.constraints.hard.hostattribute`: Constraints for task placement on Mesos based on agent attributes (**DEFAULT**: None).
-Takes a comma-separated list of key:value pairs corresponding to the attributes exposed by the target
-mesos agents.  Example: `az:eu-west-1a,series:t2`
-
-`mesos.maximum-failed-tasks`: The maximum number of failed workers before the cluster fails (**DEFAULT**: Number of initial workers).
-May be set to -1 to disable this feature.
-
-`mesos.master`: The Mesos master URL. The value should be in one of the following forms:
-
-* `host:port`
-* `zk://host1:port1,host2:port2,.../path`
-* `zk://username:password@host1:port1,host2:port2,.../path`
-* `file:///path/to/file`
-
-`mesos.failover-timeout`: The failover timeout in seconds for the Mesos scheduler, after which running tasks are automatically shut down (**DEFAULT:** 600).
-
-`mesos.resourcemanager.artifactserver.port`:The config parameter defining the Mesos artifact server port to use. Setting the port to 0 will let the OS choose an available port.
-
-`mesos.resourcemanager.framework.name`: Mesos framework name (**DEFAULT:** Flink)
-
-`mesos.resourcemanager.framework.role`: Mesos framework role definition (**DEFAULT:** *)
-
-`high-availability.zookeeper.path.mesos-workers`: The ZooKeeper root path for persisting the Mesos worker information.
-
-`mesos.resourcemanager.framework.principal`: Mesos framework principal (**NO DEFAULT**)
-
-`mesos.resourcemanager.framework.secret`: Mesos framework secret (**NO DEFAULT**)
-
-`mesos.resourcemanager.framework.user`: Mesos framework user (**DEFAULT:**"")
-
-`mesos.resourcemanager.artifactserver.ssl.enabled`: Enables SSL for the Flink artifact server (**DEFAULT**: true). Note that `security.ssl.enabled` also needs to be set to `true` encryption to enable encryption.
-
-`mesos.resourcemanager.tasks.mem`: Memory to assign to the Mesos workers in MB (**DEFAULT**: 1024)
-
-`mesos.resourcemanager.tasks.cpus`: CPUs to assign to the Mesos workers (**DEFAULT**: 0.0)
-
-`mesos.resourcemanager.tasks.gpus`: GPUs to assign to the Mesos workers (**DEFAULT**: 0.0)
-
-`mesos.resourcemanager.tasks.container.type`: Type of the containerization used: "mesos" or "docker" (DEFAULT: mesos);
-
-`mesos.resourcemanager.tasks.container.image.name`: Image name to use for the container (**NO DEFAULT**)
-
-`mesos.resourcemanager.tasks.container.volumes`: A comma separated list of `[host_path:]`container_path`[:RO|RW]`. This allows for mounting additional volumes into your container. (**NO DEFAULT**)
-
-`mesos.resourcemanager.tasks.container.docker.parameters`: Custom parameters to be passed into docker run command when using the docker containerizer. Comma separated list of `key=value` pairs. `value` may contain '=' (**NO DEFAULT**)
-
-`mesos.resourcemanager.tasks.uris`: A comma separated list of URIs of custom artifacts to be downloaded into the sandbox of Mesos workers. (**NO DEFAULT**)
-
-`mesos.resourcemanager.tasks.container.docker.force-pull-image`: Instruct the docker containerizer to forcefully pull the image rather than reuse a cached version. (**DEFAULT**: false)
-
-`mesos.resourcemanager.tasks.hostname`: Optional value to define the TaskManager's hostname. The pattern `_TASK_` is replaced by the actual id of the Mesos task. This can be used to configure the TaskManager to use Mesos DNS (e.g. `_TASK_.flink-service.mesos`) for name lookups. (**NO DEFAULT**)
-
-`mesos.resourcemanager.tasks.bootstrap-cmd`: A command which is executed before the TaskManager is started (**NO DEFAULT**).
+For a list of Mesos specific configuration, refer to the [Mesos section]({{ site.baseurl }}/ops/config.html#mesos)
+of the configuration documentation.
 
 {% top %}
diff --git a/flink-core/src/main/java/org/apache/flink/configuration/HighAvailabilityOptions.java b/flink-core/src/main/java/org/apache/flink/configuration/HighAvailabilityOptions.java
index c8b8ae9..787efff 100644
--- a/flink-core/src/main/java/org/apache/flink/configuration/HighAvailabilityOptions.java
+++ b/flink-core/src/main/java/org/apache/flink/configuration/HighAvailabilityOptions.java
@@ -22,6 +22,7 @@ import org.apache.flink.annotation.PublicEvolving;
 import org.apache.flink.annotation.docs.ConfigGroup;
 import org.apache.flink.annotation.docs.ConfigGroups;
 import org.apache.flink.annotation.docs.Documentation;
+import org.apache.flink.configuration.description.Description;
 
 import static org.apache.flink.configuration.ConfigOptions.key;
 
@@ -157,7 +158,9 @@ public class HighAvailabilityOptions {
 			key("high-availability.zookeeper.path.mesos-workers")
 			.defaultValue("/mesos-workers")
 			.withDeprecatedKeys("recovery.zookeeper.path.mesos-workers")
-			.withDescription("ZooKeeper root path (ZNode) for Mesos workers.");
+			.withDescription(Description.builder()
+				.text("The ZooKeeper root path for persisting the Mesos worker information.")
+				.build());
 
 	// ------------------------------------------------------------------------
 	//  ZooKeeper Client Settings
diff --git a/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java b/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java
index 7046605..426a891 100644
--- a/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java
+++ b/flink-mesos/src/main/java/org/apache/flink/mesos/configuration/MesosOptions.java
@@ -19,6 +19,9 @@
 package org.apache.flink.mesos.configuration;
 
 import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.description.Description;
+import org.apache.flink.configuration.description.LinkElement;
+import org.apache.flink.configuration.description.TextElement;
 
 import static org.apache.flink.configuration.ConfigOptions.key;
 
@@ -33,7 +36,10 @@ public class MesosOptions {
 	public static final ConfigOption<Integer> INITIAL_TASKS =
 		key("mesos.initial-tasks")
 			.defaultValue(0)
-			.withDescription("The initial workers to bring up when the master starts");
+			.withDescription(Description.builder()
+				.text("The initial workers to bring up when the master starts. ")
+				.text("This option is ignored unless Flink is in %s.", LinkElement.link("#legacy", "legacy mode"))
+				.build());
 
 	/**
 	 * The maximum number of failed Mesos tasks before entirely stopping
@@ -44,8 +50,10 @@ public class MesosOptions {
 	public static final ConfigOption<Integer> MAX_FAILED_TASKS =
 		key("mesos.maximum-failed-tasks")
 			.defaultValue(-1)
-			.withDescription("The maximum number of failed workers before the cluster fails. May be set to -1 to disable" +
-				" this feature");
+			.withDescription(Description.builder()
+				.text("The maximum number of failed workers before the cluster fails. May be set to -1 to disable this feature. ")
+				.text("This option is ignored unless Flink is in %s.", LinkElement.link("#legacy", "legacy mode"))
+				.build());
 
 	/**
 	 * The Mesos master URL.
@@ -63,9 +71,14 @@ public class MesosOptions {
 	public static final ConfigOption<String> MASTER_URL =
 		key("mesos.master")
 			.noDefaultValue()
-			.withDescription("The Mesos master URL. The value should be in one of the following forms:" +
-				" \"host:port\", \"zk://host1:port1,host2:port2,.../path\"," +
-				" \"zk://username:password@host1:port1,host2:port2,.../path\" or \"file:///path/to/file\"");
+			.withDescription(Description.builder()
+				.text("The Mesos master URL. The value should be in one of the following forms: ")
+				.list(
+					TextElement.text("host:port"),
+					TextElement.text("zk://host1:port1,host2:port2,.../path"),
+					TextElement.text("zk://username:password@host1:port1,host2:port2,.../path"),
+					TextElement.text("file:///path/to/file"))
+				.build());
 
 	/**
 	 * The failover timeout for the Mesos scheduler, after which running tasks are automatically shut down.
@@ -125,7 +138,9 @@ public class MesosOptions {
 	 */
 	public static final ConfigOption<String> PORT_ASSIGNMENTS = key("mesos.resourcemanager.tasks.port-assignments")
 		.defaultValue("")
-		.withDescription("Comma-separated list of configuration keys which represent a configurable port." +
-			"All port keys will dynamically get a port assigned through Mesos.");
+		.withDescription(Description.builder()
+			.text("Comma-separated list of configuration keys which represent a configurable port. " +
+				"All port keys will dynamically get a port assigned through Mesos.")
+			.build());
 
 }
diff --git a/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosTaskManagerParameters.java b/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosTaskManagerParameters.java
index d915b36..0315629 100644
--- a/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosTaskManagerParameters.java
+++ b/flink-mesos/src/main/java/org/apache/flink/mesos/runtime/clusterframework/MesosTaskManagerParameters.java
@@ -22,6 +22,7 @@ import org.apache.flink.configuration.ConfigOption;
 import org.apache.flink.configuration.Configuration;
 import org.apache.flink.configuration.IllegalConfigurationException;
 import org.apache.flink.configuration.TaskManagerOptions;
+import org.apache.flink.configuration.description.Description;
 import org.apache.flink.runtime.clusterframework.ContaineredTaskManagerParameters;
 import org.apache.flink.util.Preconditions;
 
@@ -65,7 +66,8 @@ public class MesosTaskManagerParameters {
 
 	public static final ConfigOption<Integer> MESOS_RM_TASKS_GPUS =
 		key("mesos.resourcemanager.tasks.gpus")
-		.defaultValue(0);
+		.defaultValue(0)
+		.withDescription(Description.builder().text("GPUs to assign to the Mesos workers.").build());
 
 	public static final ConfigOption<String> MESOS_RM_CONTAINER_TYPE =
 		key("mesos.resourcemanager.tasks.container.type")
@@ -79,7 +81,12 @@ public class MesosTaskManagerParameters {
 
 	public static final ConfigOption<String> MESOS_TM_HOSTNAME =
 		key("mesos.resourcemanager.tasks.hostname")
-		.noDefaultValue();
+		.noDefaultValue()
+		.withDescription(Description.builder()
+			.text("Optional value to define the TaskManager’s hostname. " +
+				"The pattern _TASK_ is replaced by the actual id of the Mesos task. " +
+				"This can be used to configure the TaskManager to use Mesos DNS (e.g. _TASK_.flink-service.mesos) for name lookups.")
+			.build());
 
 	public static final ConfigOption<String> MESOS_TM_CMD =
 		key("mesos.resourcemanager.tasks.taskmanager-cmd")
@@ -87,7 +94,10 @@ public class MesosTaskManagerParameters {
 
 	public static final ConfigOption<String> MESOS_TM_BOOTSTRAP_CMD =
 		key("mesos.resourcemanager.tasks.bootstrap-cmd")
-		.noDefaultValue();
+		.noDefaultValue()
+		.withDescription(Description.builder()
+			.text("A command which is executed before the TaskManager is started.")
+			.build());
 
 	public static final ConfigOption<String> MESOS_TM_URIS =
 		key("mesos.resourcemanager.tasks.uris")
@@ -116,7 +126,11 @@ public class MesosTaskManagerParameters {
 	public static final ConfigOption<String> MESOS_CONSTRAINTS_HARD_HOSTATTR =
 		key("mesos.constraints.hard.hostattribute")
 		.noDefaultValue()
-		.withDescription("Constraints for task placement on mesos.");
+		.withDescription(Description.builder()
+			.text("Constraints for task placement on Mesos based on agent attributes. " +
+				"Takes a comma-separated list of key:value pairs corresponding to the attributes exposed by the target mesos agents. " +
+				"Example: az:eu-west-1a,series:t2")
+			.build());
 
 	/**
 	 * Value for {@code MESOS_RESOURCEMANAGER_TASKS_CONTAINER_TYPE} setting. Tells to use the Mesos containerizer.