You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by uc...@apache.org on 2015/06/30 12:16:19 UTC

[49/51] [partial] flink-web git commit: [hotfix] Manual build of docs

http://git-wip-us.apache.org/repos/asf/flink-web/blob/396616d4/content/docs/0.9/apis/cluster_execution.html
----------------------------------------------------------------------
diff --git a/content/docs/0.9/apis/cluster_execution.html b/content/docs/0.9/apis/cluster_execution.html
new file mode 100644
index 0000000..f4be55c
--- /dev/null
+++ b/content/docs/0.9/apis/cluster_execution.html
@@ -0,0 +1,345 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE html>
+
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
+    
+    <title>Apache Flink 0.9.0 Documentation: Cluster Execution</title>
+    
+    <link rel="shortcut icon" href="http://flink.apache.org/docs/0.9/page/favicon.ico" type="image/x-icon">
+    <link rel="icon" href="http://flink.apache.org/docs/0.9/page/favicon.ico" type="image/x-icon">
+
+    <!-- Bootstrap -->
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
+    <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/flink.css">
+    <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/syntax.css">
+    <link rel="stylesheet" href="http://flink.apache.org/docs/0.9/page/css/codetabs.css">
+    
+    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
+    <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
+    <!--[if lt IE 9]>
+      <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
+      <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
+    <![endif]-->
+  </head>
+  <body>
+    
+    
+
+
+
+
+    <!-- Top navbar. -->
+    <nav class="navbar navbar-default navbar-fixed-top">
+      <div class="container">
+        <!-- The logo. -->
+        <div class="navbar-header">
+          <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <div class="navbar-logo">
+            <a href="http://flink.apache.org"><img alt="Apache Flink" src="http://flink.apache.org/docs/0.9/page/img/navbar-brand-logo.jpg"></a>
+          </div>
+        </div><!-- /.navbar-header -->
+
+        <!-- The navigation links. -->
+        <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
+          <ul class="nav navbar-nav">
+            <li><a href="http://flink.apache.org/docs/0.9/index.html">Overview<span class="hidden-sm hidden-xs"> 0.9.0</span></a></li>
+
+            <!-- Setup -->
+            <li class="dropdown">
+              <a href="http://flink.apache.org/docs/0.9/setup" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Setup <span class="caret"></span></a>
+              <ul class="dropdown-menu" role="menu">
+                <li><a href="http://flink.apache.org/docs/0.9/setup/building.html">Get Flink 0.9-SNAPSHOT</a></li>
+
+                <li class="divider"></li>
+                <li role="presentation" class="dropdown-header"><strong>Deployment</strong></li>
+                <li><a href="http://flink.apache.org/docs/0.9/setup/local_setup.html" class="active">Local</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/setup/cluster_setup.html">Cluster (Standalone)</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/setup/yarn_setup.html">YARN</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/setup/gce_setup.html">GCloud</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/setup/flink_on_tez.html">Flink on Tez <span class="badge">Beta</span></a></li>
+
+                <li class="divider"></li>
+                <li><a href="http://flink.apache.org/docs/0.9/setup/config.html">Configuration</a></li>
+              </ul>
+            </li>
+
+            <!-- Programming Guides -->
+            <li class="dropdown">
+              <a href="http://flink.apache.org/docs/0.9/apis" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Programming Guides <span class="caret"></span></a>
+              <ul class="dropdown-menu" role="menu">
+                <li><a href="http://flink.apache.org/docs/0.9/apis/programming_guide.html"><strong>Batch: DataSet API</strong></a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/streaming_guide.html"><strong>Streaming: DataStream API</strong> <span class="badge">Beta</span></a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/python.html">Python API <span class="badge">Beta</span></a></li>
+
+                <li class="divider"></li>
+                <li><a href="scala_shell.html">Interactive Scala Shell</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/dataset_transformations.html">Dataset Transformations</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/best_practices.html">Best Practices</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/example_connectors.html">Connectors</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/examples.html">Examples</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/local_execution.html">Local Execution</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/cluster_execution.html">Cluster Execution</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/cli.html">Command Line Interface</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/web_client.html">Web Client</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/iterations.html">Iterations</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/java8.html">Java 8</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/apis/hadoop_compatibility.html">Hadoop Compatibility <span class="badge">Beta</span></a></li>
+              </ul>
+            </li>
+
+            <!-- Libraries -->
+            <li class="dropdown">
+              <a href="http://flink.apache.org/docs/0.9/libs" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Libraries <span class="caret"></span></a>
+                <ul class="dropdown-menu" role="menu">
+                  <li><a href="http://flink.apache.org/docs/0.9/libs/spargel_guide.html">Graphs: Spargel</a></li>
+                  <li><a href="http://flink.apache.org/docs/0.9/libs/gelly_guide.html">Graphs: Gelly <span class="badge">Beta</span></a></li>
+                  <li><a href="http://flink.apache.org/docs/0.9/libs/ml/">Machine Learning <span class="badge">Beta</span></a></li>
+                  <li><a href="http://flink.apache.org/docs/0.9/libs/table.html">Relational: Table <span class="badge">Beta</span></a></li>
+              </ul>
+            </li>
+
+            <!-- Internals -->
+            <li class="dropdown">
+              <a href="http://flink.apache.org/docs/0.9/internals" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">Internals <span class="caret"></span></a>
+              <ul class="dropdown-menu" role="menu">
+                <li role="presentation" class="dropdown-header"><strong>Contribute</strong></li>
+                <li><a href="http://flink.apache.org/docs/0.9/internals/how_to_contribute.html">How to Contribute</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/internals/coding_guidelines.html">Coding Guidelines</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/internals/ide_setup.html">IDE Setup</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/internals/logging.html">Logging</a></li>
+                <li class="divider"></li>
+                <li role="presentation" class="dropdown-header"><strong>Internals</strong></li>
+                <li><a href="http://flink.apache.org/docs/0.9/internals/general_arch.html">Architecture &amp; Process Model</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/internals/types_serialization.html">Type Extraction &amp; Serialization</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/internals/job_scheduling.html">Jobs &amp; Scheduling</a></li>
+                <li><a href="http://flink.apache.org/docs/0.9/internals/add_operator.html">How-To: Add an Operator</a></li>
+              </ul>
+            </li>
+          </ul>
+          <form class="navbar-form navbar-right hidden-sm hidden-md" role="search" action="http://flink.apache.org/docs/0.9/search-results.html">
+            <div class="form-group">
+              <input type="text" class="form-control" name="q" placeholder="Search all pages">
+            </div>
+            <button type="submit" class="btn btn-default">Search</button>
+          </form>
+        </div><!-- /.navbar-collapse -->
+      </div><!-- /.container -->
+    </nav>
+
+
+    
+
+    <!-- Main content. -->
+    <div class="container">
+      
+      
+<div class="row">
+  <div class="col-sm-10 col-sm-offset-1">
+    <h1>Cluster Execution</h1>
+
+
+
+<ul id="markdown-toc">
+  <li><a href="#command-line-interface" id="markdown-toc-command-line-interface">Command Line Interface</a></li>
+  <li><a href="#remote-environment" id="markdown-toc-remote-environment">Remote Environment</a>    <ul>
+      <li><a href="#maven-dependency" id="markdown-toc-maven-dependency">Maven Dependency</a></li>
+      <li><a href="#example" id="markdown-toc-example">Example</a></li>
+    </ul>
+  </li>
+  <li><a href="#linking-with-modules-not-contained-in-the-binary-distribution" id="markdown-toc-linking-with-modules-not-contained-in-the-binary-distribution">Linking with modules not contained in the binary distribution</a>    <ul>
+      <li><a href="#packaging-dependencies-with-your-usercode-with-maven" id="markdown-toc-packaging-dependencies-with-your-usercode-with-maven">Packaging dependencies with your usercode with Maven</a></li>
+    </ul>
+  </li>
+</ul>
+
+<p>Flink programs can run distributed on clusters of many machines. There
+are two ways to send a program to a cluster for execution:</p>
+
+<h2 id="command-line-interface">Command Line Interface</h2>
+
+<p>The command line interface lets you submit packaged programs (JARs) to a cluster
+(or single machine setup).</p>
+
+<p>Please refer to the <a href="cli.html">Command Line Interface</a> documentation for
+details.</p>
+
+<h2 id="remote-environment">Remote Environment</h2>
+
+<p>The remote environment lets you execute Flink Java programs on a cluster
+directly. The remote environment points to the cluster on which you want to
+execute the program.</p>
+
+<h3 id="maven-dependency">Maven Dependency</h3>
+
+<p>If you are developing your program as a Maven project, you have to add the
+<code>flink-clients</code> module using this dependency:</p>
+
+<div class="highlight"><pre><code class="language-xml"><span class="nt">&lt;dependency&gt;</span>
+  <span class="nt">&lt;groupId&gt;</span>org.apache.flink<span class="nt">&lt;/groupId&gt;</span>
+  <span class="nt">&lt;artifactId&gt;</span>flink-clients<span class="nt">&lt;/artifactId&gt;</span>
+  <span class="nt">&lt;version&gt;</span>0.9.0<span class="nt">&lt;/version&gt;</span>
+<span class="nt">&lt;/dependency&gt;</span></code></pre></div>
+
+<h3 id="example">Example</h3>
+
+<p>The following illustrates the use of the <code>RemoteEnvironment</code>:</p>
+
+<div class="highlight"><pre><code class="language-java"><span class="kd">public</span> <span class="kd">static</span> <span class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span>
+    <span class="n">ExecutionEnvironment</span> <span class="n">env</span> <span class="o">=</span> <span class="n">ExecutionEnvironment</span>
+        <span class="o">.</span><span class="na">createRemoteEnvironment</span><span class="o">(</span><span class="s">&quot;flink-master&quot;</span><span class="o">,</span> <span class="mi">6123</span><span class="o">,</span> <span class="s">&quot;/home/user/udfs.jar&quot;</span><span class="o">);</span>
+
+    <span class="n">DataSet</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> <span class="n">data</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="na">readTextFile</span><span class="o">(</span><span class="s">&quot;hdfs://path/to/file&quot;</span><span class="o">);</span>
+
+    <span class="n">data</span>
+        <span class="o">.</span><span class="na">filter</span><span class="o">(</span><span class="k">new</span> <span class="n">FilterFunction</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;()</span> <span class="o">{</span>
+            <span class="kd">public</span> <span class="kt">boolean</span> <span class="nf">filter</span><span class="o">(</span><span class="n">String</span> <span class="n">value</span><span class="o">)</span> <span class="o">{</span>
+                <span class="k">return</span> <span class="n">value</span><span class="o">.</span><span class="na">startsWith</span><span class="o">(</span><span class="s">&quot;http://&quot;</span><span class="o">);</span>
+            <span class="o">}</span>
+        <span class="o">})</span>
+        <span class="o">.</span><span class="na">writeAsText</span><span class="o">(</span><span class="s">&quot;hdfs://path/to/result&quot;</span><span class="o">);</span>
+
+    <span class="n">env</span><span class="o">.</span><span class="na">execute</span><span class="o">();</span>
+<span class="o">}</span></code></pre></div>
+
+<p>Note that the program contains custom user code and hence requires a JAR file with
+the classes of the code attached. The constructor of the remote environment
+takes the path(s) to the JAR file(s).</p>
+
+<h2 id="linking-with-modules-not-contained-in-the-binary-distribution">Linking with modules not contained in the binary distribution</h2>
+
+<p>The binary distribution contains jar packages in the <code>lib</code> folder that are automatically
+provided to the classpath of your distributed programs. Almost all of Flink's classes are
+located there, with a few exceptions, for example the streaming connectors and some freshly
+added modules. To run code depending on these modules you need to make them accessible
+during runtime, for which we suggest two options:</p>
+
+<ol>
+  <li>Either copy the required jar files into the <code>lib</code> folder on all of your TaskManagers.
+Note that you have to restart your TaskManagers after this.</li>
+  <li>Or package them with your usercode.</li>
+</ol>
+
+<p>The latter option is recommended as it respects the classloader management in Flink.</p>
+
+<h3 id="packaging-dependencies-with-your-usercode-with-maven">Packaging dependencies with your usercode with Maven</h3>
+
+<p>To provide these dependencies not included by Flink we suggest two options with Maven.</p>
+
+<ol>
+  <li>The maven assembly plugin builds a so-called fat jar containing all your dependencies.
+Assembly configuration is straight-forward, but the resulting jar might become bulky. See 
+<a href="http://maven.apache.org/plugins/maven-assembly-plugin/usage.html">usage</a>.</li>
+  <li>The <code>unpack</code> goal of the maven dependency plugin, which unpacks the relevant parts
+of the dependencies so that they are then packaged with your code.</li>
+</ol>
+
+<p>Using the latter approach in order to bundle the Kafka connector, <code>flink-connector-kafka</code>,
+you would need to add the classes from both the connector and the Kafka API itself. Add
+the following to the plugins section of your <code>pom.xml</code>.</p>
+
+<div class="highlight"><pre><code class="language-xml"><span class="nt">&lt;plugin&gt;</span>
+    <span class="nt">&lt;groupId&gt;</span>org.apache.maven.plugins<span class="nt">&lt;/groupId&gt;</span>
+    <span class="nt">&lt;artifactId&gt;</span>maven-dependency-plugin<span class="nt">&lt;/artifactId&gt;</span>
+    <span class="nt">&lt;version&gt;</span>2.9<span class="nt">&lt;/version&gt;</span>
+    <span class="nt">&lt;executions&gt;</span>
+        <span class="nt">&lt;execution&gt;</span>
+            <span class="nt">&lt;id&gt;</span>unpack<span class="nt">&lt;/id&gt;</span>
+            <span class="c">&lt;!-- executed just before the package phase --&gt;</span>
+            <span class="nt">&lt;phase&gt;</span>prepare-package<span class="nt">&lt;/phase&gt;</span>
+            <span class="nt">&lt;goals&gt;</span>
+                <span class="nt">&lt;goal&gt;</span>unpack<span class="nt">&lt;/goal&gt;</span>
+            <span class="nt">&lt;/goals&gt;</span>
+            <span class="nt">&lt;configuration&gt;</span>
+                <span class="nt">&lt;artifactItems&gt;</span>
+                    <span class="c">&lt;!-- For Flink connector classes --&gt;</span>
+                    <span class="nt">&lt;artifactItem&gt;</span>
+                        <span class="nt">&lt;groupId&gt;</span>org.apache.flink<span class="nt">&lt;/groupId&gt;</span>
+                        <span class="nt">&lt;artifactId&gt;</span>flink-connector-kafka<span class="nt">&lt;/artifactId&gt;</span>
+                        <span class="nt">&lt;version&gt;</span>0.9.0<span class="nt">&lt;/version&gt;</span>
+                        <span class="nt">&lt;type&gt;</span>jar<span class="nt">&lt;/type&gt;</span>
+                        <span class="nt">&lt;overWrite&gt;</span>false<span class="nt">&lt;/overWrite&gt;</span>
+                        <span class="nt">&lt;outputDirectory&gt;</span>${project.build.directory}/classes<span class="nt">&lt;/outputDirectory&gt;</span>
+                        <span class="nt">&lt;includes&gt;</span>org/apache/flink/**<span class="nt">&lt;/includes&gt;</span>
+                    <span class="nt">&lt;/artifactItem&gt;</span>
+                    <span class="c">&lt;!-- For Kafka API classes --&gt;</span>
+                    <span class="nt">&lt;artifactItem&gt;</span>
+                        <span class="nt">&lt;groupId&gt;</span>org.apache.kafka<span class="nt">&lt;/groupId&gt;</span>
+                        <span class="nt">&lt;artifactId&gt;</span>kafka_<span class="nt">&lt;YOUR_SCALA_VERSION&gt;&lt;/artifactId&gt;</span>
+                        <span class="nt">&lt;version&gt;&lt;YOUR_KAFKA_VERSION&gt;&lt;/version&gt;</span>
+                        <span class="nt">&lt;type&gt;</span>jar<span class="nt">&lt;/type&gt;</span>
+                        <span class="nt">&lt;overWrite&gt;</span>false<span class="nt">&lt;/overWrite&gt;</span>
+                        <span class="nt">&lt;outputDirectory&gt;</span>${project.build.directory}/classes<span class="nt">&lt;/outputDirectory&gt;</span>
+                        <span class="nt">&lt;includes&gt;</span>kafka/**<span class="nt">&lt;/includes&gt;</span>
+                    <span class="nt">&lt;/artifactItem&gt;</span>
+                <span class="nt">&lt;/artifactItems&gt;</span>
+            <span class="nt">&lt;/configuration&gt;</span>
+        <span class="nt">&lt;/execution&gt;</span>
+    <span class="nt">&lt;/executions&gt;</span>
+<span class="nt">&lt;/plugin&gt;</span></code></pre></div>
+
+<p>Now when running <code>mvn clean package</code> the produced jar includes the required dependencies.</p>
+
+  </div>
+
+  <div class="col-sm-10 col-sm-offset-1">
+    <!-- Disqus thread and some vertical offset -->
+    <div style="margin-top: 75px; margin-bottom: 50px" id="disqus_thread"></div>
+  </div>
+</div>
+
+    </div><!-- /.container -->
+
+    <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
+    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
+    <!-- Include all compiled plugins (below), or include individual files as needed -->
+    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
+    <script src="http://flink.apache.org/docs/0.9/page/js/codetabs.js"></script>
+
+    <!-- Google Analytics -->
+    <script>
+      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+      })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+      ga('create', 'UA-52545728-1', 'auto');
+      ga('send', 'pageview');
+    </script>
+
+    <!-- Disqus -->
+    <script type="text/javascript">
+    var disqus_shortname = 'stratosphere-eu';
+    (function() {
+        var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
+        dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
+        (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
+    })();
+</script>
+  </body>
+</html>