You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by tz...@apache.org on 2017/03/22 14:57:53 UTC

[1/2] flink git commit: [FLINK-6139] [build] Add "mapr" build profile

Repository: flink
Updated Branches:
  refs/heads/release-1.2 ffd146a60 -> 09239ea16


[FLINK-6139] [build] Add "mapr" build profile

This build profile is for building Flink to be compatible with MapR. It
additionally excludes dependency clashes between MapR's Hadoop /
Zookeeper distributions and Flink's dependencies.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/b4d1eb40
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/b4d1eb40
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/b4d1eb40

Branch: refs/heads/release-1.2
Commit: b4d1eb4064e123fd8202246e756701f673b9ef54
Parents: ffd146a
Author: Tzu-Li (Gordon) Tai <tz...@apache.org>
Authored: Wed Mar 22 15:14:03 2017 +0800
Committer: Tzu-Li (Gordon) Tai <tz...@apache.org>
Committed: Wed Mar 22 22:56:46 2017 +0800

----------------------------------------------------------------------
 .../flink-shaded-hadoop2/pom.xml                | 156 +++++++++++++++++++
 pom.xml                                         |  36 +++++
 2 files changed, 192 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/b4d1eb40/flink-shaded-hadoop/flink-shaded-hadoop2/pom.xml
----------------------------------------------------------------------
diff --git a/flink-shaded-hadoop/flink-shaded-hadoop2/pom.xml b/flink-shaded-hadoop/flink-shaded-hadoop2/pom.xml
index 7df2578..c9b918d 100644
--- a/flink-shaded-hadoop/flink-shaded-hadoop2/pom.xml
+++ b/flink-shaded-hadoop/flink-shaded-hadoop2/pom.xml
@@ -652,4 +652,160 @@ under the License.
 			</dependency>
 		</dependencies>
 	</dependencyManagement>
+
+	<profiles>
+		<profile>
+			<!-- MapR build profile -->
+			<id>mapr</id>
+			<dependencies>
+				<dependency>
+					<groupId>org.apache.hadoop</groupId>
+					<artifactId>hadoop-common</artifactId>
+					<version>${hadoop.version}</version>
+					<exclusions>
+						<!--
+							for MapR builds, we additionally exclude the MapR dependencies so that they
+							are not bundled with Flink, ensuring that the native MapR libraries are used
+							and avoiding incompatibilities
+						-->
+						<exclusion>
+							<groupId>com.mapr.hadoop</groupId>
+							<artifactId>maprfs-core</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.mapr.hadoop</groupId>
+							<artifactId>hadoop2</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.mapr.hadoop</groupId>
+							<artifactId>maprfs</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.mapr.hadoop</groupId>
+							<artifactId>maprfs-diagnostic-tools</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.mapr.hadoop</groupId>
+							<artifactId>maprfs-jni</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.mapr.fs</groupId>
+							<artifactId>libprotodefs</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.mapr.fs</groupId>
+							<artifactId>mapr-hbase</artifactId>
+						</exclusion>
+
+						<exclusion>
+							<groupId>asm</groupId>
+							<artifactId>asm</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.ow2.asm</groupId>
+							<artifactId>asm</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>tomcat</groupId>
+							<artifactId>jasper-compiler</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>tomcat</groupId>
+							<artifactId>jasper-runtime</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.mortbay.jetty</groupId>
+							<artifactId>jetty</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.mortbay.jetty</groupId>
+							<artifactId>jsp-api-2.1</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.mortbay.jetty</groupId>
+							<artifactId>jsp-2.1</artifactId>
+						</exclusion>
+
+						<exclusion>
+							<groupId>org.eclipse.jdt</groupId>
+							<artifactId>core</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.mortbay.jetty</groupId>
+							<artifactId>jetty</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.sun.jersey</groupId>
+							<artifactId>jersey-json</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.codehaus.jettison</groupId>
+							<artifactId>jettison</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.sun.jersey</groupId>
+							<artifactId>jersey-server</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>tomcat</groupId>
+							<artifactId>jasper-compiler</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>tomcat</groupId>
+							<artifactId>jasper-runtime</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>javax.servlet.jsp</groupId>
+							<artifactId>jsp-api</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.sun.jersey.jersey-test-framework</groupId>
+							<artifactId>jersey-test-framework-grizzly2</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.sun.jersey.jersey-test-framework</groupId>
+							<artifactId>jersey-test-framework-core</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.sun.jersey</groupId>
+							<artifactId>jersey-grizzly2</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.glassfish.grizzly</groupId>
+							<artifactId>grizzly-http</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.glassfish.grizzly</groupId>
+							<artifactId>grizzly-framework</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.glassfish.grizzly</groupId>
+							<artifactId>grizzly-http-server</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.glassfish.grizzly</groupId>
+							<artifactId>grizzly-rcm</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.glassfish.grizzly</groupId>
+							<artifactId>grizzly-http-servlet</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>org.glassfish</groupId>
+							<artifactId>javax.servlet</artifactId>
+						</exclusion>
+						<exclusion>
+							<groupId>com.sun.jersey.contribs</groupId>
+							<artifactId>jersey-guice</artifactId>
+						</exclusion>
+						<!--We have to exclude beanutils because it is overlapping with commons-collections-->
+						<exclusion>
+							<groupId>commons-beanutils</groupId>
+							<artifactId>commons-beanutils</artifactId>
+						</exclusion>
+					</exclusions>
+				</dependency>
+			</dependencies>
+		</profile>
+	</profiles>
 </project>

http://git-wip-us.apache.org/repos/asf/flink/blob/b4d1eb40/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index b3d6483..c1e64ab 100644
--- a/pom.xml
+++ b/pom.xml
@@ -515,6 +515,42 @@ under the License.
 		</profile>
 
 		<profile>
+			<!--
+				MapR build profile. This build profile must be used together with "vendor-repos"
+				to be able to locate the MapR Hadoop / Zookeeper dependencies.
+			-->
+			<id>mapr</id>
+
+			<!--
+				use MapR Hadoop / Zookeeper dependencies appropriate for MapR 5.2.0;
+				users of different MapR versions should simply override these versions
+				with appropriate values.
+			-->
+			<properties>
+				<hadoop.version>2.7.0-mapr-1607</hadoop.version>
+				<zookeeper.version>3.4.5-mapr-1604</zookeeper.version>
+			</properties>
+
+			<dependencies>
+				<dependency>
+					<groupId>org.apache.zookeeper</groupId>
+					<artifactId>zookeeper</artifactId>
+					<version>${zookeeper.version}</version>
+					<exclusions>
+						<!--
+							exclude netty, because MapR's Zookeeper distribution has
+							a conflicting Netty version with Flink's Netty dependency
+						-->
+						<exclusion>
+							<groupId>org.jboss.netty</groupId>
+							<artifactId>netty</artifactId>
+						</exclusion>
+					</exclusions>
+				</dependency>
+			</dependencies>
+		</profile>
+
+		<profile>
 			<!-- used for aggregating  ScalaDoc with JavaDoc -->
 			<id>aggregate-scaladoc</id>
 			<dependencies>


[2/2] flink git commit: [FLINK-6139] [doc] Add documentation for running Flink on MapR

Posted by tz...@apache.org.
[FLINK-6139] [doc] Add documentation for running Flink on MapR

This closes #3582.


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/09239ea1
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/09239ea1
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/09239ea1

Branch: refs/heads/release-1.2
Commit: 09239ea1641e06802a302d00cc80e0fc643971e5
Parents: b4d1eb4
Author: Tzu-Li (Gordon) Tai <tz...@apache.org>
Authored: Wed Mar 22 15:15:41 2017 +0800
Committer: Tzu-Li (Gordon) Tai <tz...@apache.org>
Committed: Wed Mar 22 22:57:00 2017 +0800

----------------------------------------------------------------------
 docs/setup/aws.md           |   2 +-
 docs/setup/cluster_setup.md |   2 +-
 docs/setup/gce_setup.md     |   2 +-
 docs/setup/mapr_setup.md    | 132 +++++++++++++++++++++++++++++++++++++++
 docs/setup/yarn_setup.md    |   2 +-
 5 files changed, 136 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/09239ea1/docs/setup/aws.md
----------------------------------------------------------------------
diff --git a/docs/setup/aws.md b/docs/setup/aws.md
index 8d04d59..b0fbefc 100644
--- a/docs/setup/aws.md
+++ b/docs/setup/aws.md
@@ -2,7 +2,7 @@
 title: "Amazon Web Services (AWS)"
 nav-title: AWS
 nav-parent_id: deployment
-nav-pos: 10
+nav-pos: 5
 ---
 <!--
 Licensed to the Apache Software Foundation (ASF) under one

http://git-wip-us.apache.org/repos/asf/flink/blob/09239ea1/docs/setup/cluster_setup.md
----------------------------------------------------------------------
diff --git a/docs/setup/cluster_setup.md b/docs/setup/cluster_setup.md
index c86e353..bcb7766 100644
--- a/docs/setup/cluster_setup.md
+++ b/docs/setup/cluster_setup.md
@@ -1,7 +1,7 @@
 ---
 title: "Standalone Cluster"
 nav-parent_id: deployment
-nav-pos: 2
+nav-pos: 1
 ---
 <!--
 Licensed to the Apache Software Foundation (ASF) under one

http://git-wip-us.apache.org/repos/asf/flink/blob/09239ea1/docs/setup/gce_setup.md
----------------------------------------------------------------------
diff --git a/docs/setup/gce_setup.md b/docs/setup/gce_setup.md
index f9edfcc..ac1ad25 100644
--- a/docs/setup/gce_setup.md
+++ b/docs/setup/gce_setup.md
@@ -2,7 +2,7 @@
 title:  "Google Compute Engine Setup"
 nav-title: Google Compute Engine
 nav-parent_id: deployment
-nav-pos: 20
+nav-pos: 6
 ---
 <!--
 Licensed to the Apache Software Foundation (ASF) under one

http://git-wip-us.apache.org/repos/asf/flink/blob/09239ea1/docs/setup/mapr_setup.md
----------------------------------------------------------------------
diff --git a/docs/setup/mapr_setup.md b/docs/setup/mapr_setup.md
new file mode 100644
index 0000000..44682e5
--- /dev/null
+++ b/docs/setup/mapr_setup.md
@@ -0,0 +1,132 @@
+---
+title:  "MapR Setup"
+nav-title: MapR
+nav-parent_id: deployment
+nav-pos: 7
+---
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+This documentation provides instructions on how to prepare Flink for YARN
+executions on a [MapR](https://mapr.com/) cluster.
+
+* This will be replaced by the TOC
+{:toc}
+
+## Running Flink on YARN with MapR
+
+The instructions below assume MapR version 5.2.0. They will guide you
+through the steps needed to submit [Flink on YARN]({{ site.baseurl }}/setup/yarn_setup.html)
+jobs or sessions to a MapR cluster.
+
+### Building Flink for MapR
+
+In order to run Flink on MapR, Flink needs to be built with MapR's own
+Hadoop and Zookeeper distribution. Simply build Flink using Maven with
+the following command from the project root directory:
+
+```
+mvn clean install -DskipTests -Pvendor-repos,mapr \
+    -Dhadoop.version=2.7.0-mapr-1607 \
+    -Dzookeeper.version=3.4.5-mapr-1604
+```
+
+The `vendor-repos` build profile adds MapR's repository to the build so that
+MapR's Hadoop / Zookeeper dependencies can be fetched. The `mapr` build
+profile additionally resolves some dependency clashes between MapR and
+Flink, as well as ensuring that the native MapR libraries on the cluster
+nodes are used. Both profiles must be activated.
+
+By default the `mapr` profile builds with Hadoop / Zookeeper dependencies
+for MapR version 5.2.0, so you don't need to explicitly override
+the `hadoop.version` and `zookeeper.version` properties.
+For different MapR versions, simply override these properties with the appropriate
+values. The corresponding Hadoop / Zookeeper distributions for each MapR version
+can be found in MapR's documentation, for example
+[here](http://maprdocs.mapr.com/home/DevelopmentGuide/MavenArtifacts.html).
+
+### Job Submission Client Setup
+
+The client submitting Flink jobs to MapR also needs to be set up as described below.
+
+Ensure that MapR's JAAS config file is picked up to avoid login failures:
+
+```
+export JVM_ARGS=-Djava.security.auth.login.config=/opt/mapr/conf/mapr.login.conf
+```
+
+Make sure that the `yarn.nodemanager.resource.cpu-vcores` property is set in `yarn-site.xml`:
+
+~~~xml
+<!-- in /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/yarn-site.xml -->
+
+<configuration>
+...
+
+<property>
+    <name>yarn.nodemanager.resource.cpu-vcores</name>
+    <value>...</value>
+</property>
+
+...
+</configuration>
+~~~
+
+Also remember to set the `YARN_CONF_DIR` or `HADOOP_CONF_DIR` environment
+variables to the path where `yarn-site.xml` is located:
+
+```
+export YARN_CONF_DIR=/opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/
+export HADOOP_CONF_DIR=/opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/
+```
+
+Make sure that the MapR native libraries are picked up in the classpath:
+
+```
+export FLINK_CLASSPATH=/opt/mapr/lib/*
+```
+
+If you'll be starting Flink on YARN sessions with `yarn-session.sh`, the
+following is also required:
+
+```
+export CC_CLASSPATH=/opt/mapr/lib/*
+```
+
+## Running Flink with a Secured MapR Cluster
+
+*Note: In Flink 1.2.0, Flink's Kerberos authentication for YARN execution has
+a bug that prevents it from working with MapR Security. Please upgrade to a later Flink
+version in order to use Flink with a secured MapR cluster. For more details,
+please see [FLINK-5949](https://issues.apache.org/jira/browse/FLINK-5949).*
+
+Flink's [Kerberos authentication]({{ site.baseurl }}/ops/security-kerberos.html) is independent of
+[MapR's Security authentication](http://maprdocs.mapr.com/home/SecurityGuide/Configuring-MapR-Security.html).
+With the above build procedures and environment variable setups, Flink
+does not require any additional configuration to work with MapR Security.
+
+Users simply need to log in using MapR's `maprlogin` authentication
+utility. Users who haven't acquired MapR login credentials will not be
+able to submit Flink jobs; submission fails with:
+
+```
+java.lang.Exception: unable to establish the security context
+Caused by: o.a.f.r.security.modules.SecurityModule$SecurityInstallException: Unable to set the Hadoop login user
+Caused by: java.io.IOException: failure to login: Unable to obtain MapR credentials
+```

http://git-wip-us.apache.org/repos/asf/flink/blob/09239ea1/docs/setup/yarn_setup.md
----------------------------------------------------------------------
diff --git a/docs/setup/yarn_setup.md b/docs/setup/yarn_setup.md
index 53423b8..3149ec2 100644
--- a/docs/setup/yarn_setup.md
+++ b/docs/setup/yarn_setup.md
@@ -2,7 +2,7 @@
 title:  "YARN Setup"
 nav-title: YARN
 nav-parent_id: deployment
-nav-pos: 3
+nav-pos: 2
 ---
 <!--
 Licensed to the Apache Software Foundation (ASF) under one