You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by co...@apache.org on 2015/11/02 22:03:15 UTC

[2/2] bigtop git commit: BIGTOP-2104. Packages upgrade to Spark 1.5.1

BIGTOP-2104. Packages upgrade to Spark 1.5.1


Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/05d9fada
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/05d9fada
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/05d9fada

Branch: refs/heads/master
Commit: 05d9fada278f23c97079afb2429b590afa100b1e
Parents: bf67f6e
Author: Jonathan Kelly <jo...@amazon.com>
Authored: Fri Oct 2 15:13:18 2015 -0700
Committer: Konstantin Boudnik <co...@apache.org>
Committed: Mon Nov 2 13:02:28 2015 -0800

----------------------------------------------------------------------
 .../src/common/spark/do-component-build         | 15 +++-
 .../src/common/spark/install_spark.sh           | 79 ++++++--------------
 bigtop-packages/src/common/spark/patch0.diff    | 11 +++
 bigtop-packages/src/common/spark/spark-env.sh   | 43 +++++++++++
 bigtop-packages/src/deb/spark/control           | 15 ++++
 .../src/deb/spark/spark-core.install            | 14 ++--
 .../src/deb/spark/spark-datanucleus.install     |  2 +
 .../src/deb/spark/spark-extras.install          |  1 +
 .../src/deb/spark/spark-yarn-shuffle.install    |  2 +
 bigtop-packages/src/rpm/spark/SPECS/spark.spec  | 41 +++++++++-
 bigtop.bom                                      |  2 +-
 11 files changed, 158 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/do-component-build
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/do-component-build b/bigtop-packages/src/common/spark/do-component-build
index 3d705a5..8d74f4a 100644
--- a/bigtop-packages/src/common/spark/do-component-build
+++ b/bigtop-packages/src/common/spark/do-component-build
@@ -24,9 +24,20 @@ BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${
             -Dhadoop.version=$HADOOP_VERSION \
             -Dyarn.version=$HADOOP_VERSION \
             -Dprotobuf.version=2.5.0 \
-            -DskipTests -DrecompileMode=all"
+            -DrecompileMode=all \
+            -Pbigtop-dist \
+            -Pyarn -Phadoop-2.6 \
+            -Phive -Phive-thriftserver \
+            $SPARK_BUILD_OPTS"
+
 ## this might be an issue at times
 #        http://maven.40175.n5.nabble.com/Not-finding-artifact-in-local-repo-td3727753.html
 export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -XX:PermSize=1024m -XX:MaxPermSize=1024m"
 
-mvn -Pbigtop-dist -Pyarn -Phive -Phive-thriftserver $BUILD_OPTS install
+mvn $BUILD_OPTS -DskipTests install
+
+# Tests must be run after Spark has already been packaged.
+# See http://spark.apache.org/docs/latest/building-spark.html#spark-tests-in-maven
+if [ "$SPARK_RUN_TESTS" = "true" ]; then
+    mvn $BUILD_OPTS test
+fi

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/install_spark.sh
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/install_spark.sh b/bigtop-packages/src/common/spark/install_spark.sh
index 85078ae..530b355 100644
--- a/bigtop-packages/src/common/spark/install_spark.sh
+++ b/bigtop-packages/src/common/spark/install_spark.sh
@@ -119,6 +119,10 @@ install -d -m 0755 $PREFIX/$LIB_DIR
 install -d -m 0755 $PREFIX/$LIB_DIR/lib
 install -d -m 0755 $PREFIX/$LIB_DIR/bin
 install -d -m 0755 $PREFIX/$LIB_DIR/sbin
+install -d -m 0755 $PREFIX/$LIB_DIR/extras
+install -d -m 0755 $PREFIX/$LIB_DIR/extras/lib
+install -d -m 0755 $PREFIX/$LIB_DIR/yarn
+install -d -m 0755 $PREFIX/$LIB_DIR/yarn/lib
 install -d -m 0755 $PREFIX/$DOC_DIR
 install -d -m 0755 $PREFIX/$EXAMPLES_DIR
 
@@ -134,9 +138,15 @@ tar --wildcards -C $PREFIX/$LIB_DIR/ -zxf ${BUILD_DIR}/assembly/target/spark-ass
 
 rm -rf $PREFIX/$LIB_DIR/bin/*.cmd
 
+# External/extra jars
+ls ${BUILD_DIR}/{external,extras}/*/target/*${SPARK_VERSION}.jar | grep -v 'original-\|assembly' | xargs -IJARS cp JARS $PREFIX/$LIB_DIR/extras/lib
+
 # Examples jar
 cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib/spark-examples-${SPARK_VERSION}-hadoop${HADOOP_VERSION}.jar
 
+# Spark YARN Shuffle jar
+cp ${BUILD_DIR}/network/yarn/target/*/spark-${SPARK_VERSION}-yarn-shuffle.jar $PREFIX/$LIB_DIR/lib/
+
 # Examples src
 cp -ra ${BUILD_DIR}/examples/src $PREFIX/$EXAMPLES_DIR/
 ln -s $EXAMPLES_DIR $PREFIX/$LIB_DIR/examples
@@ -150,12 +160,12 @@ chmod 755 $PREFIX/$LIB_DIR/sbin/*
 # Copy in the configuration files
 install -d -m 0755 $PREFIX/$CONF_DIR
 cp -a ${BUILD_DIR}/conf/* $PREFIX/$CONF_DIR
-cp  $PREFIX/$CONF_DIR/spark-env.sh.template $PREFIX/$CONF_DIR/spark-env.sh
+cp $SOURCE_DIR/spark-env.sh $PREFIX/$CONF_DIR
 ln -s /etc/spark/conf $PREFIX/$LIB_DIR/conf
 
 # Copy in the wrappers
 install -d -m 0755 $PREFIX/$BIN_DIR
-for wrap in sbin/spark-executor bin/spark-shell bin/spark-submit; do
+for wrap in bin/spark-class bin/spark-shell bin/spark-sql bin/spark-submit; do
   cat > $PREFIX/$BIN_DIR/`basename $wrap` <<EOF
 #!/bin/bash
 
@@ -167,60 +177,6 @@ EOF
   chmod 755 $PREFIX/$BIN_DIR/`basename $wrap`
 done
 
-cat >> $PREFIX/$CONF_DIR/spark-env.sh <<EOF
-export HADOOP_HOME=\${HADOOP_HOME:-/usr/lib/hadoop}
-export HADOOP_CONF_DIR=\${HADOOP_CONF_DIR:-/etc/hadoop/conf}
-
-export SPARK_MASTER_IP=\`hostname\`
-export SPARK_MASTER_PORT=7077
-export SPARK_MASTER_WEBUI_PORT=18080
-export SPARK_WORKER_PORT=7078
-export SPARK_WORKER_WEBUI_PORT=18081
-export SPARK_WORKER_DIR=/var/run/spark/work
-export SPARK_HISTORY_OPTS="\$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
-
-export SPARK_LOG_DIR=/var/log/spark
-
-export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:\${HADOOP_HOME}/lib/native
-EOF
-
-cat >> $PREFIX/$CONF_DIR/hive-site.xml <<EOF
-<?xml version="1.0"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<configuration>
-
-<property>
-  <name>javax.jdo.option.ConnectionURL</name>
-  <value>jdbc:derby:;databaseName=/tmp/spark-\${user.name}/metastore_db;create=true</value>
-  <description>JDBC connect string for a JDBC metastore</description>
-</property>
-
-<property>
-  <name>javax.jdo.option.ConnectionDriverName</name>
-  <value>org.apache.derby.jdbc.EmbeddedDriver</value>
-  <description>Driver class name for a JDBC metastore</description>
-</property>
-
-</configuration>
-
-EOF
-
 ln -s /var/run/spark/work $PREFIX/$LIB_DIR/work
 
 cp -r ${BUILD_DIR}/python ${PREFIX}/${INSTALLED_LIB_DIR}/
@@ -242,3 +198,14 @@ cp ${BUILD_DIR}/{LICENSE,NOTICE} ${PREFIX}/${LIB_DIR}/
 
 # Version-less symlinks
 (cd $PREFIX/$LIB_DIR/lib; ln -s spark-assembly*.jar spark-assembly.jar; ln -s spark-examples*.jar spark-examples.jar)
+pushd $PREFIX/$LIB_DIR/yarn/lib
+ln -s ../../lib/spark-*-yarn-shuffle.jar spark-yarn-shuffle.jar
+ln -s ../../lib/datanucleus-api-jdo*.jar datanucleus-api-jdo.jar
+ln -s ../../lib/datanucleus-core*.jar datanucleus-core.jar
+ln -s ../../lib/datanucleus-rdbms*.jar datanucleus-rdbms.jar
+popd
+pushd $PREFIX/$LIB_DIR/extras/lib
+for j in $(ls *.jar); do
+  ln -s $j $(echo $j | sed -n 's/\(.*\)\(_[0-9.]\+-[0-9.]\+\)\(.jar\)/\1\3/p')
+done
+popd

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/patch0.diff
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/patch0.diff b/bigtop-packages/src/common/spark/patch0.diff
new file mode 100644
index 0000000..6b7f350
--- /dev/null
+++ b/bigtop-packages/src/common/spark/patch0.diff
@@ -0,0 +1,11 @@
+--- spark-1.5.1-patched/pom.xml	2015-09-23 22:50:32.000000000 -0700
++++ spark-1.5.1/pom.xml	2015-10-30 13:46:54.326426316 -0700
+@@ -2366,7 +2366,7 @@
+     <profile>
+       <id>hadoop-2.6</id>
+       <properties>
+-        <hadoop.version>2.6.0</hadoop.version>
++        <hadoop.version>${hadoop.version}</hadoop.version>
+         <jets3t.version>0.9.3</jets3t.version>
+         <zookeeper.version>3.4.6</zookeeper.version>
+         <curator.version>2.6.0</curator.version>

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/spark-env.sh
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/spark-env.sh b/bigtop-packages/src/common/spark/spark-env.sh
new file mode 100755
index 0000000..885aed1
--- /dev/null
+++ b/bigtop-packages/src/common/spark/spark-env.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+export SPARK_HOME=${SPARK_HOME:-/usr/lib/spark}
+export SPARK_LOG_DIR=${SPARK_LOG_DIR:-/var/log/spark}
+
+export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}
+export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-${HADOOP_HOME}/../hadoop-hdfs}
+export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_HOME}/../hadoop-mapreduce}
+export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-${HADOOP_HOME}/../hadoop-yarn}
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}
+
+# Launch everything with the plain JVM runtime instead of Scala (SPARK_LAUNCH_WITH_SCALA=0)
+export SPARK_LAUNCH_WITH_SCALA=0
+export SPARK_LIBRARY_PATH=${SPARK_LIBRARY_PATH:-${SPARK_HOME}/lib}
+export SCALA_LIBRARY_PATH=${SCALA_LIBRARY_PATH:-${SPARK_HOME}/lib}
+
+# Put the Hadoop, HDFS, YARN and MapReduce jars on CLASSPATH and Hadoop's native libs on SPARK_LIBRARY_PATH
+export CLASSPATH="$CLASSPATH:$HADOOP_HOME/*:$HADOOP_HDFS_HOME/*:$HADOOP_YARN_HOME/*:$HADOOP_MAPRED_HOME/*"
+export SPARK_LIBRARY_PATH=$SPARK_LIBRARY_PATH:${HADOOP_HOME}/lib/native
+
+export STANDALONE_SPARK_MASTER_HOST=`hostname -f`
+export SPARK_MASTER_PORT=7077
+export SPARK_MASTER_WEBUI_PORT=18080
+
+export SPARK_WORKER_DIR=${SPARK_WORKER_DIR:-/var/run/spark/work}
+export SPARK_WORKER_PORT=7078
+export SPARK_WORKER_WEBUI_PORT=18081
+
+export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/control
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/control b/bigtop-packages/src/deb/spark/control
index 856d622..7ebc970 100644
--- a/bigtop-packages/src/deb/spark/control
+++ b/bigtop-packages/src/deb/spark/control
@@ -60,3 +60,18 @@ Architecture: all
 Depends: spark-core (= ${source:Version})
 Description: Thrift server for Spark SQL
  Thrift server for Spark SQL
+
+Package: spark-datanucleus
+Architecture: all
+Description: DataNucleus libraries for Apache Spark
+ DataNucleus libraries used by Spark SQL with Hive Support
+
+Package: spark-extras
+Architecture: all
+Description: External/extra libraries for Apache Spark
+ External/extra libraries built for Apache Spark but not included in the main assembly JAR (e.g., external streaming libraries)
+
+Package: spark-yarn-shuffle
+Architecture: all
+Description: Spark YARN Shuffle Service
+ Spark YARN Shuffle Service

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-core.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-core.install b/bigtop-packages/src/deb/spark/spark-core.install
index 7e0d9f7..70f588c 100644
--- a/bigtop-packages/src/deb/spark/spark-core.install
+++ b/bigtop-packages/src/deb/spark/spark-core.install
@@ -1,26 +1,26 @@
 /etc/spark
-/usr/bin/spark-executor
+/usr/bin/spark-class
 /usr/bin/spark-submit
 /usr/bin/spark-shell
+/usr/bin/spark-sql
 /usr/lib/spark/LICENSE
 /usr/lib/spark/RELEASE
 /usr/lib/spark/NOTICE
 /usr/lib/spark/bin/beeline
-/usr/lib/spark/bin/compute-classpath.sh
 /usr/lib/spark/bin/load-spark-env.sh
 /usr/lib/spark/bin/run-example
 /usr/lib/spark/bin/spark-class
 /usr/lib/spark/bin/spark-shell
 /usr/lib/spark/bin/spark-sql
 /usr/lib/spark/bin/spark-submit
-/usr/lib/spark/bin/utils.sh
 /usr/lib/spark/conf
+/usr/lib/spark/data
+/usr/lib/spark/examples
+/usr/lib/spark/lib/spark-assembly*.jar
+/usr/lib/spark/lib/spark-examples*.jar
 /usr/lib/spark/sbin
 /usr/lib/spark/work
-/usr/lib/spark/lib
-/usr/lib/spark/examples
-/usr/lib/spark/data
-/usr/share/doc/spark
+/usr/share/doc/spark*
 /var/lib/spark/
 /var/log/spark/
 /var/run/spark/

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-datanucleus.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-datanucleus.install b/bigtop-packages/src/deb/spark/spark-datanucleus.install
new file mode 100644
index 0000000..69f619b
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-datanucleus.install
@@ -0,0 +1,2 @@
+/usr/lib/spark/lib/datanucleus-*.jar
+/usr/lib/spark/yarn/lib/datanucleus-*.jar

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-extras.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-extras.install b/bigtop-packages/src/deb/spark/spark-extras.install
new file mode 100644
index 0000000..c65fbc0
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-extras.install
@@ -0,0 +1 @@
+/usr/lib/spark/extras/lib

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install b/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
new file mode 100644
index 0000000..79c799a
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
@@ -0,0 +1,2 @@
+/usr/lib/spark/lib/spark-*-yarn-shuffle.jar
+/usr/lib/spark/yarn/lib/spark-yarn-shuffle.jar

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/rpm/spark/SPECS/spark.spec
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/spark/SPECS/spark.spec b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
index d8a5c10..daecc72 100644
--- a/bigtop-packages/src/rpm/spark/SPECS/spark.spec
+++ b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
@@ -119,6 +119,28 @@ Requires: spark-core = %{version}-%{release}
 %description -n spark-thriftserver
 Thrift server for Spark SQL
 
+%package -n spark-datanucleus
+Summary: DataNucleus libraries for Apache Spark
+Group: Development/Libraries
+
+%description -n spark-datanucleus
+DataNucleus libraries used by Spark SQL with Hive Support
+
+%package -n spark-extras
+Summary: External/extra libraries for Apache Spark
+Group: Development/Libraries
+
+%description -n spark-extras
+External/extra libraries built for Apache Spark but not included in the main
+assembly JAR (e.g., external streaming libraries)
+
+%package -n spark-yarn-shuffle
+Summary: Spark YARN Shuffle Service
+Group: Development/Libraries
+
+%description -n spark-yarn-shuffle
+Spark YARN Shuffle Service
+
 %prep
 %setup -n %{spark_name}-%{spark_base_version}
 
@@ -175,6 +197,8 @@ done
 %{lib_spark}/NOTICE
 %{lib_spark}/bin
 %{lib_spark}/lib
+%exclude %{lib_spark}/lib/datanucleus-*.jar
+%exclude %{lib_spark}/lib/spark-*-yarn-shuffle.jar
 %{lib_spark}/sbin
 %{lib_spark}/data
 %{lib_spark}/examples
@@ -185,8 +209,9 @@ done
 %attr(0755,spark,spark) %{var_lib_spark}
 %attr(0755,spark,spark) %{var_run_spark}
 %attr(0755,spark,spark) %{var_log_spark}
+%{bin}/spark-class
 %{bin}/spark-shell
-%{bin}/spark-executor
+%{bin}/spark-sql
 %{bin}/spark-submit
 
 %files -n spark-python
@@ -195,6 +220,20 @@ done
 %attr(0755,root,root) %{lib_spark}/bin/pyspark
 %{lib_spark}/python
 
+%files -n spark-datanucleus
+%defattr(-,root,root,755)
+%{lib_spark}/lib/datanucleus-*.jar
+%{lib_spark}/yarn/lib/datanucleus-*.jar
+
+%files -n spark-extras
+%defattr(-,root,root,755)
+%{lib_spark}/extras
+
+%files -n spark-yarn-shuffle
+%defattr(-,root,root,755)
+%{lib_spark}/lib/spark-*-yarn-shuffle.jar
+%{lib_spark}/yarn/lib/spark-yarn-shuffle.jar
+
 %define service_macro() \
 %files -n %1 \
 %attr(0755,root,root)/%{initd_dir}/%1 \

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop.bom
----------------------------------------------------------------------
diff --git a/bigtop.bom b/bigtop.bom
index 51209f5..57fd457 100644
--- a/bigtop.bom
+++ b/bigtop.bom
@@ -282,7 +282,7 @@ bigtop {
       name    = 'spark'
       pkg     = 'spark-core'
       relNotes = 'Apache Spark'
-      version { base = '1.3.1'; pkg = base; release = 1 }
+      version { base = '1.5.1'; pkg = base; release = 1 }
       tarball { destination = "$name-${version.base}.tar.gz"
                 source      = "$name-${version.base}.tgz" }
       url     { download_path = "/$name/$name-${version.base}"