You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by co...@apache.org on 2015/11/02 22:03:14 UTC
[1/2] bigtop git commit: BIGTOP-2105. Puppet recipes improvements after Spark is bumped to 1.5.1
Repository: bigtop
Updated Branches:
refs/heads/master bf67f6e5b -> ed6377f69
BIGTOP-2105. Puppet recipes improvements after Spark is bumped to 1.5.1
Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/ed6377f6
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/ed6377f6
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/ed6377f6
Branch: refs/heads/master
Commit: ed6377f693e7d2655172fd49e6e68ddc9d5709fd
Parents: 05d9fad
Author: Jonathan Kelly <jo...@amazon.com>
Authored: Fri Oct 2 15:20:33 2015 -0700
Committer: Konstantin Boudnik <co...@apache.org>
Committed: Mon Nov 2 13:02:28 2015 -0800
----------------------------------------------------------------------
.../puppet/modules/spark/manifests/init.pp | 140 +++++++++++++++----
.../modules/spark/templates/spark-defaults.conf | 22 +++
.../puppet/modules/spark/templates/spark-env.sh | 65 +--------
3 files changed, 145 insertions(+), 82 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/bigtop/blob/ed6377f6/bigtop-deploy/puppet/modules/spark/manifests/init.pp
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/spark/manifests/init.pp b/bigtop-deploy/puppet/modules/spark/manifests/init.pp
index 9b33bb9..b93be1d 100644
--- a/bigtop-deploy/puppet/modules/spark/manifests/init.pp
+++ b/bigtop-deploy/puppet/modules/spark/manifests/init.pp
@@ -16,23 +16,42 @@
class spark {
class deploy ($roles) {
- if ("spark-master" in $roles) {
- include spark::master
+ if ('spark-client' in $roles) {
+ include client
}
- if ("spark-worker" in $roles) {
- include spark::worker
+ if ('spark-on-yarn' in $roles) {
+ include yarn
+ }
+
+ if ('spark-yarn-slave' in $roles) {
+ include yarn_slave
+ }
+
+ if ('spark-master' in $roles) {
+ include master
+ }
+
+ if ('spark-worker' in $roles) {
+ include worker
+ }
+
+ if ('spark-history-server' in $roles) {
+ include history_server
}
}
- class common ($master_host = $fqdn, $master_port = "7077", $master_ui_port = "18080") {
- package { "spark-core":
- ensure => latest,
+ class client {
+ include common
+
+ package { 'spark-python':
+ ensure => latest,
+ require => Package['spark-core'],
}
- file { "/etc/spark/conf/spark-env.sh":
- content => template("spark/spark-env.sh"),
- require => [Package["spark-core"]],
+ package { 'spark-extras':
+ ensure => latest,
+ require => Package['spark-core'],
}
}
@@ -43,14 +62,15 @@ class spark {
ensure => latest,
}
- if ( $fqdn == $common::master_host ) {
- service { "spark-master":
- ensure => running,
- require => [ Package["spark-master"], File["/etc/spark/conf/spark-env.sh"], ],
- subscribe => [Package["spark-master"], File["/etc/spark/conf/spark-env.sh"] ],
- hasrestart => true,
- hasstatus => true,
- }
+ service { 'spark-master':
+ ensure => running,
+ subscribe => [
+ Package['spark-master'],
+ File['/etc/spark/conf/spark-env.sh'],
+ File['/etc/spark/conf/spark-defaults.conf'],
+ ],
+ hasrestart => true,
+ hasstatus => true,
}
}
@@ -61,15 +81,87 @@ class spark {
ensure => latest,
}
- if ( $fqdn == $common::master_host ) {
- Service["spark-master"] ~> Service["spark-worker"]
+ service { 'spark-worker':
+ ensure => running,
+ subscribe => [
+ Package['spark-worker'],
+ File['/etc/spark/conf/spark-env.sh'],
+ File['/etc/spark/conf/spark-defaults.conf'],
+ ],
+ hasrestart => true,
+ hasstatus => true,
}
- service { "spark-worker":
- ensure => running,
- require => [ Package["spark-worker"], File["/etc/spark/conf/spark-env.sh"], ],
- subscribe => [Package["spark-worker"], File["/etc/spark/conf/spark-env.sh"] ],
+ }
+
+ class history_server {
+ include common
+
+ package { 'spark-history-server':
+ ensure => latest,
+ }
+
+ service { 'spark-history-server':
+ ensure => running,
+ subscribe => [
+ Package['spark-history-server'],
+ File['/etc/spark/conf/spark-env.sh'],
+ File['/etc/spark/conf/spark-defaults.conf'],
+ ],
hasrestart => true,
hasstatus => true,
}
}
+
+ class yarn {
+ include common
+ include datanucleus
+ }
+
+ class yarn_slave {
+ include yarn_shuffle
+ include datanucleus
+ }
+
+ class yarn_shuffle {
+ package { 'spark-yarn-shuffle':
+ ensure => latest,
+ }
+ }
+
+ class datanucleus {
+ package { 'spark-datanucleus':
+ ensure => latest,
+ }
+ }
+
+ class common(
+ $master_url = 'yarn',
+ $master_host = $fqdn,
+ $master_port = 7077,
+ $worker_port = 7078,
+ $master_ui_port = 8080,
+ $worker_ui_port = 8081,
+ $history_ui_port = 18080,
+ $use_yarn_shuffle_service = false,
+ ) {
+
+ package { 'spark-core':
+ ensure => latest,
+ }
+
+ file { '/etc/spark/conf/spark-env.sh':
+ content => template('spark/spark-env.sh'),
+ require => Package['spark-core'],
+ }
+
+ file { '/etc/spark/conf/spark-defaults.conf':
+ content => template('spark/spark-defaults.conf'),
+ require => Package['spark-core'],
+ }
+
+ file { '/etc/spark/conf/log4j.properties':
+ source => '/etc/spark/conf/log4j.properties.template',
+ require => Package['spark-core'],
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/bigtop/blob/ed6377f6/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf b/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf
new file mode 100644
index 0000000..bdb9e56
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+spark.master <%= @master_url %>
+spark.eventLog.enabled true
+spark.eventLog.dir hdfs:///var/log/spark/apps
+spark.history.fs.logDirectory hdfs:///var/log/spark/apps
+spark.yarn.historyServer.address <%= @master_host %>:<%= @history_ui_port %>
+spark.history.ui.port <%= @history_ui_port %>
+spark.shuffle.service.enabled <%= @use_yarn_shuffle_service %>
http://git-wip-us.apache.org/repos/asf/bigtop/blob/ed6377f6/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh b/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh
old mode 100644
new mode 100755
index f7845a1..eb351c7
--- a/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh
+++ b/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh
@@ -14,68 +14,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# This file is sourced when running various Spark programs.
-# Copy it as spark-env.sh and edit that to configure Spark for your site.
-
-# Options read when launching programs locally with
-# ./bin/run-example or ./bin/spark-submit
-# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
-# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
-# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
-# - SPARK_CLASSPATH, default classpath entries to append
-
-# Options read by executors and drivers running inside the cluster
-# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
-# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
-# - SPARK_CLASSPATH, default classpath entries to append
-# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
-# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
-
-# Options read in YARN client mode
-# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
-# - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2)
-# - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1).
-# - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G)
-# - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb)
-# - SPARK_YARN_APP_NAME, The name of your application (Default: Spark)
-# - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: ‘default’)
-# - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job.
-# - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job.
-
-# Options for the daemons used in the standalone deploy mode
-# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
-# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
-# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
-# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
-# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
-# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
-# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
-# - SPARK_WORKER_DIR, to set the working directory of worker processes
-# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
-# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
-# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
-# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
-
-# Generic options for the daemons used in the standalone deploy mode
-# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf)
-# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs)
-# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp)
-# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER)
-# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0)
-
+export SPARK_HOME=${SPARK_HOME:-/usr/lib/spark}
+export SPARK_LOG_DIR=${SPARK_LOG_DIR:-/var/log/spark}
export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}
+export HIVE_CONF_DIR=${HIVE_CONF_DIR:-/etc/hive/conf}
export STANDALONE_SPARK_MASTER_HOST=<%= @master_host %>
+export SPARK_MASTER_PORT=<%= @master_port %>
export SPARK_MASTER_IP=$STANDALONE_SPARK_MASTER_HOST
export SPARK_MASTER_WEBUI_PORT=<%= @master_ui_port %>
-export SPARK_MASTER_PORT=7077
-export SPARK_WORKER_PORT=7078
-export SPARK_WORKER_WEBUI_PORT=18081
-export SPARK_WORKER_DIR=/var/run/spark/work
-export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
-
-export SPARK_LOG_DIR=/var/log/spark
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HADOOP_HOME}/lib/native
+export SPARK_WORKER_DIR=${SPARK_WORKER_DIR:-/var/run/spark/work}
+export SPARK_WORKER_PORT=<%= @worker_port %>
+export SPARK_WORKER_WEBUI_PORT=<%= @worker_ui_port %>
[2/2] bigtop git commit: BIGTOP-2104. Packages upgrade to Spark 1.5.1
Posted by co...@apache.org.
BIGTOP-2104. Packages upgrade to Spark 1.5.1
Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/05d9fada
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/05d9fada
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/05d9fada
Branch: refs/heads/master
Commit: 05d9fada278f23c97079afb2429b590afa100b1e
Parents: bf67f6e
Author: Jonathan Kelly <jo...@amazon.com>
Authored: Fri Oct 2 15:13:18 2015 -0700
Committer: Konstantin Boudnik <co...@apache.org>
Committed: Mon Nov 2 13:02:28 2015 -0800
----------------------------------------------------------------------
.../src/common/spark/do-component-build | 15 +++-
.../src/common/spark/install_spark.sh | 79 ++++++--------------
bigtop-packages/src/common/spark/patch0.diff | 11 +++
bigtop-packages/src/common/spark/spark-env.sh | 43 +++++++++++
bigtop-packages/src/deb/spark/control | 15 ++++
.../src/deb/spark/spark-core.install | 14 ++--
.../src/deb/spark/spark-datanucleus.install | 2 +
.../src/deb/spark/spark-extras.install | 1 +
.../src/deb/spark/spark-yarn-shuffle.install | 2 +
bigtop-packages/src/rpm/spark/SPECS/spark.spec | 41 +++++++++-
bigtop.bom | 2 +-
11 files changed, 158 insertions(+), 67 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/do-component-build
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/do-component-build b/bigtop-packages/src/common/spark/do-component-build
index 3d705a5..8d74f4a 100644
--- a/bigtop-packages/src/common/spark/do-component-build
+++ b/bigtop-packages/src/common/spark/do-component-build
@@ -24,9 +24,20 @@ BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${
-Dhadoop.version=$HADOOP_VERSION \
-Dyarn.version=$HADOOP_VERSION \
-Dprotobuf.version=2.5.0 \
- -DskipTests -DrecompileMode=all"
+ -DrecompileMode=all \
+ -Pbigtop-dist \
+ -Pyarn -Phadoop-2.6 \
+ -Phive -Phive-thriftserver \
+ $SPARK_BUILD_OPTS"
+
## this might be an issue at times
# http://maven.40175.n5.nabble.com/Not-finding-artifact-in-local-repo-td3727753.html
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -XX:PermSize=1024m -XX:MaxPermSize=1024m"
-mvn -Pbigtop-dist -Pyarn -Phive -Phive-thriftserver $BUILD_OPTS install
+mvn $BUILD_OPTS -DskipTests install
+
+# Tests must be run after Spark has already been packaged.
+# See http://spark.apache.org/docs/latest/building-spark.html#spark-tests-in-maven
+if [ "$SPARK_RUN_TESTS" = "true" ]; then
+ mvn $BUILD_OPTS test
+fi
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/install_spark.sh
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/install_spark.sh b/bigtop-packages/src/common/spark/install_spark.sh
index 85078ae..530b355 100644
--- a/bigtop-packages/src/common/spark/install_spark.sh
+++ b/bigtop-packages/src/common/spark/install_spark.sh
@@ -119,6 +119,10 @@ install -d -m 0755 $PREFIX/$LIB_DIR
install -d -m 0755 $PREFIX/$LIB_DIR/lib
install -d -m 0755 $PREFIX/$LIB_DIR/bin
install -d -m 0755 $PREFIX/$LIB_DIR/sbin
+install -d -m 0755 $PREFIX/$LIB_DIR/extras
+install -d -m 0755 $PREFIX/$LIB_DIR/extras/lib
+install -d -m 0755 $PREFIX/$LIB_DIR/yarn
+install -d -m 0755 $PREFIX/$LIB_DIR/yarn/lib
install -d -m 0755 $PREFIX/$DOC_DIR
install -d -m 0755 $PREFIX/$EXAMPLES_DIR
@@ -134,9 +138,15 @@ tar --wildcards -C $PREFIX/$LIB_DIR/ -zxf ${BUILD_DIR}/assembly/target/spark-ass
rm -rf $PREFIX/$LIB_DIR/bin/*.cmd
+# External/extra jars
+ls ${BUILD_DIR}/{external,extras}/*/target/*${SPARK_VERSION}.jar | grep -v 'original-\|assembly' | xargs -IJARS cp JARS $PREFIX/$LIB_DIR/extras/lib
+
# Examples jar
cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib/spark-examples-${SPARK_VERSION}-hadoop${HADOOP_VERSION}.jar
+# Spark YARN Shuffle jar
+cp ${BUILD_DIR}/network/yarn/target/*/spark-${SPARK_VERSION}-yarn-shuffle.jar $PREFIX/$LIB_DIR/lib/
+
# Examples src
cp -ra ${BUILD_DIR}/examples/src $PREFIX/$EXAMPLES_DIR/
ln -s $EXAMPLES_DIR $PREFIX/$LIB_DIR/examples
@@ -150,12 +160,12 @@ chmod 755 $PREFIX/$LIB_DIR/sbin/*
# Copy in the configuration files
install -d -m 0755 $PREFIX/$CONF_DIR
cp -a ${BUILD_DIR}/conf/* $PREFIX/$CONF_DIR
-cp $PREFIX/$CONF_DIR/spark-env.sh.template $PREFIX/$CONF_DIR/spark-env.sh
+cp $SOURCE_DIR/spark-env.sh $PREFIX/$CONF_DIR
ln -s /etc/spark/conf $PREFIX/$LIB_DIR/conf
# Copy in the wrappers
install -d -m 0755 $PREFIX/$BIN_DIR
-for wrap in sbin/spark-executor bin/spark-shell bin/spark-submit; do
+for wrap in bin/spark-class bin/spark-shell bin/spark-sql bin/spark-submit; do
cat > $PREFIX/$BIN_DIR/`basename $wrap` <<EOF
#!/bin/bash
@@ -167,60 +177,6 @@ EOF
chmod 755 $PREFIX/$BIN_DIR/`basename $wrap`
done
-cat >> $PREFIX/$CONF_DIR/spark-env.sh <<EOF
-export HADOOP_HOME=\${HADOOP_HOME:-/usr/lib/hadoop}
-export HADOOP_CONF_DIR=\${HADOOP_CONF_DIR:-/etc/hadoop/conf}
-
-export SPARK_MASTER_IP=\`hostname\`
-export SPARK_MASTER_PORT=7077
-export SPARK_MASTER_WEBUI_PORT=18080
-export SPARK_WORKER_PORT=7078
-export SPARK_WORKER_WEBUI_PORT=18081
-export SPARK_WORKER_DIR=/var/run/spark/work
-export SPARK_HISTORY_OPTS="\$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
-
-export SPARK_LOG_DIR=/var/log/spark
-
-export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:\${HADOOP_HOME}/lib/native
-EOF
-
-cat >> $PREFIX/$CONF_DIR/hive-site.xml <<EOF
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<configuration>
-
-<property>
- <name>javax.jdo.option.ConnectionURL</name>
- <value>jdbc:derby:;databaseName=/tmp/spark-\${user.name}/metastore_db;create=true</value>
- <description>JDBC connect string for a JDBC metastore</description>
-</property>
-
-<property>
- <name>javax.jdo.option.ConnectionDriverName</name>
- <value>org.apache.derby.jdbc.EmbeddedDriver</value>
- <description>Driver class name for a JDBC metastore</description>
-</property>
-
-</configuration>
-
-EOF
-
ln -s /var/run/spark/work $PREFIX/$LIB_DIR/work
cp -r ${BUILD_DIR}/python ${PREFIX}/${INSTALLED_LIB_DIR}/
@@ -242,3 +198,14 @@ cp ${BUILD_DIR}/{LICENSE,NOTICE} ${PREFIX}/${LIB_DIR}/
# Version-less symlinks
(cd $PREFIX/$LIB_DIR/lib; ln -s spark-assembly*.jar spark-assembly.jar; ln -s spark-examples*.jar spark-examples.jar)
+pushd $PREFIX/$LIB_DIR/yarn/lib
+ln -s ../../lib/spark-*-yarn-shuffle.jar spark-yarn-shuffle.jar
+ln -s ../../lib/datanucleus-api-jdo*.jar datanucleus-api-jdo.jar
+ln -s ../../lib/datanucleus-core*.jar datanucleus-core.jar
+ln -s ../../lib/datanucleus-rdbms*.jar datanucleus-rdbms.jar
+popd
+pushd $PREFIX/$LIB_DIR/extras/lib
+for j in $(ls *.jar); do
+ ln -s $j $(echo $j | sed -n 's/\(.*\)\(_[0-9.]\+-[0-9.]\+\)\(.jar\)/\1\3/p')
+done
+popd
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/patch0.diff
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/patch0.diff b/bigtop-packages/src/common/spark/patch0.diff
new file mode 100644
index 0000000..6b7f350
--- /dev/null
+++ b/bigtop-packages/src/common/spark/patch0.diff
@@ -0,0 +1,11 @@
+--- spark-1.5.1-patched/pom.xml 2015-09-23 22:50:32.000000000 -0700
++++ spark-1.5.1/pom.xml 2015-10-30 13:46:54.326426316 -0700
+@@ -2366,7 +2366,7 @@
+ <profile>
+ <id>hadoop-2.6</id>
+ <properties>
+- <hadoop.version>2.6.0</hadoop.version>
++ <hadoop.version>${hadoop.version}</hadoop.version>
+ <jets3t.version>0.9.3</jets3t.version>
+ <zookeeper.version>3.4.6</zookeeper.version>
+ <curator.version>2.6.0</curator.version>
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/spark-env.sh
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/spark-env.sh b/bigtop-packages/src/common/spark/spark-env.sh
new file mode 100755
index 0000000..885aed1
--- /dev/null
+++ b/bigtop-packages/src/common/spark/spark-env.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+export SPARK_HOME=${SPARK_HOME:-/usr/lib/spark}
+export SPARK_LOG_DIR=${SPARK_LOG_DIR:-/var/log/spark}
+
+export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}
+export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-${HADOOP_HOME}/../hadoop-hdfs}
+export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_HOME}/../hadoop-mapreduce}
+export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-${HADOOP_HOME}/../hadoop-yarn}
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}
+
+# Let's run everything with JVM runtime, instead of Scala
+export SPARK_LAUNCH_WITH_SCALA=0
+export SPARK_LIBRARY_PATH=${SPARK_LIBRARY_PATH:-${SPARK_HOME}/lib}
+export SCALA_LIBRARY_PATH=${SCALA_LIBRARY_PATH:-${SPARK_HOME}/lib}
+
+# Let's make sure that all needed hadoop libs are added properly
+export CLASSPATH="$CLASSPATH:$HADOOP_HOME/*:$HADOOP_HDFS_HOME/*:$HADOOP_YARN_HOME/*:$HADOOP_MAPRED_HOME/*"
+export SPARK_LIBRARY_PATH=$SPARK_LIBRARY_PATH:${HADOOP_HOME}/lib/native
+
+export STANDALONE_SPARK_MASTER_HOST=`hostname -f`
+export SPARK_MASTER_PORT=7077
+export SPARK_MASTER_WEBUI_PORT=18080
+
+export SPARK_WORKER_DIR=${SPARK_WORKER_DIR:-/var/run/spark/work}
+export SPARK_WORKER_PORT=7078
+export SPARK_WORKER_WEBUI_PORT=18081
+
+export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/control
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/control b/bigtop-packages/src/deb/spark/control
index 856d622..7ebc970 100644
--- a/bigtop-packages/src/deb/spark/control
+++ b/bigtop-packages/src/deb/spark/control
@@ -60,3 +60,18 @@ Architecture: all
Depends: spark-core (= ${source:Version})
Description: Thrift server for Spark SQL
Thrift server for Spark SQL
+
+Package: spark-datanucleus
+Architecture: all
+Description: DataNucleus libraries for Apache Spark
+ DataNucleus libraries used by Spark SQL with Hive Support
+
+Package: spark-extras
+Architecture: all
+Description: External/extra libraries for Apache Spark
+ External/extra libraries built for Apache Spark but not included in the main assembly JAR (e.g., external streaming libraries)
+
+Package: spark-yarn-shuffle
+Architecture: all
+Description: Spark YARN Shuffle Service
+ Spark YARN Shuffle Service
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-core.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-core.install b/bigtop-packages/src/deb/spark/spark-core.install
index 7e0d9f7..70f588c 100644
--- a/bigtop-packages/src/deb/spark/spark-core.install
+++ b/bigtop-packages/src/deb/spark/spark-core.install
@@ -1,26 +1,26 @@
/etc/spark
-/usr/bin/spark-executor
+/usr/bin/spark-class
/usr/bin/spark-submit
/usr/bin/spark-shell
+/usr/bin/spark-sql
/usr/lib/spark/LICENSE
/usr/lib/spark/RELEASE
/usr/lib/spark/NOTICE
/usr/lib/spark/bin/beeline
-/usr/lib/spark/bin/compute-classpath.sh
/usr/lib/spark/bin/load-spark-env.sh
/usr/lib/spark/bin/run-example
/usr/lib/spark/bin/spark-class
/usr/lib/spark/bin/spark-shell
/usr/lib/spark/bin/spark-sql
/usr/lib/spark/bin/spark-submit
-/usr/lib/spark/bin/utils.sh
/usr/lib/spark/conf
+/usr/lib/spark/data
+/usr/lib/spark/examples
+/usr/lib/spark/lib/spark-assembly*.jar
+/usr/lib/spark/lib/spark-examples*.jar
/usr/lib/spark/sbin
/usr/lib/spark/work
-/usr/lib/spark/lib
-/usr/lib/spark/examples
-/usr/lib/spark/data
-/usr/share/doc/spark
+/usr/share/doc/spark*
/var/lib/spark/
/var/log/spark/
/var/run/spark/
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-datanucleus.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-datanucleus.install b/bigtop-packages/src/deb/spark/spark-datanucleus.install
new file mode 100644
index 0000000..69f619b
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-datanucleus.install
@@ -0,0 +1,2 @@
+/usr/lib/spark/lib/datanucleus-*.jar
+/usr/lib/spark/yarn/lib/datanucleus-*.jar
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-extras.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-extras.install b/bigtop-packages/src/deb/spark/spark-extras.install
new file mode 100644
index 0000000..c65fbc0
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-extras.install
@@ -0,0 +1 @@
+/usr/lib/spark/extras/lib
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install b/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
new file mode 100644
index 0000000..79c799a
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
@@ -0,0 +1,2 @@
+/usr/lib/spark/lib/spark-*-yarn-shuffle.jar
+/usr/lib/spark/yarn/lib/spark-yarn-shuffle.jar
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/rpm/spark/SPECS/spark.spec
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/spark/SPECS/spark.spec b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
index d8a5c10..daecc72 100644
--- a/bigtop-packages/src/rpm/spark/SPECS/spark.spec
+++ b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
@@ -119,6 +119,28 @@ Requires: spark-core = %{version}-%{release}
%description -n spark-thriftserver
Thrift server for Spark SQL
+%package -n spark-datanucleus
+Summary: DataNucleus libraries for Apache Spark
+Group: Development/Libraries
+
+%description -n spark-datanucleus
+DataNucleus libraries used by Spark SQL with Hive Support
+
+%package -n spark-extras
+Summary: External/extra libraries for Apache Spark
+Group: Development/Libraries
+
+%description -n spark-extras
+External/extra libraries built for Apache Spark but not included in the main
+assembly JAR (e.g., external streaming libraries)
+
+%package -n spark-yarn-shuffle
+Summary: Spark YARN Shuffle Service
+Group: Development/Libraries
+
+%description -n spark-yarn-shuffle
+Spark YARN Shuffle Service
+
%prep
%setup -n %{spark_name}-%{spark_base_version}
@@ -175,6 +197,8 @@ done
%{lib_spark}/NOTICE
%{lib_spark}/bin
%{lib_spark}/lib
+%exclude %{lib_spark}/lib/datanucleus-*.jar
+%exclude %{lib_spark}/lib/spark-*-yarn-shuffle.jar
%{lib_spark}/sbin
%{lib_spark}/data
%{lib_spark}/examples
@@ -185,8 +209,9 @@ done
%attr(0755,spark,spark) %{var_lib_spark}
%attr(0755,spark,spark) %{var_run_spark}
%attr(0755,spark,spark) %{var_log_spark}
+%{bin}/spark-class
%{bin}/spark-shell
-%{bin}/spark-executor
+%{bin}/spark-sql
%{bin}/spark-submit
%files -n spark-python
@@ -195,6 +220,20 @@ done
%attr(0755,root,root) %{lib_spark}/bin/pyspark
%{lib_spark}/python
+%files -n spark-datanucleus
+%defattr(-,root,root,755)
+%{lib_spark}/lib/datanucleus-*.jar
+%{lib_spark}/yarn/lib/datanucleus-*.jar
+
+%files -n spark-extras
+%defattr(-,root,root,755)
+%{lib_spark}/extras
+
+%files -n spark-yarn-shuffle
+%defattr(-,root,root,755)
+%{lib_spark}/lib/spark-*-yarn-shuffle.jar
+%{lib_spark}/yarn/lib/spark-yarn-shuffle.jar
+
%define service_macro() \
%files -n %1 \
%attr(0755,root,root)/%{initd_dir}/%1 \
http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop.bom
----------------------------------------------------------------------
diff --git a/bigtop.bom b/bigtop.bom
index 51209f5..57fd457 100644
--- a/bigtop.bom
+++ b/bigtop.bom
@@ -282,7 +282,7 @@ bigtop {
name = 'spark'
pkg = 'spark-core'
relNotes = 'Apache Spark'
- version { base = '1.3.1'; pkg = base; release = 1 }
+ version { base = '1.5.1'; pkg = base; release = 1 }
tarball { destination = "$name-${version.base}.tar.gz"
source = "$name-${version.base}.tgz" }
url { download_path = "/$name/$name-${version.base}"