You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bigtop.apache.org by co...@apache.org on 2015/11/02 22:03:14 UTC

[1/2] bigtop git commit: BIGTOP-2105. Puppet recipes improvements after Spark is bumped to 1.5.1

Repository: bigtop
Updated Branches:
  refs/heads/master bf67f6e5b -> ed6377f69


BIGTOP-2105. Puppet recipes improvements after Spark is bumped to 1.5.1


Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/ed6377f6
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/ed6377f6
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/ed6377f6

Branch: refs/heads/master
Commit: ed6377f693e7d2655172fd49e6e68ddc9d5709fd
Parents: 05d9fad
Author: Jonathan Kelly <jo...@amazon.com>
Authored: Fri Oct 2 15:20:33 2015 -0700
Committer: Konstantin Boudnik <co...@apache.org>
Committed: Mon Nov 2 13:02:28 2015 -0800

----------------------------------------------------------------------
 .../puppet/modules/spark/manifests/init.pp      | 140 +++++++++++++++----
 .../modules/spark/templates/spark-defaults.conf |  22 +++
 .../puppet/modules/spark/templates/spark-env.sh |  65 +--------
 3 files changed, 145 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bigtop/blob/ed6377f6/bigtop-deploy/puppet/modules/spark/manifests/init.pp
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/spark/manifests/init.pp b/bigtop-deploy/puppet/modules/spark/manifests/init.pp
index 9b33bb9..b93be1d 100644
--- a/bigtop-deploy/puppet/modules/spark/manifests/init.pp
+++ b/bigtop-deploy/puppet/modules/spark/manifests/init.pp
@@ -16,23 +16,42 @@
 class spark {
 
   class deploy ($roles) {
-    if ("spark-master" in $roles) {
-      include spark::master
+    if ('spark-client' in $roles) {
+      include client
     }
 
-    if ("spark-worker" in $roles) {
-      include spark::worker
+    if ('spark-on-yarn' in $roles) {
+      include yarn
+    }
+
+    if ('spark-yarn-slave' in $roles) {
+      include yarn_slave
+    }
+
+    if ('spark-master' in $roles) {
+      include master
+    }
+
+    if ('spark-worker' in $roles) {
+      include worker
+    }
+
+    if ('spark-history-server' in $roles) {
+      include history_server
     }
   }
 
-  class common ($master_host = $fqdn, $master_port = "7077", $master_ui_port = "18080") {
-    package { "spark-core":
-      ensure => latest,
+  class client {
+    include common
+
+    package { 'spark-python':
+      ensure  => latest,
+      require => Package['spark-core'],
     }
 
-    file { "/etc/spark/conf/spark-env.sh":
-        content => template("spark/spark-env.sh"),
-        require => [Package["spark-core"]],
+    package { 'spark-extras':
+      ensure  => latest,
+      require => Package['spark-core'],
     }
   }
 
@@ -43,14 +62,15 @@ class spark {
       ensure => latest,
     }
 
-    if ( $fqdn == $common::master_host ) {
-      service { "spark-master":
-        ensure => running,
-        require => [ Package["spark-master"], File["/etc/spark/conf/spark-env.sh"], ],
-        subscribe => [Package["spark-master"], File["/etc/spark/conf/spark-env.sh"] ],
-        hasrestart => true,
-        hasstatus => true,
-      }
+    service { 'spark-master':
+      ensure     => running,
+      subscribe  => [
+        Package['spark-master'],
+        File['/etc/spark/conf/spark-env.sh'],
+        File['/etc/spark/conf/spark-defaults.conf'],
+      ],
+      hasrestart => true,
+      hasstatus  => true,
     }
   }
 
@@ -61,15 +81,87 @@ class spark {
       ensure => latest,
     }
 
-    if ( $fqdn == $common::master_host ) {
-      Service["spark-master"] ~> Service["spark-worker"]
+    service { 'spark-worker':
+      ensure     => running,
+      subscribe  => [
+        Package['spark-worker'],
+        File['/etc/spark/conf/spark-env.sh'],
+        File['/etc/spark/conf/spark-defaults.conf'],
+      ],
+      hasrestart => true,
+      hasstatus  => true,
     }
-    service { "spark-worker":
-      ensure => running,
-      require => [ Package["spark-worker"], File["/etc/spark/conf/spark-env.sh"], ],
-      subscribe => [Package["spark-worker"], File["/etc/spark/conf/spark-env.sh"] ],
+  }
+
+  class history_server {
+    include common
+
+    package { 'spark-history-server':
+      ensure => latest,
+    }
+
+    service { 'spark-history-server':
+      ensure     => running,
+      subscribe  => [
+        Package['spark-history-server'],
+        File['/etc/spark/conf/spark-env.sh'],
+        File['/etc/spark/conf/spark-defaults.conf'],
+      ],
       hasrestart => true,
       hasstatus => true,
     } 
   }
+
+  class yarn {
+    include common
+    include datanucleus
+  }
+
+  class yarn_slave {
+    include yarn_shuffle
+    include datanucleus
+  }
+
+  class yarn_shuffle {
+    package { 'spark-yarn-shuffle':
+      ensure => latest,
+    }
+  }
+
+  class datanucleus {
+    package { 'spark-datanucleus':
+      ensure => latest,
+    }
+  }
+
+  class common(
+      $master_url = 'yarn',
+      $master_host = $fqdn,
+      $master_port = 7077,
+      $worker_port = 7078,
+      $master_ui_port = 8080,
+      $worker_ui_port = 8081,
+      $history_ui_port = 18080,
+      $use_yarn_shuffle_service = false,
+  ) {
+
+    package { 'spark-core':
+      ensure => latest,
+    }
+
+    file { '/etc/spark/conf/spark-env.sh':
+      content => template('spark/spark-env.sh'),
+      require => Package['spark-core'],
+    }
+
+    file { '/etc/spark/conf/spark-defaults.conf':
+      content => template('spark/spark-defaults.conf'),
+      require => Package['spark-core'],
+    }
+
+    file { '/etc/spark/conf/log4j.properties':
+      source  => '/etc/spark/conf/log4j.properties.template',
+      require => Package['spark-core'],
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/bigtop/blob/ed6377f6/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf b/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf
new file mode 100644
index 0000000..bdb9e56
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/spark/templates/spark-defaults.conf
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+spark.master <%= @master_url %>
+spark.eventLog.enabled true
+spark.eventLog.dir hdfs:///var/log/spark/apps
+spark.history.fs.logDirectory hdfs:///var/log/spark/apps
+spark.yarn.historyServer.address <%= @master_host %>:<%= @history_ui_port %>
+spark.history.ui.port <%= @history_ui_port %>
+spark.shuffle.service.enabled <%= @use_yarn_shuffle_service %>

http://git-wip-us.apache.org/repos/asf/bigtop/blob/ed6377f6/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh
----------------------------------------------------------------------
diff --git a/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh b/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh
old mode 100644
new mode 100755
index f7845a1..eb351c7
--- a/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh
+++ b/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh
@@ -14,68 +14,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# This file is sourced when running various Spark programs.
-# Copy it as spark-env.sh and edit that to configure Spark for your site.
-
-# Options read when launching programs locally with 
-# ./bin/run-example or ./bin/spark-submit
-# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
-# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
-# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
-# - SPARK_CLASSPATH, default classpath entries to append
-
-# Options read by executors and drivers running inside the cluster
-# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
-# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
-# - SPARK_CLASSPATH, default classpath entries to append
-# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
-# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
-
-# Options read in YARN client mode
-# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
-# - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2)
-# - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1).
-# - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G)
-# - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb)
-# - SPARK_YARN_APP_NAME, The name of your application (Default: Spark)
-# - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: ‘default’)
-# - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job.
-# - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job.
-
-# Options for the daemons used in the standalone deploy mode
-# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
-# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
-# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
-# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
-# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
-# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
-# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
-# - SPARK_WORKER_DIR, to set the working directory of worker processes
-# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
-# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
-# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
-# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
-
-# Generic options for the daemons used in the standalone deploy mode
-# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
-# - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
-# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
-# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
-# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
-
+export SPARK_HOME=${SPARK_HOME:-/usr/lib/spark}
+export SPARK_LOG_DIR=${SPARK_LOG_DIR:-/var/log/spark}
 export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}
 export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}
+export HIVE_CONF_DIR=${HIVE_CONF_DIR:-/etc/hive/conf}
 
 export STANDALONE_SPARK_MASTER_HOST=<%= @master_host %>
+export SPARK_MASTER_PORT=<%= @master_port %>
 export SPARK_MASTER_IP=$STANDALONE_SPARK_MASTER_HOST
 export SPARK_MASTER_WEBUI_PORT=<%= @master_ui_port %>
-export SPARK_MASTER_PORT=7077
-export SPARK_WORKER_PORT=7078
-export SPARK_WORKER_WEBUI_PORT=18081
-export SPARK_WORKER_DIR=/var/run/spark/work
-export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
-
-export SPARK_LOG_DIR=/var/log/spark
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HADOOP_HOME}/lib/native
 
+export SPARK_WORKER_DIR=${SPARK_WORKER_DIR:-/var/run/spark/work}
+export SPARK_WORKER_PORT=<%= @worker_port %>
+export SPARK_WORKER_WEBUI_PORT=<%= @worker_ui_port %>


[2/2] bigtop git commit: BIGTOP-2104. Packages upgrade to Spark 1.5.1

Posted by co...@apache.org.
BIGTOP-2104. Packages upgrade to Spark 1.5.1


Project: http://git-wip-us.apache.org/repos/asf/bigtop/repo
Commit: http://git-wip-us.apache.org/repos/asf/bigtop/commit/05d9fada
Tree: http://git-wip-us.apache.org/repos/asf/bigtop/tree/05d9fada
Diff: http://git-wip-us.apache.org/repos/asf/bigtop/diff/05d9fada

Branch: refs/heads/master
Commit: 05d9fada278f23c97079afb2429b590afa100b1e
Parents: bf67f6e
Author: Jonathan Kelly <jo...@amazon.com>
Authored: Fri Oct 2 15:13:18 2015 -0700
Committer: Konstantin Boudnik <co...@apache.org>
Committed: Mon Nov 2 13:02:28 2015 -0800

----------------------------------------------------------------------
 .../src/common/spark/do-component-build         | 15 +++-
 .../src/common/spark/install_spark.sh           | 79 ++++++--------------
 bigtop-packages/src/common/spark/patch0.diff    | 11 +++
 bigtop-packages/src/common/spark/spark-env.sh   | 43 +++++++++++
 bigtop-packages/src/deb/spark/control           | 15 ++++
 .../src/deb/spark/spark-core.install            | 14 ++--
 .../src/deb/spark/spark-datanucleus.install     |  2 +
 .../src/deb/spark/spark-extras.install          |  1 +
 .../src/deb/spark/spark-yarn-shuffle.install    |  2 +
 bigtop-packages/src/rpm/spark/SPECS/spark.spec  | 41 +++++++++-
 bigtop.bom                                      |  2 +-
 11 files changed, 158 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/do-component-build
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/do-component-build b/bigtop-packages/src/common/spark/do-component-build
index 3d705a5..8d74f4a 100644
--- a/bigtop-packages/src/common/spark/do-component-build
+++ b/bigtop-packages/src/common/spark/do-component-build
@@ -24,9 +24,20 @@ BUILD_OPTS="-Divy.home=${HOME}/.ivy2 -Dsbt.ivy.home=${HOME}/.ivy2 -Duser.home=${
             -Dhadoop.version=$HADOOP_VERSION \
             -Dyarn.version=$HADOOP_VERSION \
             -Dprotobuf.version=2.5.0 \
-            -DskipTests -DrecompileMode=all"
+            -DrecompileMode=all \
+            -Pbigtop-dist \
+            -Pyarn -Phadoop-2.6 \
+            -Phive -Phive-thriftserver \
+            $SPARK_BUILD_OPTS"
+
 ## this might be an issue at times
 #        http://maven.40175.n5.nabble.com/Not-finding-artifact-in-local-repo-td3727753.html
 export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -XX:PermSize=1024m -XX:MaxPermSize=1024m"
 
-mvn -Pbigtop-dist -Pyarn -Phive -Phive-thriftserver $BUILD_OPTS install
+mvn $BUILD_OPTS -DskipTests install
+
+# Tests must be run after Spark has already been packaged.
+# See http://spark.apache.org/docs/latest/building-spark.html#spark-tests-in-maven
+if [ "$SPARK_RUN_TESTS" = "true" ]; then
+    mvn $BUILD_OPTS test
+fi

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/install_spark.sh
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/install_spark.sh b/bigtop-packages/src/common/spark/install_spark.sh
index 85078ae..530b355 100644
--- a/bigtop-packages/src/common/spark/install_spark.sh
+++ b/bigtop-packages/src/common/spark/install_spark.sh
@@ -119,6 +119,10 @@ install -d -m 0755 $PREFIX/$LIB_DIR
 install -d -m 0755 $PREFIX/$LIB_DIR/lib
 install -d -m 0755 $PREFIX/$LIB_DIR/bin
 install -d -m 0755 $PREFIX/$LIB_DIR/sbin
+install -d -m 0755 $PREFIX/$LIB_DIR/extras
+install -d -m 0755 $PREFIX/$LIB_DIR/extras/lib
+install -d -m 0755 $PREFIX/$LIB_DIR/yarn
+install -d -m 0755 $PREFIX/$LIB_DIR/yarn/lib
 install -d -m 0755 $PREFIX/$DOC_DIR
 install -d -m 0755 $PREFIX/$EXAMPLES_DIR
 
@@ -134,9 +138,15 @@ tar --wildcards -C $PREFIX/$LIB_DIR/ -zxf ${BUILD_DIR}/assembly/target/spark-ass
 
 rm -rf $PREFIX/$LIB_DIR/bin/*.cmd
 
+# External/extra jars
+ls ${BUILD_DIR}/{external,extras}/*/target/*${SPARK_VERSION}.jar | grep -v 'original-\|assembly' | xargs -IJARS cp JARS $PREFIX/$LIB_DIR/extras/lib
+
 # Examples jar
 cp ${BUILD_DIR}/examples/target/spark-examples*${SPARK_VERSION}.jar $PREFIX/$LIB_DIR/lib/spark-examples-${SPARK_VERSION}-hadoop${HADOOP_VERSION}.jar
 
+# Spark YARN Shuffle jar
+cp ${BUILD_DIR}/network/yarn/target/*/spark-${SPARK_VERSION}-yarn-shuffle.jar $PREFIX/$LIB_DIR/lib/
+
 # Examples src
 cp -ra ${BUILD_DIR}/examples/src $PREFIX/$EXAMPLES_DIR/
 ln -s $EXAMPLES_DIR $PREFIX/$LIB_DIR/examples
@@ -150,12 +160,12 @@ chmod 755 $PREFIX/$LIB_DIR/sbin/*
 # Copy in the configuration files
 install -d -m 0755 $PREFIX/$CONF_DIR
 cp -a ${BUILD_DIR}/conf/* $PREFIX/$CONF_DIR
-cp  $PREFIX/$CONF_DIR/spark-env.sh.template $PREFIX/$CONF_DIR/spark-env.sh
+cp $SOURCE_DIR/spark-env.sh $PREFIX/$CONF_DIR
 ln -s /etc/spark/conf $PREFIX/$LIB_DIR/conf
 
 # Copy in the wrappers
 install -d -m 0755 $PREFIX/$BIN_DIR
-for wrap in sbin/spark-executor bin/spark-shell bin/spark-submit; do
+for wrap in bin/spark-class bin/spark-shell bin/spark-sql bin/spark-submit; do
   cat > $PREFIX/$BIN_DIR/`basename $wrap` <<EOF
 #!/bin/bash
 
@@ -167,60 +177,6 @@ EOF
   chmod 755 $PREFIX/$BIN_DIR/`basename $wrap`
 done
 
-cat >> $PREFIX/$CONF_DIR/spark-env.sh <<EOF
-export HADOOP_HOME=\${HADOOP_HOME:-/usr/lib/hadoop}
-export HADOOP_CONF_DIR=\${HADOOP_CONF_DIR:-/etc/hadoop/conf}
-
-export SPARK_MASTER_IP=\`hostname\`
-export SPARK_MASTER_PORT=7077
-export SPARK_MASTER_WEBUI_PORT=18080
-export SPARK_WORKER_PORT=7078
-export SPARK_WORKER_WEBUI_PORT=18081
-export SPARK_WORKER_DIR=/var/run/spark/work
-export SPARK_HISTORY_OPTS="\$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"
-
-export SPARK_LOG_DIR=/var/log/spark
-
-export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:\${HADOOP_HOME}/lib/native
-EOF
-
-cat >> $PREFIX/$CONF_DIR/hive-site.xml <<EOF
-<?xml version="1.0"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-<configuration>
-
-<property>
-  <name>javax.jdo.option.ConnectionURL</name>
-  <value>jdbc:derby:;databaseName=/tmp/spark-\${user.name}/metastore_db;create=true</value>
-  <description>JDBC connect string for a JDBC metastore</description>
-</property>
-
-<property>
-  <name>javax.jdo.option.ConnectionDriverName</name>
-  <value>org.apache.derby.jdbc.EmbeddedDriver</value>
-  <description>Driver class name for a JDBC metastore</description>
-</property>
-
-</configuration>
-
-EOF
-
 ln -s /var/run/spark/work $PREFIX/$LIB_DIR/work
 
 cp -r ${BUILD_DIR}/python ${PREFIX}/${INSTALLED_LIB_DIR}/
@@ -242,3 +198,14 @@ cp ${BUILD_DIR}/{LICENSE,NOTICE} ${PREFIX}/${LIB_DIR}/
 
 # Version-less symlinks
 (cd $PREFIX/$LIB_DIR/lib; ln -s spark-assembly*.jar spark-assembly.jar; ln -s spark-examples*.jar spark-examples.jar)
+pushd $PREFIX/$LIB_DIR/yarn/lib
+ln -s ../../lib/spark-*-yarn-shuffle.jar spark-yarn-shuffle.jar
+ln -s ../../lib/datanucleus-api-jdo*.jar datanucleus-api-jdo.jar
+ln -s ../../lib/datanucleus-core*.jar datanucleus-core.jar
+ln -s ../../lib/datanucleus-rdbms*.jar datanucleus-rdbms.jar
+popd
+pushd $PREFIX/$LIB_DIR/extras/lib
+for j in $(ls *.jar); do
+  ln -s $j $(echo $j | sed -n 's/\(.*\)\(_[0-9.]\+-[0-9.]\+\)\(.jar\)/\1\3/p')
+done
+popd

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/patch0.diff
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/patch0.diff b/bigtop-packages/src/common/spark/patch0.diff
new file mode 100644
index 0000000..6b7f350
--- /dev/null
+++ b/bigtop-packages/src/common/spark/patch0.diff
@@ -0,0 +1,11 @@
+--- spark-1.5.1-patched/pom.xml	2015-09-23 22:50:32.000000000 -0700
++++ spark-1.5.1/pom.xml	2015-10-30 13:46:54.326426316 -0700
+@@ -2366,7 +2366,7 @@
+     <profile>
+       <id>hadoop-2.6</id>
+       <properties>
+-        <hadoop.version>2.6.0</hadoop.version>
++        <hadoop.version>${hadoop.version}</hadoop.version>
+         <jets3t.version>0.9.3</jets3t.version>
+         <zookeeper.version>3.4.6</zookeeper.version>
+         <curator.version>2.6.0</curator.version>

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/common/spark/spark-env.sh
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/common/spark/spark-env.sh b/bigtop-packages/src/common/spark/spark-env.sh
new file mode 100755
index 0000000..885aed1
--- /dev/null
+++ b/bigtop-packages/src/common/spark/spark-env.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+export SPARK_HOME=${SPARK_HOME:-/usr/lib/spark}
+export SPARK_LOG_DIR=${SPARK_LOG_DIR:-/var/log/spark}
+
+export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}
+export HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-${HADOOP_HOME}/../hadoop-hdfs}
+export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_HOME}/../hadoop-mapreduce}
+export HADOOP_YARN_HOME=${HADOOP_YARN_HOME:-${HADOOP_HOME}/../hadoop-yarn}
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop/conf}
+
+# Let's run everything with JVM runtime, instead of Scala
+export SPARK_LAUNCH_WITH_SCALA=0
+export SPARK_LIBRARY_PATH=${SPARK_LIBRARY_PATH:-${SPARK_HOME}/lib}
+export SCALA_LIBRARY_PATH=${SCALA_LIBRARY_PATH:-${SPARK_HOME}/lib}
+
+# Let's make sure that all needed hadoop libs are added properly
+export CLASSPATH="$CLASSPATH:$HADOOP_HOME/*:$HADOOP_HDFS_HOME/*:$HADOOP_YARN_HOME/*:$HADOOP_MAPRED_HOME/*"
+export SPARK_LIBRARY_PATH=$SPARK_LIBRARY_PATH:${HADOOP_HOME}/lib/native
+
+export STANDALONE_SPARK_MASTER_HOST=`hostname -f`
+export SPARK_MASTER_PORT=7077
+export SPARK_MASTER_WEBUI_PORT=18080
+
+export SPARK_WORKER_DIR=${SPARK_WORKER_DIR:-/var/run/spark/work}
+export SPARK_WORKER_PORT=7078
+export SPARK_WORKER_WEBUI_PORT=18081
+
+export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=hdfs:///var/log/spark/apps -Dspark.history.ui.port=18082"

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/control
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/control b/bigtop-packages/src/deb/spark/control
index 856d622..7ebc970 100644
--- a/bigtop-packages/src/deb/spark/control
+++ b/bigtop-packages/src/deb/spark/control
@@ -60,3 +60,18 @@ Architecture: all
 Depends: spark-core (= ${source:Version})
 Description: Thrift server for Spark SQL
  Thrift server for Spark SQL
+
+Package: spark-datanucleus
+Architecture: all
+Description: DataNucleus libraries for Apache Spark
+ DataNucleus libraries used by Spark SQL with Hive Support
+
+Package: spark-extras
+Architecture: all
+Description: External/extra libraries for Apache Spark
+ External/extra libraries built for Apache Spark but not included in the main assembly JAR (e.g., external streaming libraries)
+
+Package: spark-yarn-shuffle
+Architecture: all
+Description: Spark YARN Shuffle Service
+ Spark YARN Shuffle Service

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-core.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-core.install b/bigtop-packages/src/deb/spark/spark-core.install
index 7e0d9f7..70f588c 100644
--- a/bigtop-packages/src/deb/spark/spark-core.install
+++ b/bigtop-packages/src/deb/spark/spark-core.install
@@ -1,26 +1,26 @@
 /etc/spark
-/usr/bin/spark-executor
+/usr/bin/spark-class
 /usr/bin/spark-submit
 /usr/bin/spark-shell
+/usr/bin/spark-sql
 /usr/lib/spark/LICENSE
 /usr/lib/spark/RELEASE
 /usr/lib/spark/NOTICE
 /usr/lib/spark/bin/beeline
-/usr/lib/spark/bin/compute-classpath.sh
 /usr/lib/spark/bin/load-spark-env.sh
 /usr/lib/spark/bin/run-example
 /usr/lib/spark/bin/spark-class
 /usr/lib/spark/bin/spark-shell
 /usr/lib/spark/bin/spark-sql
 /usr/lib/spark/bin/spark-submit
-/usr/lib/spark/bin/utils.sh
 /usr/lib/spark/conf
+/usr/lib/spark/data
+/usr/lib/spark/examples
+/usr/lib/spark/lib/spark-assembly*.jar
+/usr/lib/spark/lib/spark-examples*.jar
 /usr/lib/spark/sbin
 /usr/lib/spark/work
-/usr/lib/spark/lib
-/usr/lib/spark/examples
-/usr/lib/spark/data
-/usr/share/doc/spark
+/usr/share/doc/spark*
 /var/lib/spark/
 /var/log/spark/
 /var/run/spark/

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-datanucleus.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-datanucleus.install b/bigtop-packages/src/deb/spark/spark-datanucleus.install
new file mode 100644
index 0000000..69f619b
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-datanucleus.install
@@ -0,0 +1,2 @@
+/usr/lib/spark/lib/datanucleus-*.jar
+/usr/lib/spark/yarn/lib/datanucleus-*.jar

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-extras.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-extras.install b/bigtop-packages/src/deb/spark/spark-extras.install
new file mode 100644
index 0000000..c65fbc0
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-extras.install
@@ -0,0 +1 @@
+/usr/lib/spark/extras/lib

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install b/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
new file mode 100644
index 0000000..79c799a
--- /dev/null
+++ b/bigtop-packages/src/deb/spark/spark-yarn-shuffle.install
@@ -0,0 +1,2 @@
+/usr/lib/spark/lib/spark-*-yarn-shuffle.jar
+/usr/lib/spark/yarn/lib/spark-yarn-shuffle.jar

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop-packages/src/rpm/spark/SPECS/spark.spec
----------------------------------------------------------------------
diff --git a/bigtop-packages/src/rpm/spark/SPECS/spark.spec b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
index d8a5c10..daecc72 100644
--- a/bigtop-packages/src/rpm/spark/SPECS/spark.spec
+++ b/bigtop-packages/src/rpm/spark/SPECS/spark.spec
@@ -119,6 +119,28 @@ Requires: spark-core = %{version}-%{release}
 %description -n spark-thriftserver
 Thrift server for Spark SQL
 
+%package -n spark-datanucleus
+Summary: DataNucleus libraries for Apache Spark
+Group: Development/Libraries
+
+%description -n spark-datanucleus
+DataNucleus libraries used by Spark SQL with Hive Support
+
+%package -n spark-extras
+Summary: External/extra libraries for Apache Spark
+Group: Development/Libraries
+
+%description -n spark-extras
+External/extra libraries built for Apache Spark but not included in the main
+assembly JAR (e.g., external streaming libraries)
+
+%package -n spark-yarn-shuffle
+Summary: Spark YARN Shuffle Service
+Group: Development/Libraries
+
+%description -n spark-yarn-shuffle
+Spark YARN Shuffle Service
+
 %prep
 %setup -n %{spark_name}-%{spark_base_version}
 
@@ -175,6 +197,8 @@ done
 %{lib_spark}/NOTICE
 %{lib_spark}/bin
 %{lib_spark}/lib
+%exclude %{lib_spark}/lib/datanucleus-*.jar
+%exclude %{lib_spark}/lib/spark-*-yarn-shuffle.jar
 %{lib_spark}/sbin
 %{lib_spark}/data
 %{lib_spark}/examples
@@ -185,8 +209,9 @@ done
 %attr(0755,spark,spark) %{var_lib_spark}
 %attr(0755,spark,spark) %{var_run_spark}
 %attr(0755,spark,spark) %{var_log_spark}
+%{bin}/spark-class
 %{bin}/spark-shell
-%{bin}/spark-executor
+%{bin}/spark-sql
 %{bin}/spark-submit
 
 %files -n spark-python
@@ -195,6 +220,20 @@ done
 %attr(0755,root,root) %{lib_spark}/bin/pyspark
 %{lib_spark}/python
 
+%files -n spark-datanucleus
+%defattr(-,root,root,755)
+%{lib_spark}/lib/datanucleus-*.jar
+%{lib_spark}/yarn/lib/datanucleus-*.jar
+
+%files -n spark-extras
+%defattr(-,root,root,755)
+%{lib_spark}/extras
+
+%files -n spark-yarn-shuffle
+%defattr(-,root,root,755)
+%{lib_spark}/lib/spark-*-yarn-shuffle.jar
+%{lib_spark}/yarn/lib/spark-yarn-shuffle.jar
+
 %define service_macro() \
 %files -n %1 \
 %attr(0755,root,root)/%{initd_dir}/%1 \

http://git-wip-us.apache.org/repos/asf/bigtop/blob/05d9fada/bigtop.bom
----------------------------------------------------------------------
diff --git a/bigtop.bom b/bigtop.bom
index 51209f5..57fd457 100644
--- a/bigtop.bom
+++ b/bigtop.bom
@@ -282,7 +282,7 @@ bigtop {
       name    = 'spark'
       pkg     = 'spark-core'
       relNotes = 'Apache Spark'
-      version { base = '1.3.1'; pkg = base; release = 1 }
+      version { base = '1.5.1'; pkg = base; release = 1 }
       tarball { destination = "$name-${version.base}.tar.gz"
                 source      = "$name-${version.base}.tgz" }
       url     { download_path = "/$name/$name-${version.base}"