Posted to commits@zeppelin.apache.org by bz...@apache.org on 2016/04/02 10:13:11 UTC
incubator-zeppelin git commit: ZEPPELIN-783 fix CI failure on Spark download
Repository: incubator-zeppelin
Updated Branches:
refs/heads/master a7a7bdb68 -> 67e0fd554
ZEPPELIN-783 fix CI failure on Spark download
### What is this PR for?
Improve CI by hardening the Spark download step, whose failures are responsible for the recent CI red on `master`.
### What type of PR is it?
Bug Fix | Hot Fix
### Todos
- [x] cleanup on spark download attempts
- [x] leverage Travis CI [caching](https://docs.travis-ci.com/user/caching) for the Spark and PySpark binaries under `.spark-dist` (see the sketch after this list)
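
A minimal sketch of the approach, assuming a Travis-cached `.spark-dist` directory; variable names here mirror the diff below but the script itself is illustrative only (the real logic is in `testing/downloadSpark.sh`). The idea: check the cache first, download with a timeout only on a miss, and wipe any partial artifacts if extraction fails so the next `travis_retry` attempt starts clean.

```sh
#!/bin/bash
# Sketch only: cache-aware Spark download with cleanup on failure.
set -e

SPARK_VERSION="${1:-1.6.0}"
HADOOP_VERSION="${2:-2.3}"
SPARK_CACHE=".spark-dist"                    # kept between builds by Travis CI caching
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
TIMEOUT_SEC=590                              # stay under Travis' 10-minute no-output limit

mkdir -p "${SPARK_CACHE}"
if [[ ! -f "${SPARK_CACHE}/${SPARK_ARCHIVE}.tgz" ]]; then
  # cache miss: fetch from the Apache archive, kill the download if it hangs
  timeout -s KILL "${TIMEOUT_SEC}" \
    wget -q -P "${SPARK_CACHE}" \
    "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
fi

# unpack outside the cache; drop broken artifacts so a retry starts clean
if ! tar zxf "${SPARK_CACHE}/${SPARK_ARCHIVE}.tgz"; then
  rm -rf "${SPARK_ARCHIVE}"
  rm -f "${SPARK_CACHE}/${SPARK_ARCHIVE}.tgz"
  exit 1
fi
```

Removing both the partly extracted directory and the cached tarball on failure is what lets `travis_retry` recover instead of a stale, corrupt cache poisoning later runs.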
### What is the Jira issue?
[ZEPPELIN-783](https://issues.apache.org/jira/browse/ZEPPELIN-783)
### How should this be tested?
CI must be green
### Questions:
* Do the license files need updating? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: Alexander Bezzubov <bz...@apache.org>
Closes #810 from bzz/ZEPPELIN-783-fix-ci-spark-download and squashes the following commits:
9d59646 [Alexander Bezzubov] ZEPPELIN-783: consistent download timeout
b6310f0 [Alexander Bezzubov] ZEPPELIN-783: add debug info: download, Zeppelin config
5d0eb2d [Alexander Bezzubov] ZEPPELIN-783: pyspark & spark cache under .spark-dist, but unpack to root
d4ef96d [Alexander Bezzubov] ZEPPELIN-783: exclude .spark-dist cache from RAT
388d76b [Alexander Bezzubov] ZEPPELIN-783: backport from Spark download to start/stop scripts
fa8b516 [Alexander Bezzubov] ZEPPELIN-783: reconcile CI-time and build-time Spark download locations
542a305 [Alexander Bezzubov] ZEPPELIN-783: use TravisCI caching for reliable Spark download
bd1d5e2 [Alexander Bezzubov] ZEPPELIN-783: add cleanup on download failure
b413743 [Alexander Bezzubov] ZEPPELIN-783: refactoring - extract SPARK_ARCHIVE var
346e075 [Alexander Bezzubov] ZEPPELIN-783: upd shell style
Project: http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/commit/67e0fd55
Tree: http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/tree/67e0fd55
Diff: http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/diff/67e0fd55
Branch: refs/heads/master
Commit: 67e0fd554f33c547d80cfff9e80940977b4b2b29
Parents: a7a7bdb
Author: Alexander Bezzubov <bz...@apache.org>
Authored: Sat Apr 2 15:54:55 2016 +0900
Committer: Alexander Bezzubov <bz...@apache.org>
Committed: Sat Apr 2 17:12:56 2016 +0900
----------------------------------------------------------------------
.gitignore | 1 +
.travis.yml | 5 +++
pom.xml | 11 +++++++
spark-dependencies/pom.xml | 33 +++++++++++++++----
testing/downloadSpark.sh | 69 +++++++++++++++++++++++++--------------
testing/startSparkCluster.sh | 19 ++++++-----
testing/stopSparkCluster.sh | 11 ++++---
zeppelin-server/pom.xml | 2 +-
zeppelin-web/pom.xml | 2 +-
9 files changed, 107 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index ad473ed..9dd02a6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@
spark/derby.log
spark/metastore_db
spark-1.*-bin-hadoop*
+.spark-dist
zeppelin-server/derby.log
lens/lens-cli-hist.log
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 2ef9025..72b748e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,6 +16,9 @@
language: java
sudo: false
+cache:
+ directories:
+ - .spark-dist
matrix:
include:
@@ -48,6 +51,7 @@ matrix:
env: TEST_SELENIUM="true" SPARK_VER="1.6.0" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false"
before_install:
+ - "ls -la .spark-dist"
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
@@ -58,6 +62,7 @@ before_script:
- travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER
- ./testing/startSparkCluster.sh $SPARK_VER $HADOOP_VER
- echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh
+ - tail conf/zeppelin-env.sh
script:
- mvn $TEST_FLAG $PROFILE -B $TEST_PROJECTS
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 5a14040..e5f7d9a 100755
--- a/pom.xml
+++ b/pom.xml
@@ -241,6 +241,7 @@
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
</plugin>
+
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
@@ -249,6 +250,7 @@
<target>1.7</target>
</configuration>
</plugin>
+
<!-- Test coverage plugin -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
@@ -270,6 +272,7 @@
</execution>
</executions>
</plugin>
+
<!-- Checkstyle plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@@ -488,6 +491,7 @@
<exclude>conf/notebook-authorization.json</exclude>
<exclude>conf/zeppelin-env.sh</exclude>
<exclude>spark-*-bin*/**</exclude>
+ <exclude>.spark-dist/**</exclude>
<!-- bundled from bootstrap -->
<exclude>docs/assets/themes/zeppelin/bootstrap/**</exclude>
@@ -640,6 +644,13 @@
</lifecycleMappingMetadata>
</configuration>
</plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <version>1.7</version>
+ </plugin>
+
</plugins>
</pluginManagement>
</build>
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/spark-dependencies/pom.xml
----------------------------------------------------------------------
diff --git a/spark-dependencies/pom.xml b/spark-dependencies/pom.xml
index 819de93..02d24e2 100644
--- a/spark-dependencies/pom.xml
+++ b/spark-dependencies/pom.xml
@@ -50,7 +50,11 @@
<akka.group>org.spark-project.akka</akka.group>
<akka.version>2.3.4-spark</akka.version>
- <spark.download.url>http://archive.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz</spark.download.url>
+ <spark.archive>spark-${spark.version}</spark.archive>
+ <spark.download.url>
+ http://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}.tgz
+ </spark.download.url>
+ <spark.dist.cache>${project.build.directory}/../../.spark-dist</spark.dist.cache>
<py4j.version>0.8.2.1</py4j.version>
</properties>
@@ -787,12 +791,12 @@
</goals>
<configuration>
<url>${spark.download.url}</url>
- <unpack>true</unpack>
- <outputDirectory>${project.build.directory}/spark-dist</outputDirectory>
+ <outputDirectory>${spark.dist.cache}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
+
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<configuration>
@@ -806,13 +810,28 @@
</filesets>
</configuration>
</plugin>
+
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
- <version>1.7</version>
<executions>
<execution>
- <id>download-and-zip-pyspark-files</id>
+ <id>unzip-pyspark-files</id>
+ <phase>validate</phase>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ <configuration>
+ <target>
+ <untar src="${spark.dist.cache}/${spark.archive}.tgz"
+ dest="${project.build.directory}/spark-dist"
+ compression="gzip"/>
+ </target>
+ </configuration>
+ </execution>
+
+ <execution>
+ <id>zip-pyspark-files</id>
<phase>generate-resources</phase>
<goals>
<goal>run</goal>
@@ -821,9 +840,9 @@
<target>
<delete dir="../interpreter/spark/pyspark"/>
<copy todir="../interpreter/spark/pyspark"
- file="${project.build.directory}/spark-dist/spark-${spark.version}/python/lib/py4j-${py4j.version}-src.zip"/>
+ file="${project.build.directory}/spark-dist/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip"/>
<zip destfile="${project.build.directory}/../../interpreter/spark/pyspark/pyspark.zip"
- basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
+ basedir="${project.build.directory}/spark-dist/${spark.archive}/python"
includes="pyspark/*.py,pyspark/**/*.py"/>
</target>
</configuration>
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/testing/downloadSpark.sh
----------------------------------------------------------------------
diff --git a/testing/downloadSpark.sh b/testing/downloadSpark.sh
index 7c907fc..d12580f 100755
--- a/testing/downloadSpark.sh
+++ b/testing/downloadSpark.sh
@@ -17,7 +17,7 @@
#
-if [ $# -ne 2 ]; then
+if [[ "$#" -ne 2 ]]; then
echo "usage) $0 [spark version] [hadoop version]"
echo " eg) $0 1.3.1 2.6"
exit 1
@@ -26,10 +26,10 @@ fi
SPARK_VERSION="${1}"
HADOOP_VERSION="${2}"
-echo ${SPARK_VERSION} | grep "^1.[123].[0-9]" > /dev/null
-if [ $? -eq 0 ]; then
+echo "${SPARK_VERSION}" | grep "^1.[123].[0-9]" > /dev/null
+if [[ "$?" -eq 0 ]]; then
echo "${SPARK_VERSION}" | grep "^1.[12].[0-9]" > /dev/null
- if [ $? -eq 0 ]; then
+ if [[ "$?" -eq 0 ]]; then
SPARK_VER_RANGE="<=1.2"
else
SPARK_VER_RANGE="<=1.3"
@@ -40,31 +40,52 @@ fi
set -xe
-FWDIR=$(dirname "${BASH_SOURCE-$0}")
+TIMEOUT_SEC=590
+FWDIR="$(dirname "${BASH_SOURCE-$0}")"
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
-export SPARK_HOME=${ZEPPELIN_HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
+
+SPARK_CACHE=".spark-dist"
+SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
+export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"
-if [ ! -d "${SPARK_HOME}" ]; then
- if [ "${SPARK_VER_RANGE}" == "<=1.2" ]; then
- # spark 1.1.x and spark 1.2.x can be downloaded from archive
- STARTTIME=`date +%s`
- timeout -s KILL 300 wget -q http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
- ENDTIME=`date +%s`
- DOWNLOADTIME=$((ENDTIME-STARTTIME))
- else
- # spark 1.3.x and later can be downloaded from mirror
- # get download address from mirror
- MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz?asjson=1")
- PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
- PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')
+if [[ ! -d "${SPARK_HOME}" ]]; then
+ mkdir -p "${SPARK_CACHE}"
+ cd "${SPARK_CACHE}"
+ if [[ ! -f "${SPARK_ARCHIVE}.tgz" ]]; then
+ pwd
+ ls -la .
+ echo "${SPARK_CACHE} does not have ${SPARK_ARCHIVE} downloading ..."
+ # download archive if not cached
+ if [[ "${SPARK_VER_RANGE}" == "<=1.2" ]]; then
+ # spark 1.1.x and spark 1.2.x can be downloaded from archive
+ STARTTIME=`date +%s`
+ timeout -s KILL "${TIMEOUT_SEC}" wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
+ ENDTIME=`date +%s`
+ DOWNLOADTIME="$((ENDTIME-STARTTIME))"
+ else
+ # spark 1.3.x and later can be downloaded from mirror
+ # get download address from mirror
+ MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1")
+
+ PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
+ PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')
+
+ STARTTIME=`date +%s`
+ timeout -s KILL "${TIMEOUT_SEC}" wget -q "${PREFFERED}${PATHINFO}"
+ ENDTIME=`date +%s`
+ DOWNLOADTIME="$((ENDTIME-STARTTIME))"
+ fi
+ fi
- STARTTIME=`date +%s`
- timeout -s KILL 590 wget -q "${PREFFERED}${PATHINFO}"
- ENDTIME=`date +%s`
- DOWNLOADTIME=$((ENDTIME-STARTTIME))
+ # extract archive in un-cached root, clean-up on failure
+ cp "${SPARK_ARCHIVE}.tgz" ..
+ cd ..
+ if ! tar zxf "${SPARK_ARCHIVE}.tgz" ; then
+ echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2
+ rm -rf "${SPARK_ARCHIVE}"
+ rm -f "${SPARK_ARCHIVE}.tgz"
fi
- tar zxf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
fi
set +xe
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/testing/startSparkCluster.sh
----------------------------------------------------------------------
diff --git a/testing/startSparkCluster.sh b/testing/startSparkCluster.sh
index e47edc1..dc7613d 100755
--- a/testing/startSparkCluster.sh
+++ b/testing/startSparkCluster.sh
@@ -17,7 +17,7 @@
#
-if [ $# -ne 2 ]; then
+if [[ "$#" -ne 2 ]]; then
echo "usage) $0 [spark version] [hadoop version]"
echo " eg) $0 1.3.1 2.6"
exit 1
@@ -26,10 +26,10 @@ fi
SPARK_VERSION="${1}"
HADOOP_VERSION="${2}"
-echo ${SPARK_VERSION} | grep "^1.[123].[0-9]" > /dev/null
-if [ $? -eq 0 ]; then
+echo "${SPARK_VERSION}" | grep "^1.[123].[0-9]" > /dev/null
+if [[ "$?" -eq 0 ]]; then
echo "${SPARK_VERSION}" | grep "^1.[12].[0-9]" > /dev/null
- if [ $? -eq 0 ]; then
+ if [[ "$?" -eq 0 ]]; then
SPARK_VER_RANGE="<=1.2"
else
SPARK_VER_RANGE="<=1.3"
@@ -38,17 +38,18 @@ else
SPARK_VER_RANGE=">1.3"
fi
-
set -xe
-FWDIR=$(dirname "${BASH_SOURCE-$0}")
+FWDIR="$(dirname "${BASH_SOURCE-$0}")"
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
-export SPARK_HOME=${ZEPPELIN_HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
+
+SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
+export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
echo "SPARK_HOME is ${SPARK_HOME}"
# create PID dir. test case detect pid file so they can select active spark home dir for test
-mkdir -p ${SPARK_HOME}/run
-export SPARK_PID_DIR=${SPARK_HOME}/run
+export SPARK_PID_DIR="${SPARK_HOME}/run"
+mkdir -p "${SPARK_PID_DIR}"
# start
export SPARK_MASTER_PORT=7071
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/testing/stopSparkCluster.sh
----------------------------------------------------------------------
diff --git a/testing/stopSparkCluster.sh b/testing/stopSparkCluster.sh
index 1bf8eac..e049ec4 100755
--- a/testing/stopSparkCluster.sh
+++ b/testing/stopSparkCluster.sh
@@ -16,7 +16,7 @@
# limitations under the License.
#
-if [ $# -ne 2 ]; then
+if [[ "$#" -ne 2 ]]; then
echo "usage) $0 [spark version] [hadoop version]"
echo " eg) $0 1.3.1 2.6"
exit 1
@@ -27,12 +27,15 @@ HADOOP_VERSION="${2}"
set -xe
-FWDIR=$(dirname "${BASH_SOURCE-$0}")
+FWDIR="$(dirname "${BASH_SOURCE-$0}")"
ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
-export SPARK_HOME=${ZEPPELIN_HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
+
+SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
+export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}"
+echo "SPARK_HOME is ${SPARK_HOME}"
# set create PID dir
-export SPARK_PID_DIR=${SPARK_HOME}/run
+export SPARK_PID_DIR="${SPARK_HOME}/run"
${SPARK_HOME}/sbin/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker 1
${SPARK_HOME}/sbin/stop-master.sh
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/zeppelin-server/pom.xml
----------------------------------------------------------------------
diff --git a/zeppelin-server/pom.xml b/zeppelin-server/pom.xml
index ee03c33..f2c9ced 100644
--- a/zeppelin-server/pom.xml
+++ b/zeppelin-server/pom.xml
@@ -369,8 +369,8 @@
</plugin>
<plugin>
+ <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
- <version>1.6</version>
<executions>
<execution>
<id>start-zeppelin</id>
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/zeppelin-web/pom.xml
----------------------------------------------------------------------
diff --git a/zeppelin-web/pom.xml b/zeppelin-web/pom.xml
index 21f17df..8878e9a 100644
--- a/zeppelin-web/pom.xml
+++ b/zeppelin-web/pom.xml
@@ -47,10 +47,10 @@
<webXml>dist\WEB-INF\web.xml</webXml>
</configuration>
</plugin>
+
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
- <version>0.11</version>
<configuration>
<excludes>
<exclude>**/.idea/</exclude>