You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by bz...@apache.org on 2016/11/30 16:11:46 UTC
zeppelin git commit: [HOTFIX] Make Spark download stable on CI
Repository: zeppelin
Updated Branches:
refs/heads/master a459c0249 -> 985bb0c87
[HOTFIX] Make Spark download stable on CI
### What is this PR for?
There have been issues with downloading/caching Spark, esp in #1689 #1696
This is hotfix for Spark download on CI.
### What type of PR is it?
Hot Fix
### Todos
- [x] do not use dist.apache.org
- [x] leverage `download-maven-plugin` cache for Spark download
- [x] set timeout 1min and 5 retries on download
- [x] un-pack them under `/target/` so `mvn clean` works as expected
- [x] mute logs for `./testing/install_external_dependencies.sh`
### How should this be tested?
In CI logs, Spark should be downloaded by `spark-dependencies` and cached under `${HOME}/.m2/repository/.cache/maven-download-plugin`
### Questions:
* Do the license files need updating? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: Alexander Bezzubov <bz...@apache.org>
Closes #1709 from bzz/make-ci-stabel and squashes the following commits:
06c031c [Alexander Bezzubov] Move logging config to MAVEN_OPTS
702dcdd [Alexander Bezzubov] Spark download\cached, using download-maven-plugin
7040b09 [Alexander Bezzubov] Switch Spark download dir
1d85b5c [Alexander Bezzubov] Mute dependency install logs
78109af [Alexander Bezzubov] Set readTimeOut for download-maven-plugin
7a64690 [Alexander Bezzubov] Bump download-maven-plugin version to lastes 1.3.0
605dea9 [Alexander Bezzubov] Spark 2.0.1 on CI, same as in pom.xml
9ee9c04 [Alexander Bezzubov] Direct Spark download url for CI as INFRA-12996
Project: http://git-wip-us.apache.org/repos/asf/zeppelin/repo
Commit: http://git-wip-us.apache.org/repos/asf/zeppelin/commit/985bb0c8
Tree: http://git-wip-us.apache.org/repos/asf/zeppelin/tree/985bb0c8
Diff: http://git-wip-us.apache.org/repos/asf/zeppelin/diff/985bb0c8
Branch: refs/heads/master
Commit: 985bb0c875b2af5692e0e0e7910432ee3a19b80c
Parents: a459c02
Author: Alexander Bezzubov <bz...@apache.org>
Authored: Wed Nov 30 23:04:50 2016 +0900
Committer: Alexander Bezzubov <bz...@apache.org>
Committed: Thu Dec 1 01:08:57 2016 +0900
----------------------------------------------------------------------
.travis.yml | 12 ++++-----
spark-dependencies/pom.xml | 58 ++++++++++++-----------------------------
testing/downloadSpark.sh | 2 +-
3 files changed, 24 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/985bb0c8/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index deca7d6..5d4fc6a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -42,9 +42,9 @@ matrix:
- jdk: "oraclejdk7"
env: SCALA_VER="2.11" PROFILE="-Prat" BUILD_FLAG="clean" TEST_FLAG="org.apache.rat:apache-rat-plugin:check" TEST_PROJECTS=""
- # Test all modules with spark 2.0.0 and scala 2.11
+ # Test all modules with spark 2.0.1 and scala 2.11
- jdk: "oraclejdk7"
- env: SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
+ env: SCALA_VER="2.11" SPARK_VER="2.0.1" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
# Test all modules with scala 2.10
- jdk: "oraclejdk7"
@@ -75,8 +75,8 @@ matrix:
env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Ppyspark -Pscala-2.11" BUILD_FLAG="package -pl spark,python -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python -Dtest=org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false"
before_install:
- - echo "MAVEN_OPTS='-Xms1024M -Xmx2048M -XX:MaxPermSize=1024m -XX:-UseGCOverheadLimit'" >> ~/.mavenrc
- - ./testing/install_external_dependencies.sh
+ - echo "MAVEN_OPTS='-Xms1024M -Xmx2048M -XX:MaxPermSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.defaultLogLevel=warn'" >> ~/.mavenrc
+ - ./testing/install_external_dependencies.sh > /dev/null 2>&1
- ls -la .spark-dist ${HOME}/.m2/repository/.cache/maven-download-plugin || true
- ls .node_modules && cp -r .node_modules zeppelin-web/node_modules || echo "node_modules are not cached"
- "/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1600x1024x16"
@@ -84,7 +84,7 @@ before_install:
- source ~/.environ
install:
- - mvn -Dorg.slf4j.simpleLogger.defaultLogLevel=warn $BUILD_FLAG $PROFILE -B
+ - mvn $BUILD_FLAG $PROFILE -B
before_script:
- travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER
@@ -92,7 +92,7 @@ before_script:
- tail conf/zeppelin-env.sh
script:
- - mvn -Dorg.slf4j.simpleLogger.defaultLogLevel=warn $TEST_FLAG $PROFILE -B $TEST_PROJECTS
+ - mvn $TEST_FLAG $PROFILE -B $TEST_PROJECTS
after_success:
- echo "Travis exited with ${TRAVIS_TEST_RESULT}"
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/985bb0c8/spark-dependencies/pom.xml
----------------------------------------------------------------------
diff --git a/spark-dependencies/pom.xml b/spark-dependencies/pom.xml
index 04b6983..1d0cb6e 100644
--- a/spark-dependencies/pom.xml
+++ b/spark-dependencies/pom.xml
@@ -56,13 +56,12 @@
<akka.version>2.3.4-spark</akka.version>
<spark.archive>spark-${spark.version}</spark.archive>
- <spark.download.url>
- http://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}.tgz
- </spark.download.url>
+ <spark.src.download.url>
+ http://d3kbcqa49mib13.cloudfront.net/${spark.archive}.tgz
+ </spark.src.download.url>
<spark.bin.download.url>
- http://archive.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}-bin-without-hadoop.tgz
+ http://d3kbcqa49mib13.cloudfront.net/spark-${spark.version}-bin-without-hadoop.tgz
</spark.bin.download.url>
- <spark.dist.cache>${project.build.directory}/../../.spark-dist</spark.dist.cache>
<py4j.version>0.8.2.1</py4j.version>
</properties>
@@ -823,7 +822,7 @@
<plugin>
<groupId>com.googlecode.maven-download-plugin</groupId>
<artifactId>download-maven-plugin</artifactId>
- <version>1.2.1</version>
+ <version>1.3.0</version>
<executions>
<execution>
<id>download-pyspark-files</id>
@@ -832,8 +831,11 @@
<goal>wget</goal>
</goals>
<configuration>
- <url>${spark.download.url}</url>
- <outputDirectory>${spark.dist.cache}</outputDirectory>
+ <readTimeOut>60000</readTimeOut>
+ <retries>5</retries>
+ <unpack>true</unpack>
+ <url>${spark.src.download.url}</url>
+ <outputDirectory>${project.build.directory}</outputDirectory>
</configuration>
</execution>
</executions>
@@ -844,9 +846,6 @@
<configuration>
<filesets>
<fileset>
- <directory>${project.build.directory}/spark-dist</directory>
- </fileset>
- <fileset>
<directory>${basedir}/../python/build</directory>
</fileset>
</filesets>
@@ -858,21 +857,6 @@
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
- <id>unzip-pyspark-files</id>
- <phase>validate</phase>
- <goals>
- <goal>run</goal>
- </goals>
- <configuration>
- <target>
- <untar src="${spark.dist.cache}/${spark.archive}.tgz"
- dest="${project.build.directory}/spark-dist"
- compression="gzip"/>
- </target>
- </configuration>
- </execution>
-
- <execution>
<id>zip-pyspark-files</id>
<phase>generate-resources</phase>
<goals>
@@ -882,9 +866,9 @@
<target>
<delete dir="../interpreter/spark/pyspark"/>
<copy todir="../interpreter/spark/pyspark"
- file="${project.build.directory}/spark-dist/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip"/>
+ file="${project.build.directory}/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip"/>
<zip destfile="${project.build.directory}/../../interpreter/spark/pyspark/pyspark.zip"
- basedir="${project.build.directory}/spark-dist/${spark.archive}/python"
+ basedir="${project.build.directory}/${spark.archive}/python"
includes="pyspark/*.py,pyspark/**/*.py"/>
</target>
</configuration>
@@ -902,7 +886,7 @@
<plugin>
<groupId>com.googlecode.maven-download-plugin</groupId>
<artifactId>download-maven-plugin</artifactId>
- <version>1.2.1</version>
+ <version>1.3.0</version>
<executions>
<execution>
<id>download-sparkr-files</id>
@@ -911,24 +895,16 @@
<goal>wget</goal>
</goals>
<configuration>
+ <readTimeOut>60000</readTimeOut>
+ <retries>5</retries>
<url>${spark.bin.download.url}</url>
<unpack>true</unpack>
- <outputDirectory>${project.build.directory}/spark-bin-dist</outputDirectory>
+ <outputDirectory>${project.build.directory}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
- <artifactId>maven-clean-plugin</artifactId>
- <configuration>
- <filesets>
- <fileset>
- <directory>${project.build.directory}/spark-bin-dist</directory>
- </fileset>
- </filesets>
- </configuration>
- </plugin>
- <plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>2.7</version>
<executions>
@@ -943,7 +919,7 @@
<resources>
<resource>
<directory>
- ${project.build.directory}/spark-bin-dist/spark-${spark.version}-bin-without-hadoop/R/lib
+ ${project.build.directory}/spark-${spark.version}-bin-without-hadoop/R/lib
</directory>
</resource>
</resources>
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/985bb0c8/testing/downloadSpark.sh
----------------------------------------------------------------------
diff --git a/testing/downloadSpark.sh b/testing/downloadSpark.sh
index 21320bc..a1000a5 100755
--- a/testing/downloadSpark.sh
+++ b/testing/downloadSpark.sh
@@ -66,7 +66,7 @@ if [[ ! -d "${SPARK_HOME}" ]]; then
echo "${SPARK_VERSION} being downloaded from archives"
STARTTIME=`date +%s`
#timeout -s KILL "${MAX_DOWNLOAD_TIME_SEC}" wget "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
- download_with_retry "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
+ download_with_retry "http://d3kbcqa49mib13.cloudfront.net/${SPARK_ARCHIVE}.tgz"
ENDTIME=`date +%s`
DOWNLOADTIME="$((ENDTIME-STARTTIME))"
fi