You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by bz...@apache.org on 2016/11/30 16:11:46 UTC

zeppelin git commit: [HOTFIX] Make Spark download stable on CI

Repository: zeppelin
Updated Branches:
  refs/heads/master a459c0249 -> 985bb0c87


[HOTFIX] Make Spark download stable on CI

### What is this PR for?
There have been issues with downloading/caching Spark, especially in #1689 and #1696.
This is hotfix for Spark download on CI.

### What type of PR is it?
Hot Fix

### Todos
 - [x] do not use dist.apache.org
 - [x] leverage the `download-maven-plugin` cache for the Spark download
 - [x] set a 1-minute timeout and 5 retries on download
 - [x] un-pack them under `/target/` so `mvn clean` works as expected
 - [x] mute logs for `./testing/install_external_dependencies.sh`

### How should this be tested?
In CI logs, Spark should be downloaded by `spark-dependencies` and cached under `${HOME}/.m2/repository/.cache/maven-download-plugin`

### Questions:
* Do the license files need an update? No
* Are there breaking changes for older versions? No
* Does this need documentation? No

Author: Alexander Bezzubov <bz...@apache.org>

Closes #1709 from bzz/make-ci-stabel and squashes the following commits:

06c031c [Alexander Bezzubov] Move logging config to MAVEN_OPTS
702dcdd [Alexander Bezzubov] Spark download\cached, using download-maven-plugin
7040b09 [Alexander Bezzubov] Switch Spark download dir
1d85b5c [Alexander Bezzubov] Mute dependency install logs
78109af [Alexander Bezzubov] Set readTimeOut for download-maven-plugin
7a64690 [Alexander Bezzubov] Bump download-maven-plugin version to lastes 1.3.0
605dea9 [Alexander Bezzubov] Spark 2.0.1 on CI, same as in pom.xml
9ee9c04 [Alexander Bezzubov] Direct Spark download url for CI as INFRA-12996


Project: http://git-wip-us.apache.org/repos/asf/zeppelin/repo
Commit: http://git-wip-us.apache.org/repos/asf/zeppelin/commit/985bb0c8
Tree: http://git-wip-us.apache.org/repos/asf/zeppelin/tree/985bb0c8
Diff: http://git-wip-us.apache.org/repos/asf/zeppelin/diff/985bb0c8

Branch: refs/heads/master
Commit: 985bb0c875b2af5692e0e0e7910432ee3a19b80c
Parents: a459c02
Author: Alexander Bezzubov <bz...@apache.org>
Authored: Wed Nov 30 23:04:50 2016 +0900
Committer: Alexander Bezzubov <bz...@apache.org>
Committed: Thu Dec 1 01:08:57 2016 +0900

----------------------------------------------------------------------
 .travis.yml                | 12 ++++-----
 spark-dependencies/pom.xml | 58 ++++++++++++-----------------------------
 testing/downloadSpark.sh   |  2 +-
 3 files changed, 24 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/zeppelin/blob/985bb0c8/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index deca7d6..5d4fc6a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -42,9 +42,9 @@ matrix:
     - jdk: "oraclejdk7"
       env: SCALA_VER="2.11" PROFILE="-Prat" BUILD_FLAG="clean" TEST_FLAG="org.apache.rat:apache-rat-plugin:check" TEST_PROJECTS=""
 
-    # Test all modules with spark 2.0.0 and scala 2.11
+    # Test all modules with spark 2.0.1 and scala 2.11
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
+      env: SCALA_VER="2.11" SPARK_VER="2.0.1" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
 
     # Test all modules with scala 2.10
     - jdk: "oraclejdk7"
@@ -75,8 +75,8 @@ matrix:
       env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Ppyspark -Pscala-2.11" BUILD_FLAG="package -pl spark,python -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python -Dtest=org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false"
 
 before_install:
-  - echo "MAVEN_OPTS='-Xms1024M -Xmx2048M -XX:MaxPermSize=1024m -XX:-UseGCOverheadLimit'" >> ~/.mavenrc
-  - ./testing/install_external_dependencies.sh
+  - echo "MAVEN_OPTS='-Xms1024M -Xmx2048M -XX:MaxPermSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.defaultLogLevel=warn'" >> ~/.mavenrc
+  - ./testing/install_external_dependencies.sh > /dev/null 2>&1
   - ls -la .spark-dist ${HOME}/.m2/repository/.cache/maven-download-plugin || true
   - ls .node_modules && cp -r .node_modules zeppelin-web/node_modules || echo "node_modules are not cached"
   - "/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1600x1024x16"
@@ -84,7 +84,7 @@ before_install:
   - source ~/.environ
 
 install:
-  - mvn -Dorg.slf4j.simpleLogger.defaultLogLevel=warn $BUILD_FLAG $PROFILE -B
+  - mvn $BUILD_FLAG $PROFILE -B
 
 before_script:
   - travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER
@@ -92,7 +92,7 @@ before_script:
   - tail conf/zeppelin-env.sh
 
 script:
-  - mvn -Dorg.slf4j.simpleLogger.defaultLogLevel=warn $TEST_FLAG $PROFILE -B $TEST_PROJECTS
+  - mvn $TEST_FLAG $PROFILE -B $TEST_PROJECTS
 
 after_success:
   - echo "Travis exited with ${TRAVIS_TEST_RESULT}"

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/985bb0c8/spark-dependencies/pom.xml
----------------------------------------------------------------------
diff --git a/spark-dependencies/pom.xml b/spark-dependencies/pom.xml
index 04b6983..1d0cb6e 100644
--- a/spark-dependencies/pom.xml
+++ b/spark-dependencies/pom.xml
@@ -56,13 +56,12 @@
     <akka.version>2.3.4-spark</akka.version>
 
     <spark.archive>spark-${spark.version}</spark.archive>
-    <spark.download.url>
-      http://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}.tgz
-    </spark.download.url>
+    <spark.src.download.url>
+      http://d3kbcqa49mib13.cloudfront.net/${spark.archive}.tgz
+    </spark.src.download.url>
     <spark.bin.download.url>
-      http://archive.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}-bin-without-hadoop.tgz
+      http://d3kbcqa49mib13.cloudfront.net/spark-${spark.version}-bin-without-hadoop.tgz
     </spark.bin.download.url>
-    <spark.dist.cache>${project.build.directory}/../../.spark-dist</spark.dist.cache>
     <py4j.version>0.8.2.1</py4j.version>
   </properties>
 
@@ -823,7 +822,7 @@
           <plugin>
             <groupId>com.googlecode.maven-download-plugin</groupId>
             <artifactId>download-maven-plugin</artifactId>
-            <version>1.2.1</version>
+            <version>1.3.0</version>
             <executions>
               <execution>
                 <id>download-pyspark-files</id>
@@ -832,8 +831,11 @@
                   <goal>wget</goal>
                 </goals>
                 <configuration>
-                  <url>${spark.download.url}</url>
-                  <outputDirectory>${spark.dist.cache}</outputDirectory>
+                  <readTimeOut>60000</readTimeOut>
+                  <retries>5</retries>
+                  <unpack>true</unpack>
+                  <url>${spark.src.download.url}</url>
+                  <outputDirectory>${project.build.directory}</outputDirectory>
                 </configuration>
               </execution>
             </executions>
@@ -844,9 +846,6 @@
             <configuration>
               <filesets>
                 <fileset>
-                  <directory>${project.build.directory}/spark-dist</directory>
-                </fileset>
-                <fileset>
                   <directory>${basedir}/../python/build</directory>
                 </fileset>
               </filesets>
@@ -858,21 +857,6 @@
             <artifactId>maven-antrun-plugin</artifactId>
             <executions>
               <execution>
-                <id>unzip-pyspark-files</id>
-                <phase>validate</phase>
-                <goals>
-                  <goal>run</goal>
-                </goals>
-                <configuration>
-                  <target>
-                    <untar src="${spark.dist.cache}/${spark.archive}.tgz"
-                           dest="${project.build.directory}/spark-dist"
-                           compression="gzip"/>
-                  </target>
-                </configuration>
-              </execution>
-
-              <execution>
                 <id>zip-pyspark-files</id>
                 <phase>generate-resources</phase>
                 <goals>
@@ -882,9 +866,9 @@
                   <target>
                     <delete dir="../interpreter/spark/pyspark"/>
                     <copy todir="../interpreter/spark/pyspark"
-                          file="${project.build.directory}/spark-dist/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip"/>
+                          file="${project.build.directory}/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip"/>
                     <zip destfile="${project.build.directory}/../../interpreter/spark/pyspark/pyspark.zip"
-                         basedir="${project.build.directory}/spark-dist/${spark.archive}/python"
+                         basedir="${project.build.directory}/${spark.archive}/python"
                          includes="pyspark/*.py,pyspark/**/*.py"/>
                   </target>
                 </configuration>
@@ -902,7 +886,7 @@
           <plugin>
             <groupId>com.googlecode.maven-download-plugin</groupId>
             <artifactId>download-maven-plugin</artifactId>
-            <version>1.2.1</version>
+            <version>1.3.0</version>
             <executions>
               <execution>
                 <id>download-sparkr-files</id>
@@ -911,24 +895,16 @@
                   <goal>wget</goal>
                 </goals>
                 <configuration>
+                  <readTimeOut>60000</readTimeOut>
+                  <retries>5</retries>
                   <url>${spark.bin.download.url}</url>
                   <unpack>true</unpack>
-                  <outputDirectory>${project.build.directory}/spark-bin-dist</outputDirectory>
+                  <outputDirectory>${project.build.directory}</outputDirectory>
                 </configuration>
               </execution>
             </executions>
           </plugin>
           <plugin>
-            <artifactId>maven-clean-plugin</artifactId>
-            <configuration>
-              <filesets>
-                <fileset>
-                  <directory>${project.build.directory}/spark-bin-dist</directory>
-                </fileset>
-              </filesets>
-            </configuration>
-          </plugin>
-          <plugin>
             <artifactId>maven-resources-plugin</artifactId>
             <version>2.7</version>
             <executions>
@@ -943,7 +919,7 @@
                   <resources>
                     <resource>
                       <directory>
-                        ${project.build.directory}/spark-bin-dist/spark-${spark.version}-bin-without-hadoop/R/lib
+                        ${project.build.directory}/spark-${spark.version}-bin-without-hadoop/R/lib
                       </directory>
                     </resource>
                   </resources>

http://git-wip-us.apache.org/repos/asf/zeppelin/blob/985bb0c8/testing/downloadSpark.sh
----------------------------------------------------------------------
diff --git a/testing/downloadSpark.sh b/testing/downloadSpark.sh
index 21320bc..a1000a5 100755
--- a/testing/downloadSpark.sh
+++ b/testing/downloadSpark.sh
@@ -66,7 +66,7 @@ if [[ ! -d "${SPARK_HOME}" ]]; then
         echo "${SPARK_VERSION} being downloaded from archives"
         STARTTIME=`date +%s`
         #timeout -s KILL "${MAX_DOWNLOAD_TIME_SEC}" wget "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
-        download_with_retry "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
+        download_with_retry "http://d3kbcqa49mib13.cloudfront.net/${SPARK_ARCHIVE}.tgz"
         ENDTIME=`date +%s`
         DOWNLOADTIME="$((ENDTIME-STARTTIME))"
     fi