You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by zj...@apache.org on 2018/11/13 01:31:55 UTC
zeppelin git commit: [ZEPPELIN-3810] Support Spark 2.4
Repository: zeppelin
Updated Branches:
refs/heads/master de17d2b87 -> 4f73272c0
[ZEPPELIN-3810] Support Spark 2.4
### What is this PR for?
Spark 2.4 changed its Scala version from 2.11.8 to 2.11.12 (see SPARK-24418).
There are two problems for this upgrade at Zeppelin side:
1. Some private methods that Zeppelin calls via reflection, for instance `loopPostInit`, became inaccessible.
See:
- https://github.com/scala/scala/blob/v2.11.8/src/repl/scala/tools/nsc/interpreter/ILoop.scala
- https://github.com/scala/scala/blob/v2.11.12/src/repl/scala/tools/nsc/interpreter/ILoop.scala
To work around this, I manually ported `loopPostInit` from Scala 2.11.8 to retain the behaviour. Functions that exist in both Scala 2.11.8 and Scala 2.11.12 are called via reflection inside the new `loopPostInit`.
2. Upgrading from Scala 2.11.8 to 2.11.12 requires a `jline.version` upgrade. Otherwise, we will hit:
```
Caused by: java.lang.NoSuchMethodError:
jline.console.completer.CandidateListCompletionHandler.setPrintSpaceAfterFullCompletion(Z)V
at scala.tools.nsc.interpreter.jline.JLineConsoleReader.initCompletion(JLineReader.scala:139)
```
To work around this, I upgraded jline from `2.12.1` to `2.14.3`.
### What type of PR is it?
[Improvement]
### Todos
* [x] - Wait until Spark 2.4.0 is officially released.
### What is the Jira issue?
* https://issues.apache.org/jira/browse/ZEPPELIN-3810
### How should this be tested?
Verified manually against Spark 2.4.0 RC3
### Questions:
* Do the license files need an update? Yes
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: hyukjinkwon <gu...@apache.org>
Author: Hyukjin Kwon <gu...@apache.org>
Author: Jeff Zhang <zj...@gmail.com>
Closes #3206 from HyukjinKwon/ZEPPELIN-3810 and squashes the following commits:
c2456c949 [Hyukjin Kwon] Py4J 0.10.6 to 0.10.7
573f07d2d [Jeff Zhang] add test for spark 2.4 (#1)
9ac1797a7 [hyukjinkwon] Support Spark 2.4
Project: http://git-wip-us.apache.org/repos/asf/zeppelin/repo
Commit: http://git-wip-us.apache.org/repos/asf/zeppelin/commit/4f73272c
Tree: http://git-wip-us.apache.org/repos/asf/zeppelin/tree/4f73272c
Diff: http://git-wip-us.apache.org/repos/asf/zeppelin/diff/4f73272c
Branch: refs/heads/master
Commit: 4f73272c006b3767ad40686aa5876ac5fbf57928
Parents: de17d2b
Author: hyukjinkwon <gu...@apache.org>
Authored: Fri Nov 9 23:18:52 2018 +0800
Committer: Jeff Zhang <zj...@apache.org>
Committed: Tue Nov 13 09:31:49 2018 +0800
----------------------------------------------------------------------
.travis.yml | 10 +--
spark/interpreter/pom.xml | 55 +++++++++++++--
.../org/apache/zeppelin/spark/SparkVersion.java | 3 +-
spark/pom.xml | 42 ++++++++---
.../spark/SparkScala211Interpreter.scala | 73 +++++++++++++++++++-
zeppelin-distribution/src/bin_license/LICENSE | 2 +-
zeppelin-interpreter/pom.xml | 2 +-
.../zeppelin/rest/ZeppelinSparkClusterTest.java | 10 +--
.../interpreter/SparkIntegrationTest.java | 10 +--
9 files changed, 172 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/4f73272c/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index a86cf34..8619455 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,7 @@ sudo: false
before_cache:
- sudo chown -R travis:travis $HOME/.m2
-
+
cache:
apt: true
directories:
@@ -98,15 +98,15 @@ matrix:
dist: trusty
env: BUILD_PLUGINS="true" PYTHON="3" SCALA_VER="2.10" PROFILE="-Pspark-1.6 -Pscala-2.10" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl zeppelin-zengine,spark/interpreter,spark/spark-dependencies" TEST_PROJECTS="-Dtest=SparkIntegrationTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false"
- # Test spark module for 2.1.0 with scala 2.11
+ # Test spark module for 2.4.0 with scala 2.11
- jdk: "oraclejdk8"
dist: trusty
- env: BUILD_PLUGINS="false" PYTHON="2" SCALA_VER="2.11" PROFILE="-Pspark-2.1 -Phadoop2 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl spark/interpreter,spark/spark-dependencies" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.rinterpreter.*,org.apache.spark.api.r.* -DfailIfNoTests=false"
+ env: BUILD_PLUGINS="false" PYTHON="2" SCALA_VER="2.11" PROFILE="-Pspark-2.4 -Phadoop2 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl spark/interpreter,spark/spark-dependencies" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.rinterpreter.*,org.apache.spark.api.r.* -DfailIfNoTests=false"
- # Test spark module for 2.0.2 with scala 2.11
+ # Test spark module for 2.3.2 with scala 2.11
- jdk: "oraclejdk8"
dist: trusty
- env: BUILD_PLUGINS="false" PYTHON="2" SCALA_VER="2.11" PROFILE="-Pspark-2.0 -Phadoop3 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl spark/interpreter,spark/spark-dependencies" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.rinterpreter.*,org.apache.spark.api.r.* -DfailIfNoTests=false"
+ env: BUILD_PLUGINS="false" PYTHON="2" SCALA_VER="2.11" PROFILE="-Pspark-2.3 -Phadoop3 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl spark/interpreter,spark/spark-dependencies" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.rinterpreter.*,org.apache.spark.api.r.* -DfailIfNoTests=false"
# Test python/pyspark with python 2, livy 0.5
- sudo: required
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/4f73272c/spark/interpreter/pom.xml
----------------------------------------------------------------------
diff --git a/spark/interpreter/pom.xml b/spark/interpreter/pom.xml
index daf801f..32f625d 100644
--- a/spark/interpreter/pom.xml
+++ b/spark/interpreter/pom.xml
@@ -53,7 +53,7 @@
<pyspark.test.exclude>**/PySparkInterpreterMatplotlibTest.java</pyspark.test.exclude>
<pyspark.test.include>**/*Test.*</pyspark.test.include>
-
+
</properties>
<dependencies>
@@ -71,12 +71,6 @@
<dependency>
<groupId>org.apache.zeppelin</groupId>
- <artifactId>spark-scala-2.10</artifactId>
- <version>${project.version}</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter-api</artifactId>
<version>${project.version}</version>
</dependency>
@@ -609,4 +603,51 @@
</plugins>
</build>
+ <profiles>
+
+ <profile>
+ <id>spark-2.2</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.zeppelin</groupId>
+ <artifactId>spark-scala-2.10</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ <profile>
+ <id>spark-2.1</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.zeppelin</groupId>
+ <artifactId>spark-scala-2.10</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ <profile>
+ <id>spark-2.0</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.zeppelin</groupId>
+ <artifactId>spark-scala-2.10</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ <profile>
+ <id>spark-1.6</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.zeppelin</groupId>
+ <artifactId>spark-scala-2.10</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+ </profile>
+
+ </profiles>
</project>
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/4f73272c/spark/interpreter/src/main/java/org/apache/zeppelin/spark/SparkVersion.java
----------------------------------------------------------------------
diff --git a/spark/interpreter/src/main/java/org/apache/zeppelin/spark/SparkVersion.java b/spark/interpreter/src/main/java/org/apache/zeppelin/spark/SparkVersion.java
index b75deb8..6ee0015 100644
--- a/spark/interpreter/src/main/java/org/apache/zeppelin/spark/SparkVersion.java
+++ b/spark/interpreter/src/main/java/org/apache/zeppelin/spark/SparkVersion.java
@@ -31,9 +31,10 @@ public class SparkVersion {
public static final SparkVersion SPARK_2_3_0 = SparkVersion.fromVersionString("2.3.0");
public static final SparkVersion SPARK_2_3_1 = SparkVersion.fromVersionString("2.3.1");
public static final SparkVersion SPARK_2_4_0 = SparkVersion.fromVersionString("2.4.0");
+ public static final SparkVersion SPARK_3_0_0 = SparkVersion.fromVersionString("3.0.0");
public static final SparkVersion MIN_SUPPORTED_VERSION = SPARK_1_6_0;
- public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_2_4_0;
+ public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_3_0_0;
private int version;
private String versionString;
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/4f73272c/spark/pom.xml
----------------------------------------------------------------------
diff --git a/spark/pom.xml b/spark/pom.xml
index 42f9d17..b16e53f 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -49,17 +49,16 @@
<spark.archive>spark-${spark.version}</spark.archive>
<spark.src.download.url>
- http://d3kbcqa49mib13.cloudfront.net/${spark.archive}.tgz
+ https://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}.tgz
</spark.src.download.url>
<spark.bin.download.url>
- http://d3kbcqa49mib13.cloudfront.net/${spark.archive}-bin-without-hadoop.tgz
+ https://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}-bin-without-hadoop.tgz
</spark.bin.download.url>
</properties>
<modules>
<module>interpreter</module>
<module>spark-scala-parent</module>
- <module>scala-2.10</module>
<module>scala-2.11</module>
<module>spark-dependencies</module>
<module>spark-shims</module>
@@ -193,31 +192,46 @@
<profiles>
<profile>
+ <id>spark-2.4</id>
+ <properties>
+ <spark.version>2.4.0</spark.version>
+ <protobuf.version>2.5.0</protobuf.version>
+ <py4j.version>0.10.7</py4j.version>
+ </properties>
+ </profile>
+
+ <profile>
<id>spark-2.3</id>
<properties>
- <spark.version>2.3.0</spark.version>
+ <spark.version>2.3.2</spark.version>
<protobuf.version>2.5.0</protobuf.version>
- <spark.py4j.version>0.10.6</spark.py4j.version>
+ <py4j.version>0.10.7</py4j.version>
</properties>
</profile>
<profile>
<id>spark-2.2</id>
- <activation>
- <activeByDefault>true</activeByDefault>
- </activation>
<properties>
- <spark.version>2.2.0</spark.version>
+ <spark.version>2.2.1</spark.version>
<py4j.version>0.10.4</py4j.version>
</properties>
+ <activation>
+ <activeByDefault>true</activeByDefault>
+ </activation>
+ <modules>
+ <module>scala-2.10</module>
+ </modules>
</profile>
<profile>
<id>spark-2.1</id>
<properties>
- <spark.version>2.1.0</spark.version>
+ <spark.version>2.1.2</spark.version>
<py4j.version>0.10.4</py4j.version>
</properties>
+ <modules>
+ <module>scala-2.10</module>
+ </modules>
</profile>
<profile>
@@ -226,6 +240,9 @@
<spark.version>2.0.2</spark.version>
<py4j.version>0.10.3</py4j.version>
</properties>
+ <modules>
+ <module>scala-2.10</module>
+ </modules>
</profile>
<profile>
@@ -234,7 +251,10 @@
<spark.version>1.6.3</spark.version>
<py4j.version>0.9</py4j.version>
</properties>
+ <modules>
+ <module>scala-2.10</module>
+ </modules>
</profile>
-
+
</profiles>
</project>
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/4f73272c/spark/scala-2.11/src/main/scala/org/apache/zeppelin/spark/SparkScala211Interpreter.scala
----------------------------------------------------------------------
diff --git a/spark/scala-2.11/src/main/scala/org/apache/zeppelin/spark/SparkScala211Interpreter.scala b/spark/scala-2.11/src/main/scala/org/apache/zeppelin/spark/SparkScala211Interpreter.scala
index 757f7eb..0956e04 100644
--- a/spark/scala-2.11/src/main/scala/org/apache/zeppelin/spark/SparkScala211Interpreter.scala
+++ b/spark/scala-2.11/src/main/scala/org/apache/zeppelin/spark/SparkScala211Interpreter.scala
@@ -39,6 +39,8 @@ class SparkScala211Interpreter(override val conf: SparkConf,
override val printReplOutput: java.lang.Boolean)
extends BaseSparkScalaInterpreter(conf, depFiles, printReplOutput) {
+ import SparkScala211Interpreter._
+
lazy override val LOGGER: Logger = LoggerFactory.getLogger(getClass)
private var sparkILoop: ILoop = _
@@ -81,7 +83,7 @@ class SparkScala211Interpreter(override val conf: SparkConf,
sparkILoop.in = reader
sparkILoop.initializeSynchronous()
- callMethod(sparkILoop, "scala$tools$nsc$interpreter$ILoop$$loopPostInit")
+ loopPostInit(this)
this.scalaCompleter = reader.completion.completer()
createSparkContext()
@@ -105,3 +107,72 @@ class SparkScala211Interpreter(override val conf: SparkConf,
sparkILoop.interpret(code)
}
+
+private object SparkScala211Interpreter {
+
+ /**
+ * This is a hack to call `loopPostInit` at `ILoop`. At higher version of Scala such
+ * as 2.11.12, `loopPostInit` became a nested function which is inaccessible. Here,
+ * we redefine `loopPostInit` at Scala's 2.11.8 side and ignore `loadInitFiles` being called at
+ * Scala 2.11.12 since here we do not have to load files.
+ *
+ * Both methods `loopPostInit` and `unleashAndSetPhase` are redefined, and `phaseCommand` and
+ * `asyncMessage` are being called via reflection since both exist in Scala 2.11.8 and 2.11.12.
+ *
+ * Please see the codes below:
+ * https://github.com/scala/scala/blob/v2.11.8/src/repl/scala/tools/nsc/interpreter/ILoop.scala
+ * https://github.com/scala/scala/blob/v2.11.12/src/repl/scala/tools/nsc/interpreter/ILoop.scala
+ *
+ * See also ZEPPELIN-3810.
+ */
+ private def loopPostInit(interpreter: SparkScala211Interpreter): Unit = {
+ import StdReplTags._
+ import scala.reflect.classTag
+ import scala.reflect.io
+
+ val sparkILoop = interpreter.sparkILoop
+ val intp = sparkILoop.intp
+ val power = sparkILoop.power
+ val in = sparkILoop.in
+
+ def loopPostInit() {
+ // Bind intp somewhere out of the regular namespace where
+ // we can get at it in generated code.
+ intp.quietBind(NamedParam[IMain]("$intp", intp)(tagOfIMain, classTag[IMain]))
+ // Auto-run code via some setting.
+ (replProps.replAutorunCode.option
+ flatMap (f => io.File(f).safeSlurp())
+ foreach (intp quietRun _)
+ )
+ // classloader and power mode setup
+ intp.setContextClassLoader()
+ if (isReplPower) {
+ replProps.power setValue true
+ unleashAndSetPhase()
+ asyncMessage(power.banner)
+ }
+ // SI-7418 Now, and only now, can we enable TAB completion.
+ in.postInit()
+ }
+
+ def unleashAndSetPhase() = if (isReplPower) {
+ power.unleash()
+ intp beSilentDuring phaseCommand("typer") // Set the phase to "typer"
+ }
+
+ def phaseCommand(name: String): Results.Result = {
+ interpreter.callMethod(
+ sparkILoop,
+ "scala$tools$nsc$interpreter$ILoop$$phaseCommand",
+ Array(classOf[String]),
+ Array(name)).asInstanceOf[Results.Result]
+ }
+
+ def asyncMessage(msg: String): Unit = {
+ interpreter.callMethod(
+ sparkILoop, "asyncMessage", Array(classOf[String]), Array(msg))
+ }
+
+ loopPostInit()
+ }
+}
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/4f73272c/zeppelin-distribution/src/bin_license/LICENSE
----------------------------------------------------------------------
diff --git a/zeppelin-distribution/src/bin_license/LICENSE b/zeppelin-distribution/src/bin_license/LICENSE
index 54356e1..f79da72 100644
--- a/zeppelin-distribution/src/bin_license/LICENSE
+++ b/zeppelin-distribution/src/bin_license/LICENSE
@@ -291,7 +291,7 @@ The text of each license is also included at licenses/LICENSE-[project]-[version
(BSD Style) JSch v0.1.53 (http://www.jcraft.com) - http://www.jcraft.com/jsch/LICENSE.txt
(BSD 3 Clause) highlightjs v9.4.0 (https://highlightjs.org/) - https://github.com/isagalaev/highlight.js/blob/9.4.0/LICENSE
(BSD 3 Clause) hamcrest v1.3 (http://hamcrest.org/JavaHamcrest/) - http://opensource.org/licenses/BSD-3-Clause
- (BSD Style) JLine v2.12.1 (https://github.com/jline/jline2) - https://github.com/jline/jline2/blob/master/LICENSE.txt
+ (BSD Style) JLine v2.14.3 (https://github.com/jline/jline2) - https://github.com/jline/jline2/blob/master/LICENSE.txt
(BSD New license) Google Auth Library for Java - Credentials (com.google.auth:google-auth-library-credentials:0.4.0 - https://github.com/google/google-auth-library-java/google-auth-library-credentials)
(BSD New license) Google Auth Library for Java - OAuth2 HTTP (com.google.auth:google-auth-library-oauth2-http:0.4.0 - https://github.com/google/google-auth-library-java/google-auth-library-oauth2-http)
(New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java-util:3.0.0-beta-2 - https://developers.google.com/protocol-buffers/)
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/4f73272c/zeppelin-interpreter/pom.xml
----------------------------------------------------------------------
diff --git a/zeppelin-interpreter/pom.xml b/zeppelin-interpreter/pom.xml
index 22a029e..bf624ce 100644
--- a/zeppelin-interpreter/pom.xml
+++ b/zeppelin-interpreter/pom.xml
@@ -43,7 +43,7 @@
<aether.version>1.12</aether.version>
<maven.aeither.provider.version>3.0.3</maven.aeither.provider.version>
<wagon.version>1.0</wagon.version>
- <jline.version>2.12.1</jline.version>
+ <jline.version>2.14.3</jline.version>
<atomix.version>3.0.0-rc4</atomix.version>
<commons-math3.version>3.1.1</commons-math3.version>
<guava.version>20.0</guava.version>
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/4f73272c/zeppelin-server/src/test/java/org/apache/zeppelin/rest/ZeppelinSparkClusterTest.java
----------------------------------------------------------------------
diff --git a/zeppelin-server/src/test/java/org/apache/zeppelin/rest/ZeppelinSparkClusterTest.java b/zeppelin-server/src/test/java/org/apache/zeppelin/rest/ZeppelinSparkClusterTest.java
index 7494df2..932d077 100644
--- a/zeppelin-server/src/test/java/org/apache/zeppelin/rest/ZeppelinSparkClusterTest.java
+++ b/zeppelin-server/src/test/java/org/apache/zeppelin/rest/ZeppelinSparkClusterTest.java
@@ -84,10 +84,12 @@ public class ZeppelinSparkClusterTest extends AbstractTestRestApi {
@Parameterized.Parameters
public static List<Object[]> data() {
return Arrays.asList(new Object[][]{
- {"2.2.1"},
- {"2.1.2"},
- {"2.0.2"},
- {"1.6.3"}
+ {"2.4.0"},
+ {"2.3.2"},
+ {"2.2.1"},
+ {"2.1.2"},
+ {"2.0.2"},
+ {"1.6.3"}
});
}
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/4f73272c/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/SparkIntegrationTest.java
----------------------------------------------------------------------
diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/SparkIntegrationTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/SparkIntegrationTest.java
index fed9ad2..8d076e3 100644
--- a/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/SparkIntegrationTest.java
+++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/interpreter/SparkIntegrationTest.java
@@ -42,10 +42,12 @@ public class SparkIntegrationTest {
@Parameterized.Parameters
public static List<Object[]> data() {
return Arrays.asList(new Object[][]{
- {"2.2.1"},
- {"2.1.2"},
- {"2.0.2"},
- {"1.6.3"}
+ {"2.4.0"},
+ {"2.3.2"},
+ {"2.2.1"},
+ {"2.1.2"},
+ {"2.0.2"},
+ {"1.6.3"}
});
}