You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by zj...@apache.org on 2021/09/13 08:17:08 UTC
[zeppelin] branch master updated: [ZEPPELIN-5438] Add Build profile
for spark-3.1
This is an automated email from the ASF dual-hosted git repository.
zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push:
new 9aaea3e [ZEPPELIN-5438] Add Build profile for spark-3.1
9aaea3e is described below
commit 9aaea3ee01ab6f05e6a3202ab433c5d477edef5f
Author: Jeff Zhang <zj...@apache.org>
AuthorDate: Fri Jul 2 13:46:55 2021 +0800
[ZEPPELIN-5438] Add Build profile for spark-3.1
### What is this PR for?
* Add maven build profile for spark 3.1.
* Use spark 3.1 and scala-2.12 as the default build profile
* Some code refactoring on spark test code.
### What type of PR is it?
[Improvement]
### Todos
* [ ] - Task
### What is the Jira issue?
* https://issues.apache.org/jira/browse/ZEPPELIN-5438
### How should this be tested?
* CI pass
### Screenshots (if appropriate)
### Questions:
* Do the license files need updating? No
* Is there breaking changes for older versions? No
* Does this need documentation? No
Author: Jeff Zhang <zj...@apache.org>
Closes #4182 from zjffdu/ZEPPELIN-5438 and squashes the following commits:
be71d5fdc3 [Jeff Zhang] [ZEPPELIN-5438] Add build profile for spark-3.1
---
.github/workflows/core.yml | 17 +++++-----
spark/interpreter/pom.xml | 4 ---
.../zeppelin/spark/SparkInterpreterTest.java | 7 ++--
.../zeppelin/spark/SparkSqlInterpreterTest.java | 25 +++++++++------
spark/pom.xml | 37 +++++++++++++++-------
.../zeppelin/integration/SparkIntegrationTest.java | 2 +-
.../integration/SparkIntegrationTest24.java | 11 +++++++
.../integration/SparkIntegrationTest30.java | 4 +--
.../integration/SparkIntegrationTest31.java | 4 +--
9 files changed, 70 insertions(+), 41 deletions(-)
diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index fdf28ff..81e0822 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -216,7 +216,8 @@ jobs:
mvn install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B
mvn clean package -pl zeppelin-plugins -amd -DskipTests -B
- name: run tests
- run: mvn test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,SparkIntegrationTest30 -DfailIfNoTests=false
+ run: mvn test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,ZeppelinSparkClusterTest31,SparkIntegrationTest30,SparkIntegrationTest31 -DfailIfNoTests=false
+
jdbcIntegrationTest-and-unit-test-of-Spark-2-4-with-Scala-2-11:
runs-on: ubuntu-20.04
steps:
@@ -258,7 +259,7 @@ jobs:
- name: run tests
run: mvn test -DskipRat -pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration -B -Dtest=JdbcIntegrationTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false
- spark-2-4-and-scale-2-12:
+ spark-2-4-and-scala-2-12:
runs-on: ubuntu-20.04
steps:
- name: Checkout
@@ -295,7 +296,7 @@ jobs:
- name: run tests
run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false
- spark-2-3-and-scale-2-11-and-other-interpreter:
+ spark-3-0-and-scala-2-12-and-other-interpreter:
runs-on: ubuntu-20.04
steps:
- name: Checkout
@@ -328,11 +329,11 @@ jobs:
R -e "IRkernel::installspec()"
- name: install environment
run: |
- mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -B
+ mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.0 -Pspark-scala-2.12 -Phadoop2 -B
- name: run tests
- run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
+ run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.0 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
- spark-2-2-and-scale-2-10-and-other-interpreter:
+ spark-3-1-and-scala-2-12-and-other-interpreter:
runs-on: ubuntu-20.04
steps:
- name: Checkout
@@ -364,9 +365,9 @@ jobs:
run: |
R -e "IRkernel::installspec()"
- name: install environment
- run: mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -B
+ run: mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B
- name: run tests
- run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
+ run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
test-livy-0-5-with-spark-2-2-0-under-python3:
runs-on: ubuntu-20.04
steps:
diff --git a/spark/interpreter/pom.xml b/spark/interpreter/pom.xml
index bef1699..81a0cdc 100644
--- a/spark/interpreter/pom.xml
+++ b/spark/interpreter/pom.xml
@@ -40,10 +40,6 @@
<maven.aeither.provider.version>3.0.3</maven.aeither.provider.version>
<wagon.version>2.7</wagon.version>
- <datanucleus.rdbms.version>3.2.9</datanucleus.rdbms.version>
- <datanucleus.apijdo.version>3.2.6</datanucleus.apijdo.version>
- <datanucleus.core.version>3.2.10</datanucleus.core.version>
-
<scala.compile.version>${spark.scala.version}</scala.compile.version>
<!-- settings -->
<pyspark.test.exclude>**/PySparkInterpreterMatplotlibTest.java</pyspark.test.exclude>
diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
index c613fcf..c750ea9 100644
--- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
+++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
@@ -225,7 +225,7 @@ public class SparkInterpreterTest {
"| 1| a|\n" +
"| 2|null|\n" +
"+---+----+"));
- } else if (version.contains("String = 2.")) {
+ } else {
// create dataset from case class
context = getInterpreterContext();
result = interpreter.interpret("case class Person(id:Int, name:String, age:Int, country:String)\n" +
@@ -252,8 +252,9 @@ public class SparkInterpreterTest {
}
// ZeppelinContext
- result = interpreter.interpret("z.show(df)", getInterpreterContext());
- assertEquals(InterpreterResult.Code.SUCCESS, result.code());
+ context = getInterpreterContext();
+ result = interpreter.interpret("z.show(df)", context);
+ assertEquals(context.out.toString(), InterpreterResult.Code.SUCCESS, result.code());
assertEquals(InterpreterResult.Type.TABLE, messageOutput.getType());
messageOutput.flush();
assertEquals("_1\t_2\n1\ta\n2\tnull\n", messageOutput.toInterpreterResultMessage().getData());
diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java
index 8c01130..1ce7329 100644
--- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java
+++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java
@@ -67,21 +67,25 @@ public class SparkSqlInterpreterTest {
intpGroup.get("session_1").add(sparkInterpreter);
intpGroup.get("session_1").add(sqlInterpreter);
- context = InterpreterContext.builder()
- .setNoteId("noteId")
- .setParagraphId("paragraphId")
- .setParagraphTitle("title")
- .setAngularObjectRegistry(new AngularObjectRegistry(intpGroup.getId(), null))
- .setResourcePool(new LocalResourcePool("id"))
- .setInterpreterOut(new InterpreterOutput())
- .setIntpEventClient(mock(RemoteInterpreterEventClient.class))
- .build();
+ context = getInterpreterContext();
InterpreterContext.set(context);
sparkInterpreter.open();
sqlInterpreter.open();
}
+ private static InterpreterContext getInterpreterContext() {
+ return InterpreterContext.builder()
+ .setNoteId("noteId")
+ .setParagraphId("paragraphId")
+ .setParagraphTitle("title")
+ .setAngularObjectRegistry(new AngularObjectRegistry(intpGroup.getId(), null))
+ .setResourcePool(new LocalResourcePool("id"))
+ .setInterpreterOut(new InterpreterOutput())
+ .setIntpEventClient(mock(RemoteInterpreterEventClient.class))
+ .build();
+ }
+
@AfterClass
public static void tearDown() throws InterpreterException {
sqlInterpreter.close();
@@ -287,8 +291,9 @@ public class SparkSqlInterpreterTest {
@Test
public void testDDL() throws InterpreterException, IOException {
+ InterpreterContext context = getInterpreterContext();
InterpreterResult ret = sqlInterpreter.interpret("create table t1(id int, name string)", context);
- assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
+ assertEquals(context.out.toString(), InterpreterResult.Code.SUCCESS, ret.code());
// spark 1.x will still return DataFrame with non-empty columns.
// org.apache.spark.sql.DataFrame = [result: string]
if (!sparkInterpreter.getSparkContext().version().startsWith("1.")) {
diff --git a/spark/pom.xml b/spark/pom.xml
index f123cad..75a4346 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -39,11 +39,11 @@
<datanucleus.core.version>3.2.10</datanucleus.core.version>
<!-- spark versions -->
- <spark.version>2.4.5</spark.version>
+ <spark.version>3.1.2</spark.version>
<protobuf.version>2.5.0</protobuf.version>
- <py4j.version>0.10.7</py4j.version>
- <spark.scala.version>2.11.12</spark.scala.version>
- <spark.scala.binary.version>2.11</spark.scala.binary.version>
+ <py4j.version>0.10.9</py4j.version>
+ <spark.scala.version>2.12.7</spark.scala.version>
+ <spark.scala.binary.version>2.12</spark.scala.binary.version>
<spark.archive>spark-${spark.version}</spark.archive>
<spark.src.download.url>
@@ -141,6 +141,9 @@
<profile>
<id>spark-scala-2.12</id>
+ <activation>
+ <activeByDefault>true</activeByDefault>
+ </activation>
<properties>
<spark.scala.version>2.12.7</spark.scala.version>
<spark.scala.binary.version>2.12</spark.scala.binary.version>
@@ -149,9 +152,6 @@
<profile>
<id>spark-scala-2.11</id>
- <activation>
- <activeByDefault>true</activeByDefault>
- </activation>
<properties>
<spark.scala.version>2.11.12</spark.scala.version>
<spark.scala.binary.version>2.11</spark.scala.binary.version>
@@ -169,9 +169,27 @@
<!-- profile spark-x only affect the embedded spark version in zeppelin distribution -->
<profile>
+ <id>spark-3.1</id>
+ <activation>
+ <activeByDefault>true</activeByDefault>
+ </activation>
+ <properties>
+ <datanucleus.core.version>4.1.17</datanucleus.core.version>
+ <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version>
+ <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version>
+ <spark.version>3.1.2</spark.version>
+ <protobuf.version>2.5.0</protobuf.version>
+ <py4j.version>0.10.9</py4j.version>
+ </properties>
+ </profile>
+
+ <profile>
<id>spark-3.0</id>
<properties>
- <spark.version>3.1.1</spark.version>
+ <datanucleus.core.version>4.1.17</datanucleus.core.version>
+ <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version>
+ <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version>
+ <spark.version>3.0.3</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<py4j.version>0.10.9</py4j.version>
</properties>
@@ -179,9 +197,6 @@
<profile>
<id>spark-2.4</id>
- <activation>
- <activeByDefault>true</activeByDefault>
- </activation>
<properties>
<spark.version>2.4.5</spark.version>
<protobuf.version>2.5.0</protobuf.version>
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
index 40496c7..1dec6ee 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
@@ -58,7 +58,7 @@ public abstract class SparkIntegrationTest {
private static MiniHadoopCluster hadoopCluster;
private static MiniZeppelin zeppelin;
private static InterpreterFactory interpreterFactory;
- private static InterpreterSettingManager interpreterSettingManager;
+ protected static InterpreterSettingManager interpreterSettingManager;
private String sparkVersion;
private String sparkHome;
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
index 48d3185..9a2391d 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
@@ -17,9 +17,14 @@
package org.apache.zeppelin.integration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.zeppelin.interpreter.InterpreterException;
+import org.apache.zeppelin.interpreter.InterpreterSetting;
+import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
+import java.io.IOException;
import java.util.Arrays;
import java.util.List;
@@ -37,4 +42,10 @@ public class SparkIntegrationTest24 extends SparkIntegrationTest{
});
}
+ @Override
+ public void testYarnClusterMode() throws IOException, YarnException, InterruptedException, InterpreterException, XmlPullParserException {
+ InterpreterSetting sparkInterpreterSetting = interpreterSettingManager.getInterpreterSettingByName("spark");
+ sparkInterpreterSetting.setProperty("spark.sql.execution.arrow.sparkr.enabled", "false");
+ super.testYarnClusterMode();
+ }
}
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java
index 76cd0c9..9dedf93 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java
@@ -34,8 +34,8 @@ public class SparkIntegrationTest30 extends SparkIntegrationTest {
@Parameterized.Parameters
public static List<Object[]> data() {
return Arrays.asList(new Object[][]{
- {"3.0.2", "2.7"},
- {"3.0.2", "3.2"}
+ {"3.0.3", "2.7"},
+ {"3.0.3", "3.2"}
});
}
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java
index 26362ba..bfaa1ea 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java
@@ -34,8 +34,8 @@ public class SparkIntegrationTest31 extends SparkIntegrationTest {
@Parameterized.Parameters
public static List<Object[]> data() {
return Arrays.asList(new Object[][]{
- {"3.1.1", "2.7"},
- {"3.1.1", "3.2"}
+ {"3.1.2", "2.7"},
+ {"3.1.2", "3.2"}
});
}