Posted to commits@zeppelin.apache.org by zj...@apache.org on 2021/09/13 08:17:08 UTC

[zeppelin] branch master updated: [ZEPPELIN-5438] Add Build profile for spark-3.1

This is an automated email from the ASF dual-hosted git repository.

zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new 9aaea3e  [ZEPPELIN-5438] Add Build profile for spark-3.1
9aaea3e is described below

commit 9aaea3ee01ab6f05e6a3202ab433c5d477edef5f
Author: Jeff Zhang <zj...@apache.org>
AuthorDate: Fri Jul 2 13:46:55 2021 +0800

    [ZEPPELIN-5438] Add Build profile for spark-3.1
    
    ### What is this PR for?
    
    * Add a Maven build profile for Spark 3.1.
    * Make Spark 3.1 and Scala 2.12 the default build profiles (see the usage sketch after this list).
    * Refactor some of the Spark test code.
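    
    With the new defaults, a plain build resolves to Spark 3.1 / Scala 2.12, while older combinations remain available behind explicit profiles. A minimal usage sketch, based on the profiles touched in this commit:
    
        # Default build: spark-3.1 and spark-scala-2.12 are now activeByDefault
        mvn clean package -DskipTests
    
        # Older Spark/Scala combinations must now be selected explicitly
        mvn clean package -DskipTests -Pspark-2.4 -Pspark-scala-2.11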
    
    ### What type of PR is it?
    [Improvement]
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-5438
    
    ### How should this be tested?
    * CI should pass (a sketch for reproducing the new CI job locally follows).
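    
    The new spark-3.1 job can be reproduced locally; a sketch mirroring the commands added to .github/workflows/core.yml in this commit (the -Dtest filter is omitted here to run the full suite):
    
        mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B
        mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B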
    
    ### Questions:
    * Do the license files need to be updated? No
    * Are there breaking changes for older versions? No
    * Does this need documentation? No
    
    Author: Jeff Zhang <zj...@apache.org>
    
    Closes #4182 from zjffdu/ZEPPELIN-5438 and squashes the following commits:
    
    be71d5fdc3 [Jeff Zhang] [ZEPPELIN-5438] Add build profile for spark-3.1
---
 .github/workflows/core.yml                         | 17 +++++-----
 spark/interpreter/pom.xml                          |  4 ---
 .../zeppelin/spark/SparkInterpreterTest.java       |  7 ++--
 .../zeppelin/spark/SparkSqlInterpreterTest.java    | 25 +++++++++------
 spark/pom.xml                                      | 37 +++++++++++++++-------
 .../zeppelin/integration/SparkIntegrationTest.java |  2 +-
 .../integration/SparkIntegrationTest24.java        | 11 +++++++
 .../integration/SparkIntegrationTest30.java        |  4 +--
 .../integration/SparkIntegrationTest31.java        |  4 +--
 9 files changed, 70 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index fdf28ff..81e0822 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -216,7 +216,8 @@ jobs:
           mvn install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B
           mvn clean package -pl zeppelin-plugins -amd -DskipTests -B
       - name: run tests
-        run: mvn test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,SparkIntegrationTest30 -DfailIfNoTests=false
+        run: mvn test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,ZeppelinSparkClusterTest31,SparkIntegrationTest30,SparkIntegrationTest31 -DfailIfNoTests=false
+
   jdbcIntegrationTest-and-unit-test-of-Spark-2-4-with-Scala-2-11:
     runs-on: ubuntu-20.04
     steps:
@@ -258,7 +259,7 @@ jobs:
       - name: run tests
         run: mvn test -DskipRat -pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration -B -Dtest=JdbcIntegrationTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false
 
-  spark-2-4-and-scale-2-12:
+  spark-2-4-and-scala-2-12:
     runs-on: ubuntu-20.04
     steps:
       - name: Checkout
@@ -295,7 +296,7 @@ jobs:
       - name: run tests
         run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false
 
-  spark-2-3-and-scale-2-11-and-other-interpreter:
+  spark-3-0-and-scala-2-12-and-other-interpreter:
     runs-on: ubuntu-20.04
     steps:
       - name: Checkout
@@ -328,11 +329,11 @@ jobs:
           R -e "IRkernel::installspec()"
       - name: install environment
         run: |
-          mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -B
+          mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.0 -Pspark-scala-2.12 -Phadoop2 -B
       - name: run tests
-        run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
+        run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.0 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
 
-  spark-2-2-and-scale-2-10-and-other-interpreter:
+  spark-3-1-and-scala-2-12-and-other-interpreter:
     runs-on: ubuntu-20.04
     steps:
       - name: Checkout
@@ -364,9 +365,9 @@ jobs:
         run: |
           R -e "IRkernel::installspec()"
       - name: install environment
-        run: mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -B
+        run: mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B
       - name: run tests
-        run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
+        run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
   test-livy-0-5-with-spark-2-2-0-under-python3:
     runs-on: ubuntu-20.04
     steps:
diff --git a/spark/interpreter/pom.xml b/spark/interpreter/pom.xml
index bef1699..81a0cdc 100644
--- a/spark/interpreter/pom.xml
+++ b/spark/interpreter/pom.xml
@@ -40,10 +40,6 @@
     <maven.aeither.provider.version>3.0.3</maven.aeither.provider.version>
     <wagon.version>2.7</wagon.version>
 
-    <datanucleus.rdbms.version>3.2.9</datanucleus.rdbms.version>
-    <datanucleus.apijdo.version>3.2.6</datanucleus.apijdo.version>
-    <datanucleus.core.version>3.2.10</datanucleus.core.version>
-
     <scala.compile.version>${spark.scala.version}</scala.compile.version>
     <!-- settings -->
     <pyspark.test.exclude>**/PySparkInterpreterMatplotlibTest.java</pyspark.test.exclude>
diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
index c613fcf..c750ea9 100644
--- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
+++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
@@ -225,7 +225,7 @@ public class SparkInterpreterTest {
               "|  1|   a|\n" +
               "|  2|null|\n" +
               "+---+----+"));
-    } else if (version.contains("String = 2.")) {
+    } else {
       // create dataset from case class
       context = getInterpreterContext();
       result = interpreter.interpret("case class Person(id:Int, name:String, age:Int, country:String)\n" +
@@ -252,8 +252,9 @@ public class SparkInterpreterTest {
     }
 
     // ZeppelinContext
-    result = interpreter.interpret("z.show(df)", getInterpreterContext());
-    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
+    context = getInterpreterContext();
+    result = interpreter.interpret("z.show(df)", context);
+    assertEquals(context.out.toString(), InterpreterResult.Code.SUCCESS, result.code());
     assertEquals(InterpreterResult.Type.TABLE, messageOutput.getType());
     messageOutput.flush();
     assertEquals("_1\t_2\n1\ta\n2\tnull\n", messageOutput.toInterpreterResultMessage().getData());
diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java
index 8c01130..1ce7329 100644
--- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java
+++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java
@@ -67,21 +67,25 @@ public class SparkSqlInterpreterTest {
     intpGroup.get("session_1").add(sparkInterpreter);
     intpGroup.get("session_1").add(sqlInterpreter);
 
-    context = InterpreterContext.builder()
-        .setNoteId("noteId")
-        .setParagraphId("paragraphId")
-        .setParagraphTitle("title")
-        .setAngularObjectRegistry(new AngularObjectRegistry(intpGroup.getId(), null))
-        .setResourcePool(new LocalResourcePool("id"))
-        .setInterpreterOut(new InterpreterOutput())
-        .setIntpEventClient(mock(RemoteInterpreterEventClient.class))
-        .build();
+    context = getInterpreterContext();
     InterpreterContext.set(context);
 
     sparkInterpreter.open();
     sqlInterpreter.open();
   }
 
+  private static InterpreterContext getInterpreterContext() {
+    return InterpreterContext.builder()
+            .setNoteId("noteId")
+            .setParagraphId("paragraphId")
+            .setParagraphTitle("title")
+            .setAngularObjectRegistry(new AngularObjectRegistry(intpGroup.getId(), null))
+            .setResourcePool(new LocalResourcePool("id"))
+            .setInterpreterOut(new InterpreterOutput())
+            .setIntpEventClient(mock(RemoteInterpreterEventClient.class))
+            .build();
+  }
+
   @AfterClass
   public static void tearDown() throws InterpreterException {
     sqlInterpreter.close();
@@ -287,8 +291,9 @@ public class SparkSqlInterpreterTest {
 
   @Test
   public void testDDL() throws InterpreterException, IOException {
+    InterpreterContext context = getInterpreterContext();
     InterpreterResult ret = sqlInterpreter.interpret("create table t1(id int, name string)", context);
-    assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
+    assertEquals(context.out.toString(), InterpreterResult.Code.SUCCESS, ret.code());
     // spark 1.x will still return DataFrame with non-empty columns.
     // org.apache.spark.sql.DataFrame = [result: string]
     if (!sparkInterpreter.getSparkContext().version().startsWith("1.")) {
diff --git a/spark/pom.xml b/spark/pom.xml
index f123cad..75a4346 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -39,11 +39,11 @@
         <datanucleus.core.version>3.2.10</datanucleus.core.version>
 
         <!-- spark versions -->
-        <spark.version>2.4.5</spark.version>
+        <spark.version>3.1.2</spark.version>
         <protobuf.version>2.5.0</protobuf.version>
-        <py4j.version>0.10.7</py4j.version>
-        <spark.scala.version>2.11.12</spark.scala.version>
-        <spark.scala.binary.version>2.11</spark.scala.binary.version>
+        <py4j.version>0.10.9</py4j.version>
+        <spark.scala.version>2.12.7</spark.scala.version>
+        <spark.scala.binary.version>2.12</spark.scala.binary.version>
 
         <spark.archive>spark-${spark.version}</spark.archive>
         <spark.src.download.url>
@@ -141,6 +141,9 @@
 
         <profile>
             <id>spark-scala-2.12</id>
+            <activation>
+                <activeByDefault>true</activeByDefault>
+            </activation>
             <properties>
                 <spark.scala.version>2.12.7</spark.scala.version>
                 <spark.scala.binary.version>2.12</spark.scala.binary.version>
@@ -149,9 +152,6 @@
 
         <profile>
             <id>spark-scala-2.11</id>
-            <activation>
-                <activeByDefault>true</activeByDefault>
-            </activation>
             <properties>
                 <spark.scala.version>2.11.12</spark.scala.version>
                 <spark.scala.binary.version>2.11</spark.scala.binary.version>
@@ -169,9 +169,27 @@
         <!-- profile spark-x only affect the embedded spark version in zeppelin distribution -->
 
         <profile>
+            <id>spark-3.1</id>
+            <activation>
+                <activeByDefault>true</activeByDefault>
+            </activation>
+            <properties>
+                <datanucleus.core.version>4.1.17</datanucleus.core.version>
+                <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version>
+                <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version>
+                <spark.version>3.1.2</spark.version>
+                <protobuf.version>2.5.0</protobuf.version>
+                <py4j.version>0.10.9</py4j.version>
+            </properties>
+        </profile>
+
+        <profile>
             <id>spark-3.0</id>
             <properties>
-                <spark.version>3.1.1</spark.version>
+                <datanucleus.core.version>4.1.17</datanucleus.core.version>
+                <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version>
+                <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version>
+                <spark.version>3.0.3</spark.version>
                 <protobuf.version>2.5.0</protobuf.version>
                 <py4j.version>0.10.9</py4j.version>
             </properties>
@@ -179,9 +197,6 @@
 
         <profile>
             <id>spark-2.4</id>
-            <activation>
-                <activeByDefault>true</activeByDefault>
-            </activation>
             <properties>
                 <spark.version>2.4.5</spark.version>
                 <protobuf.version>2.5.0</protobuf.version>
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
index 40496c7..1dec6ee 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
@@ -58,7 +58,7 @@ public abstract class SparkIntegrationTest {
   private static MiniHadoopCluster hadoopCluster;
   private static MiniZeppelin zeppelin;
   private static InterpreterFactory interpreterFactory;
-  private static InterpreterSettingManager interpreterSettingManager;
+  protected static InterpreterSettingManager interpreterSettingManager;
 
   private String sparkVersion;
   private String sparkHome;
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
index 48d3185..9a2391d 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
@@ -17,9 +17,14 @@
 
 package org.apache.zeppelin.integration;
 
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.zeppelin.interpreter.InterpreterException;
+import org.apache.zeppelin.interpreter.InterpreterSetting;
+import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 
@@ -37,4 +42,10 @@ public class SparkIntegrationTest24 extends SparkIntegrationTest{
     });
   }
 
+  @Override
+  public void testYarnClusterMode() throws IOException, YarnException, InterruptedException, InterpreterException, XmlPullParserException {
+    InterpreterSetting sparkInterpreterSetting = interpreterSettingManager.getInterpreterSettingByName("spark");
+    sparkInterpreterSetting.setProperty("spark.sql.execution.arrow.sparkr.enabled", "false");
+    super.testYarnClusterMode();
+  }
 }
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java
index 76cd0c9..9dedf93 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java
@@ -34,8 +34,8 @@ public class SparkIntegrationTest30 extends SparkIntegrationTest {
   @Parameterized.Parameters
   public static List<Object[]> data() {
     return Arrays.asList(new Object[][]{
-            {"3.0.2", "2.7"},
-            {"3.0.2", "3.2"}
+            {"3.0.3", "2.7"},
+            {"3.0.3", "3.2"}
     });
   }
 
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java
index 26362ba..bfaa1ea 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java
@@ -34,8 +34,8 @@ public class SparkIntegrationTest31 extends SparkIntegrationTest {
   @Parameterized.Parameters
   public static List<Object[]> data() {
     return Arrays.asList(new Object[][]{
-        {"3.1.1", "2.7"},
-        {"3.1.1", "3.2"}
+        {"3.1.2", "2.7"},
+        {"3.1.2", "3.2"}
     });
   }