Posted to commits@zeppelin.apache.org by zj...@apache.org on 2021/10/26 06:36:47 UTC

[zeppelin] branch master updated: [ZEPPELIN-5565] Support Spark 3.2

This is an automated email from the ASF dual-hosted git repository.

zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new 2f55fe8  [ZEPPELIN-5565] Support Spark 3.2
2f55fe8 is described below

commit 2f55fe8ed277b28d71f858633f9c9d76fd18f0c3
Author: Jeff Zhang <zj...@apache.org>
AuthorDate: Mon Oct 25 15:53:13 2021 +0800

    [ZEPPELIN-5565] Support Spark 3.2
    
    ### What is this PR for?
    
    Simple PR to support Spark 3.2. There are not many code changes in the Spark module, because Spark 3.2 is compatible with Spark 3.1, which is already supported. Only netty is excluded in pom.xml, because it conflicts with the netty version that Spark 3.2 uses. This only affects the Spark unit tests; the integration tests still pass even without the netty change, because the unit tests use a different classpath.
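    
    One way to confirm which jar ends up supplying netty on the unit test classpath is to print the code source of a netty class (illustrative snippet, assuming netty 4.x's io.netty.buffer.ByteBuf is present; not part of this change):
    
        // Prints the jar that a Netty class was actually loaded from,
        // which makes classpath conflicts like the one above visible.
        public class NettyOrigin {
          public static void main(String[] args) {
            System.out.println(io.netty.buffer.ByteBuf.class
                .getProtectionDomain().getCodeSource().getLocation());
          }
        }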
    
    ### What type of PR is it?
    [ Improvement ]
    
    ### Todos
    * [ ] - Task
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-5565
    
    ### How should this be tested?
    * CI pass
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Do the license files need updating? No
    * Are there breaking changes for older versions? No
    * Does this need documentation? No
    
    Author: Jeff Zhang <zj...@apache.org>
    
    Closes #4257 from zjffdu/ZEPPELIN-5565 and squashes the following commits:
    
    9b5b6c42d7 [Jeff Zhang] Update doc
    7b8924a850 [Jeff Zhang] Update scala version
    1b20f934b1 [Jeff Zhang] [ZEPPELIN-5565] Support Spark 3.2
---
 .github/workflows/core.yml                         | 43 +++++++++++++++-
 .github/workflows/frontend.yml                     |  8 +--
 docs/setup/basics/how_to_build.md                  |  3 +-
 spark/interpreter/pom.xml                          | 12 +++++
 .../org/apache/zeppelin/spark/SparkShimsTest.java  |  6 +--
 spark/pom.xml                                      | 14 +++++-
 spark/scala-2.12/pom.xml                           |  2 +-
 .../org/apache/zeppelin/spark/SparkVersion.java    |  8 ++-
 .../integration/SparkIntegrationTest32.java        | 58 ++++++++++++++++++++++
 .../integration/ZeppelinSparkClusterTest32.java    | 40 +++++++++++++++
 10 files changed, 176 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index 866ed31..1503c66 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -219,7 +219,7 @@ jobs:
           ./mvnw install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B
           ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests -B
       - name: run tests
-        run: ./mvnw test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,ZeppelinSparkClusterTest31,SparkIntegrationTest30,SparkIntegrationTest31 -DfailIfNoTests=false
+        run: ./mvnw test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=SparkSubmitIntegrationTest,ZeppelinSparkClusterTest24,SparkIntegrationTest24,ZeppelinSparkClusterTest30,SparkIntegrationTest30,ZeppelinSparkClusterTest31,SparkIntegrationTest31,ZeppelinSparkClusterTest32,SparkIntegrationTest32 -DfailIfNoTests=false
 
   jdbcIntegrationTest-and-unit-test-of-Spark-2-4-with-Scala-2-11:
     runs-on: ubuntu-20.04
@@ -379,6 +379,47 @@ jobs:
         run: ./mvnw install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B
       - name: run tests with ${{ matrix.python }}
         run: ./mvnw test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
+
+  spark-3-2-and-scala-2-12-and-other-interpreter:
+    runs-on: ubuntu-20.04
+    strategy:
+      fail-fast: false
+      matrix:
+        python: [ 3.7, 3.8 ]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Tune Runner VM
+        uses: ./.github/actions/tune-runner-vm
+      - name: Set up JDK 8
+        uses: actions/setup-java@v2
+        with:
+          distribution: 'adopt'
+          java-version: 8
+      - name: Cache local Maven repository
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/.m2/repository
+            !~/.m2/repository/org/apache/zeppelin/
+          key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-zeppelin-
+      - name: Setup conda environment with python ${{ matrix.python }} and R
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          activate-environment: python_3_with_R
+          environment-file: testing/env_python_${{ matrix.python }}_with_R.yml
+          python-version: ${{ matrix.python }}
+          auto-activate-base: false
+      - name: Make IRkernel available to Jupyter
+        run: |
+          R -e "IRkernel::installspec()"
+      - name: install environment
+        run: ./mvnw install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.2 -Pspark-scala-2.12 -Phadoop2 -B
+      - name: run tests with ${{ matrix.python }}
+        run: ./mvnw test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.2 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
+
   test-livy-0-5-with-spark-2-2-0-under-python3:
     runs-on: ubuntu-20.04
     steps:
diff --git a/.github/workflows/frontend.yml b/.github/workflows/frontend.yml
index 9a526ca..403c939 100644
--- a/.github/workflows/frontend.yml
+++ b/.github/workflows/frontend.yml
@@ -71,7 +71,7 @@ jobs:
       - name: Run headless test
         run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" ./mvnw package -DskipRat -pl zeppelin-web-angular -Pweb-angular -B
 
-  test-selenium-with-spark-module-for-spark-2-3:
+  test-selenium-with-spark-module-for-spark-2-4:
     runs-on: ubuntu-20.04
     defaults:
       run:
@@ -107,8 +107,8 @@ jobs:
           R -e "IRkernel::installspec()"
       - name: install environment
         run: |
-          ./mvnw clean install -DskipTests -DskipRat -pl ${INTERPRETERS} -Pspark-2.3 -Phadoop2 -Phelium-dev -Pexamples -Pintegration -Pspark-scala-2.11 -B
+          ./mvnw clean install -DskipTests -DskipRat -pl ${INTERPRETERS} -Pspark-2.4 -Phadoop2 -Phelium-dev -Pexamples -Pintegration -Pspark-scala-2.11 -B
           ./mvnw clean package -pl zeppelin-plugins -amd -B
-          ./testing/downloadSpark.sh "2.3.2" "2.6"
+          ./testing/downloadSpark.sh "2.4.7" "2.7"
       - name: run tests
-        run: xvfb-run --auto-servernum --server-args="-screen 0 1600x1024x16" ./mvnw verify -DskipRat -Pspark-2.3 -Phadoop2 -Phelium-dev -Pexamples -Pintegration -Pspark-scala-2.11 -B -pl zeppelin-integration -DfailIfNoTests=false
+        run: xvfb-run --auto-servernum --server-args="-screen 0 1600x1024x16" ./mvnw verify -DskipRat -Pspark-2.4 -Phadoop2 -Phelium-dev -Pexamples -Pintegration -Pspark-scala-2.11 -B -pl zeppelin-integration -DfailIfNoTests=false
diff --git a/docs/setup/basics/how_to_build.md b/docs/setup/basics/how_to_build.md
index 4b1b9a3..cbd826f 100644
--- a/docs/setup/basics/how_to_build.md
+++ b/docs/setup/basics/how_to_build.md
@@ -106,6 +106,7 @@ Set spark major version
 Available profiles are
 
 ```
+-Pspark-3.2
 -Pspark-3.1
 -Pspark-3.0
 -Pspark-2.4
@@ -116,7 +117,7 @@ minor version can be adjusted by `-Dspark.version=x.x.x`
 ##### `-Pspark-scala-[version] (optional)`
 
 To be noticed, these profiles also only affect the embedded mode (no need to specify `SPARK_HOME`) of Spark interpreter. 
-Actually Zeppelin supports all the versions of scala (2.10, 2.11, 2.12) in Spark interpreter as long as you specify `SPARK_HOME`.
+Actually Zeppelin supports all the versions of scala (2.11, 2.12) in Spark interpreter as long as you specify `SPARK_HOME`.
 
 Available profiles are
 
diff --git a/spark/interpreter/pom.xml b/spark/interpreter/pom.xml
index c5cf60f..0ac70a9 100644
--- a/spark/interpreter/pom.xml
+++ b/spark/interpreter/pom.xml
@@ -106,6 +106,10 @@
           <groupId>net.sf.py4j</groupId>
           <artifactId>py4j</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
 
@@ -120,6 +124,10 @@
           <groupId>net.sf.py4j</groupId>
           <artifactId>py4j</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
 
@@ -148,6 +156,10 @@
           <groupId>net.sf.py4j</groupId>
           <artifactId>py4j</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
 
diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkShimsTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkShimsTest.java
index d428502..812a981 100644
--- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkShimsTest.java
+++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkShimsTest.java
@@ -131,11 +131,7 @@ public class SparkShimsTest {
         try {
           sparkShims = SparkShims.getInstance(SparkVersion.SPARK_2_0_0.toString(), new Properties(), null);
         } catch (Throwable e2) {
-          try {
-            sparkShims = SparkShims.getInstance(SparkVersion.SPARK_1_6_0.toString(), new Properties(), null);
-          } catch (Throwable e3) {
-            throw new RuntimeException("All SparkShims are tried, but no one can be created.");
-          }
+          throw new RuntimeException("All SparkShims are tried, but no one can be created.");
         }
       }
     }
diff --git a/spark/pom.xml b/spark/pom.xml
index 1614002..748d77b 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -159,7 +159,7 @@
         <!-- profile spark-x only affect the embedded spark version in zeppelin distribution -->
 
         <profile>
-            <id>spark-3.1</id>
+            <id>spark-3.2</id>
             <activation>
                 <activeByDefault>true</activeByDefault>
             </activation>
@@ -167,6 +167,18 @@
                 <datanucleus.core.version>4.1.17</datanucleus.core.version>
                 <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version>
                 <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version>
+                <spark.version>3.2.0</spark.version>
+                <protobuf.version>2.5.0</protobuf.version>
+                <py4j.version>0.10.9.2</py4j.version>
+            </properties>
+        </profile>
+
+        <profile>
+            <id>spark-3.1</id>
+            <properties>
+                <datanucleus.core.version>4.1.17</datanucleus.core.version>
+                <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version>
+                <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version>
                 <spark.version>3.1.2</spark.version>
                 <protobuf.version>2.5.0</protobuf.version>
                 <py4j.version>0.10.9</py4j.version>
diff --git a/spark/scala-2.12/pom.xml b/spark/scala-2.12/pom.xml
index 4c4f33f..a76ee4d 100644
--- a/spark/scala-2.12/pom.xml
+++ b/spark/scala-2.12/pom.xml
@@ -32,7 +32,7 @@
 
   <properties>
     <spark.version>2.4.5</spark.version>
-    <spark.scala.version>2.12.10</spark.scala.version>
+    <spark.scala.version>2.12.15</spark.scala.version>
     <spark.scala.binary.version>2.12</spark.scala.binary.version>
     <spark.scala.compile.version>${spark.scala.version}</spark.scala.compile.version>
   </properties>
diff --git a/spark/spark-shims/src/main/java/org/apache/zeppelin/spark/SparkVersion.java b/spark/spark-shims/src/main/java/org/apache/zeppelin/spark/SparkVersion.java
index ea18a32..eb41f43 100644
--- a/spark/spark-shims/src/main/java/org/apache/zeppelin/spark/SparkVersion.java
+++ b/spark/spark-shims/src/main/java/org/apache/zeppelin/spark/SparkVersion.java
@@ -25,18 +25,16 @@ import org.slf4j.LoggerFactory;
 public class SparkVersion {
   private static final Logger logger = LoggerFactory.getLogger(SparkVersion.class);
 
-  public static final SparkVersion SPARK_1_6_0 = SparkVersion.fromVersionString("1.6.0");
-
   public static final SparkVersion SPARK_2_0_0 = SparkVersion.fromVersionString("2.0.0");
   public static final SparkVersion SPARK_2_2_0 = SparkVersion.fromVersionString("2.2.0");
   public static final SparkVersion SPARK_2_3_0 = SparkVersion.fromVersionString("2.3.0");
   public static final SparkVersion SPARK_2_3_1 = SparkVersion.fromVersionString("2.3.1");
   public static final SparkVersion SPARK_2_4_0 = SparkVersion.fromVersionString("2.4.0");
   public static final SparkVersion SPARK_3_1_0 = SparkVersion.fromVersionString("3.1.0");
-  public static final SparkVersion SPARK_3_2_0 = SparkVersion.fromVersionString("3.2.0");
+  public static final SparkVersion SPARK_3_3_0 = SparkVersion.fromVersionString("3.3.0");
 
-  public static final SparkVersion MIN_SUPPORTED_VERSION =  SPARK_1_6_0;
-  public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_3_2_0;
+  public static final SparkVersion MIN_SUPPORTED_VERSION =  SPARK_2_0_0;
+  public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_3_3_0;
 
   private int version;
   private int majorVersion;
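
The two constants above bound the supported range: a Spark version is rejected when it falls outside [MIN_SUPPORTED_VERSION, UNSUPPORTED_FUTURE_VERSION). A minimal sketch of that check, assuming comparison helpers along the lines of olderThan/newerThanEquals from the full SparkVersion source (not shown in this hunk):

    // Versions below 2.0.0 or at/above 3.3.0 are now rejected.
    SparkVersion running = SparkVersion.fromVersionString("3.2.0");
    boolean unsupported = running.olderThan(SparkVersion.MIN_SUPPORTED_VERSION)
        || running.newerThanEquals(SparkVersion.UNSUPPORTED_FUTURE_VERSION);
    // With this patch, 2.0.0 <= 3.2.0 < 3.3.0, so Spark 3.2.0 is accepted.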
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest32.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest32.java
new file mode 100644
index 0000000..001ec0f
--- /dev/null
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest32.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.integration;
+
+import org.apache.zeppelin.interpreter.InterpreterSetting;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.Arrays;
+import java.util.List;
+
+@RunWith(value = Parameterized.class)
+public class SparkIntegrationTest32 extends SparkIntegrationTest {
+
+  public SparkIntegrationTest32(String sparkVersion, String hadoopVersion) {
+    super(sparkVersion, hadoopVersion);
+  }
+
+  @Parameterized.Parameters
+  public static List<Object[]> data() {
+    return Arrays.asList(new Object[][]{
+        {"3.2.0", "2.7"},
+        // TODO(zjffdu) Run integration tests under profile hadoop3
+        // {"3.2.0", "3.2"}
+    });
+  }
+
+  @Override
+  protected void setUpSparkInterpreterSetting(InterpreterSetting interpreterSetting) {
+    // spark3 doesn't support yarn-client and yarn-cluster anymore, use
+    // spark.master and spark.submit.deployMode instead
+    String sparkMaster = interpreterSetting.getJavaProperties().getProperty("spark.master");
+    if (sparkMaster.equals("yarn-client")) {
+      interpreterSetting.setProperty("spark.master", "yarn");
+      interpreterSetting.setProperty("spark.submit.deployMode", "client");
+    } else if (sparkMaster.equals("yarn-cluster")){
+      interpreterSetting.setProperty("spark.master", "yarn");
+      interpreterSetting.setProperty("spark.submit.deployMode", "cluster");
+    } else if (sparkMaster.startsWith("local")) {
+      interpreterSetting.setProperty("spark.submit.deployMode", "client");
+    }
+  }
+}
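
The yarn-master translation above mirrors a change in Spark itself: Spark 3.x no longer accepts the legacy "yarn-client"/"yarn-cluster" master strings. Expressed against the Spark API directly, the equivalent configuration splits the master URL and the deploy mode (illustrative sketch with a hypothetical class name, not part of this commit):

    import org.apache.spark.SparkConf;

    public class YarnMasterMapping {
      public static void main(String[] args) {
        // "yarn-client"  -> master "yarn" + deploy mode "client"
        // "yarn-cluster" -> master "yarn" + deploy mode "cluster"
        SparkConf conf = new SparkConf()
            .setMaster("yarn")
            .set("spark.submit.deployMode", "client");
        System.out.println(conf.get("spark.submit.deployMode"));
      }
    }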
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/ZeppelinSparkClusterTest32.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/ZeppelinSparkClusterTest32.java
new file mode 100644
index 0000000..2b6fa13
--- /dev/null
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/ZeppelinSparkClusterTest32.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.integration;
+
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.Arrays;
+import java.util.List;
+
+@RunWith(value = Parameterized.class)
+public class ZeppelinSparkClusterTest32 extends ZeppelinSparkClusterTest {
+
+  public ZeppelinSparkClusterTest32(String sparkVersion, String hadoopVersion) throws Exception {
+    super(sparkVersion, hadoopVersion);
+  }
+
+  @Parameterized.Parameters
+  public static List<Object[]> data() {
+    return Arrays.asList(new Object[][]{
+        {"3.2.0", "2.7"},
+        {"3.2.0", "3.2"}
+    });
+  }
+}