Posted to commits@zeppelin.apache.org by jo...@apache.org on 2022/03/28 15:22:44 UTC

[zeppelin] branch master updated: [ZEPPELIN-5680] Remove spark-dependencies module (#4323)

This is an automated email from the ASF dual-hosted git repository.

jongyoul pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new 6331556  [ZEPPELIN-5680] Remove spark-dependencies module (#4323)
6331556 is described below

commit 6331556719a82ca8cd085377feaf1f6e2885c3ac
Author: Jeff Zhang <zj...@apache.org>
AuthorDate: Mon Mar 28 23:22:38 2022 +0800

    [ZEPPELIN-5680] Remove spark-dependencies module (#4323)
    
    * [ZEPPELIN-5680] Remove spark-dependencies module
    
    * update .github
    
    * update doc
---
 .github/workflows/core.yml        |  10 +-
 bin/interpreter.sh                |  25 +---
 docs/setup/basics/how_to_build.md |   7 +-
 spark/pom.xml                     |   1 -
 spark/spark-dependencies/pom.xml  | 280 --------------------------------------
 5 files changed, 9 insertions(+), 314 deletions(-)
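
With the spark-dependencies module removed, the Spark interpreter no longer ships an
embedded Spark/Hadoop bundle, so SPARK_HOME must point at an existing Spark installation
before the interpreter is launched (see the bin/interpreter.sh change below). A minimal
sketch, assuming a hypothetical install location:

    # hypothetical paths; adjust to your environment
    export SPARK_HOME=/opt/spark
    export HADOOP_CONF_DIR=/etc/hadoop/conf   # only needed when running against YARN
    ./bin/zeppelin-daemon.sh start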

diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index f9eb3ce..578414e 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -54,7 +54,7 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-zeppelin-
       - name: install application with some interpreter
-        run: ./mvnw install -Pbuild-distr -DskipRat -DskipTests -pl zeppelin-server,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -B
+        run: ./mvnw install -Pbuild-distr -DskipRat -DskipTests -pl zeppelin-server,zeppelin-web,spark-submit,spark/scala-2.11,spark/scala-2.12,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -B
       - name: install and test plugins
         run: ./mvnw package -DskipRat -pl zeppelin-plugins -amd -B
       - name: Setup conda environment with python 3.7 and R
@@ -74,7 +74,7 @@ jobs:
           conda list
           conda info
       - name: run tests with ${{ matrix.hadoop }} # skip spark test because we would run them in other CI
-        run: ./mvnw verify -Pusing-packaged-distr -DskipRat -pl zeppelin-server,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -Dtests.to.exclude=**/org/apache/zeppelin/spark/* -DfailIfNoTests=false
+        run: ./mvnw verify -Pusing-packaged-distr -DskipRat -pl zeppelin-server,zeppelin-web,spark-submit,spark/scala-2.11,spark/scala-2.12,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -Dtests.to.exclude=**/org/apache/zeppelin/spark/* -DfailIfNoTests=false
 
   # test interpreter modules except spark, flink, python, rlang, jupyter
   interpreter-test-non-core:
@@ -190,7 +190,7 @@ jobs:
             ${{ runner.os }}-zeppelin-
       - name: install environment
         run: |
-          ./mvnw install -DskipTests -DskipRat -Phadoop2 -Pintegration -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,flink-cmd,flink/flink-scala-2.11,flink/flink-scala-2.12,jdbc,shell -am
+          ./mvnw install -DskipTests -DskipRat -Phadoop2 -Pintegration -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/scala-2.11,spark/scala-2.12,markdown,flink-cmd,flink/flink-scala-2.11,flink/flink-scala-2.12,jdbc,shell -am
           ./mvnw package -DskipRat -pl zeppelin-plugins -amd -DskipTests -B
       - name: Setup conda environment with python 3.7 and R
         uses: conda-incubator/setup-miniconda@v2
@@ -279,7 +279,7 @@ jobs:
             ${{ runner.os }}-zeppelin-
       - name: install environment
         run: |
-          ./mvnw install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B
+          ./mvnw install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/scala-2.11,spark/scala-2.12,markdown -am -Phadoop2 -Pintegration -B
           ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests -B
       - name: Setup conda environment with python 3.7 and R
         uses: conda-incubator/setup-miniconda@v2
@@ -325,7 +325,7 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-zeppelin-
       - name: install environment
-        run: ./mvnw install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Phadoop2 -B
+        run: ./mvnw install -DskipTests -DskipRat -pl spark-submit,spark/scala-2.11,spark/scala-2.12 -am -Phadoop2 -B
       - name: Setup conda environment with python ${{ matrix.python }} and R
         uses: conda-incubator/setup-miniconda@v2
         with:
diff --git a/bin/interpreter.sh b/bin/interpreter.sh
index baeaa5b..1c72441 100755
--- a/bin/interpreter.sh
+++ b/bin/interpreter.sh
@@ -179,29 +179,8 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
     export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
     export PYTHONPATH="${py4j[0]}:$PYTHONPATH"
   else
-    # add Hadoop jars into classpath
-    if [[ -n "${HADOOP_HOME}" ]]; then
-      # Apache
-      addEachJarInDirRecursiveForIntp "${HADOOP_HOME}/share"
-
-      # CDH
-      addJarInDirForIntp "${HADOOP_HOME}"
-      addJarInDirForIntp "${HADOOP_HOME}/lib"
-    fi
-
-    addJarInDirForIntp "${INTERPRETER_DIR}/dep"
-
-    py4j=("${ZEPPELIN_HOME}"/interpreter/spark/pyspark/py4j-*-src.zip)
-    # pick the first match py4j zip - there should only be one
-    PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${py4j[0]}"
-
-    if [[ -z "${PYTHONPATH}" ]]; then
-      export PYTHONPATH="${PYSPARKPATH}"
-    else
-      export PYTHONPATH="${PYTHONPATH}:${PYSPARKPATH}"
-    fi
-    unset PYSPARKPATH
-    export SPARK_CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}"
+    echo "No SPARK_HOME is specified"
+    exit -1
   fi
 
   if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
diff --git a/docs/setup/basics/how_to_build.md b/docs/setup/basics/how_to_build.md
index cbd826f..56715a2 100644
--- a/docs/setup/basics/how_to_build.md
+++ b/docs/setup/basics/how_to_build.md
@@ -93,7 +93,7 @@ Set scala version (default 2.10). Available profiles are
 
 #### Spark Interpreter
 
-To be noticed, the spark profiles here only affect the embedded mode (no need to specify `SPARK_HOME`) of spark interpreter. 
+To be noticed, the spark profiles here only affect the unit test (no need to specify `SPARK_HOME`) of spark interpreter. 
 Zeppelin doesn't require you to build with different spark to make different versions of spark work in Zeppelin.
 You can run different versions of Spark in Zeppelin as long as you specify `SPARK_HOME`. Actually Zeppelin supports all the versions of Spark from 1.6 to 3.0.
 
@@ -116,7 +116,7 @@ minor version can be adjusted by `-Dspark.version=x.x.x`
 
 ##### `-Pspark-scala-[version] (optional)`
 
-To be noticed, these profiles also only affect the embedded mode (no need to specify `SPARK_HOME`) of Spark interpreter. 
+To be noticed, these profiles also only affect the unit test (no need to specify `SPARK_HOME`) of Spark interpreter. 
 Actually Zeppelin supports all the versions of scala (2.11, 2.12) in Spark interpreter as long as you specify `SPARK_HOME`.
 
 Available profiles are
@@ -125,9 +125,6 @@ Available profiles are
 -Pspark-scala-2.11
 -Pspark-scala-2.12
 ```
-
-If you want to use Spark 3.x in the embedded mode, then you have to specify both profile `spark-3.0` and `spark-scala-2.12`,
-because Spark 3.x doesn't support scala 2.11.
  
 #### Build hadoop with Zeppelin (`-Phadoop[version]`)
  
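
The doc change above narrows the scope of the spark and spark-scala profiles: they only
affect the Spark interpreter's own unit tests, which run without SPARK_HOME. A minimal
sketch of such a test run, assuming the scala-2.12 module path and profile name shown
earlier in this patch:

    # run the Spark interpreter unit tests against Scala 2.12; no SPARK_HOME required
    ./mvnw test -pl spark/scala-2.12 -am -Pspark-scala-2.12 -DskipRat
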
diff --git a/spark/pom.xml b/spark/pom.xml
index a5a5a9d..f73c87b 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -59,7 +59,6 @@
         <module>spark-scala-parent</module>
         <module>scala-2.11</module>
         <module>scala-2.12</module>
-        <module>spark-dependencies</module>
         <module>spark-shims</module>
         <module>spark2-shims</module>
         <module>spark3-shims</module>
diff --git a/spark/spark-dependencies/pom.xml b/spark/spark-dependencies/pom.xml
deleted file mode 100644
index 0a8e71e..0000000
--- a/spark/spark-dependencies/pom.xml
+++ /dev/null
@@ -1,280 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  ~ Licensed to the Apache Software Foundation (ASF) under one or more
-  ~ contributor license agreements.  See the NOTICE file distributed with
-  ~ this work for additional information regarding copyright ownership.
-  ~ The ASF licenses this file to You under the Apache License, Version 2.0
-  ~ (the "License"); you may not use this file except in compliance with
-  ~ the License.  You may obtain a copy of the License at
-  ~
-  ~    http://www.apache.org/licenses/LICENSE-2.0
-  ~
-  ~ Unless required by applicable law or agreed to in writing, software
-  ~ distributed under the License is distributed on an "AS IS" BASIS,
-  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  ~ See the License for the specific language governing permissions and
-  ~ limitations under the License.
-  -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-
-  <parent>
-    <artifactId>spark-parent</artifactId>
-    <groupId>org.apache.zeppelin</groupId>
-    <version>0.11.0-SNAPSHOT</version>
-  </parent>
-
-  <artifactId>zeppelin-spark-dependencies</artifactId>
-  <packaging>jar</packaging>
-  <name>Zeppelin: Spark dependencies</name>
-  <description>Zeppelin spark support</description>
-
-  <properties>
-    <!-- library version defined in this section brought from spark 1.4.1 and it's dependency.
-         Therefore changing only spark.version is not going to be enough when this module
-         support new version of spark to make the new version as default supported version.
-
-         Each profile (spark-2.0, spark-1.6, etc) will overrides necessary dependency version.
-         So we'll make one of those profile 'activateByDefault' to make it default supported version
-         instead of changing spark.version in this section.
-    -->
-
-    <hadoop.version>${hadoop2.7.version}</hadoop.version>
-    <yarn.version>${hadoop.version}</yarn.version>
-    <avro.version>1.7.7</avro.version>
-    <avro.mapred.classifier/>
-    <jets3t.version>0.7.1</jets3t.version>
-    <protobuf.version>2.4.1</protobuf.version>
-
-    <akka.group>org.spark-project.akka</akka.group>
-    <akka.version>2.3.4-spark</akka.version>
-  </properties>
-
-  <dependencies>
-
-    <dependency>
-      <groupId>org.apache.zeppelin</groupId>
-      <artifactId>spark-interpreter</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.zeppelin</groupId>
-      <artifactId>spark-scala-2.11</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.zeppelin</groupId>
-      <artifactId>spark-scala-2.12</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-
-    <!-- Spark -->
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-core_${spark.scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-client</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-repl_${spark.scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-sql_${spark.scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-hive_${spark.scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-streaming_${spark.scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-catalyst_${spark.scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-yarn_${spark.scala.binary.version}</artifactId>
-      <version>${spark.version}</version>
-    </dependency>
-
-    <!-- Include hadoop 2.7 into spark-dependencies jar.
-    Explicit specify compile scope, otherwise it would use provided defined in root pom.xml -->
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>compile</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-api</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>compile</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
-      <version>${hadoop.version}</version>
-      <scope>compile</scope>
-    </dependency>
-
-  </dependencies>
-
-  <build>
-    <plugins>
-      <plugin>
-        <artifactId>maven-enforcer-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>enforce</id>
-            <phase>none</phase>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <forkCount>1</forkCount>
-          <reuseForks>false</reuseForks>
-          <argLine>-Xmx1024m -XX:MaxMetaspaceSize=256m</argLine>
-        </configuration>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-shade-plugin</artifactId>
-        <configuration>
-          <filters>
-            <filter>
-              <artifact>*:*</artifact>
-              <excludes>
-                <exclude>org/datanucleus/**</exclude>
-                <exclude>META-INF/*.SF</exclude>
-                <exclude>META-INF/*.DSA</exclude>
-                <exclude>META-INF/*.RSA</exclude>
-              </excludes>
-            </filter>
-          </filters>
-          <transformers>
-            <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
-            <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
-              <resource>reference.conf</resource>
-            </transformer>
-          </transformers>
-          <outputFile>${project.basedir}/../../interpreter/spark/dep/${project.artifactId}-${project.version}.jar</outputFile>
-        </configuration>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <artifactId>maven-resources-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>copy-interpreter-setting</id>
-            <phase>none</phase>
-            <configuration>
-              <skip>true</skip>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- include pyspark by default -->
-      <plugin>
-        <groupId>com.googlecode.maven-download-plugin</groupId>
-        <artifactId>download-maven-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>download-pyspark-files</id>
-            <phase>validate</phase>
-            <goals>
-              <goal>wget</goal>
-            </goals>
-            <configuration>
-              <readTimeOut>60000</readTimeOut>
-              <retries>5</retries>
-              <unpack>true</unpack>
-              <url>${spark.src.download.url}</url>
-              <outputDirectory>${project.build.directory}</outputDirectory>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <artifactId>maven-clean-plugin</artifactId>
-        <configuration>
-          <filesets>
-            <fileset>
-              <directory>${basedir}/../python/build</directory>
-            </fileset>
-          </filesets>
-        </configuration>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-antrun-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>zip-pyspark-files</id>
-            <phase>generate-resources</phase>
-            <goals>
-              <goal>run</goal>
-            </goals>
-            <configuration>
-              <target>
-                <delete dir="../../interpreter/spark/pyspark"/>
-                <copy todir="../../interpreter/spark/pyspark"
-                      file="${project.build.directory}/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip"/>
-                <zip destfile="${project.build.directory}/../../../interpreter/spark/pyspark/pyspark.zip"
-                     basedir="${project.build.directory}/${spark.archive}/python"
-                     includes="pyspark/*.py,pyspark/**/*.py"/>
-              </target>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-    </plugins>
-  </build>
-
-
-</project>
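
Since the deleted pom above is what shaded the Spark and Hadoop jars into
interpreter/spark/dep, a distribution built after this commit no longer bundles that
directory and relies entirely on an external SPARK_HOME at runtime. A minimal sketch of a
full distribution build, reusing only flags that already appear in the workflow changes
above:

    # build the Zeppelin binary distribution without the removed spark-dependencies module
    ./mvnw clean package -Pbuild-distr -DskipTests -DskipRat -B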