You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by mm...@apache.org on 2022/10/24 08:30:42 UTC

[beam] branch master updated: Migrate examples and maven-archetypes (including Java Quickstart) to Spark 3 (addresses #23728) (#23730)

This is an automated email from the ASF dual-hosted git repository.

mmack pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 760c83e9402 Migrate examples and maven-archetypes (including Java Quickstart) to Spark 3 (addresses #23728) (#23730)
760c83e9402 is described below

commit 760c83e94020971d84b904270b087d4c3d69fd00
Author: Moritz Mack <mm...@talend.com>
AuthorDate: Mon Oct 24 10:30:30 2022 +0200

    Migrate examples and maven-archetypes (including Java Quickstart) to Spark 3 (addresses #23728) (#23730)
---
 examples/java/build.gradle                         |  7 +-----
 examples/kotlin/build.gradle                       |  7 +-----
 release/build.gradle.kts                           |  2 +-
 runners/spark/3/build.gradle                       |  3 +++
 runners/spark/spark_runner.gradle                  |  3 ---
 sdks/java/maven-archetypes/examples/build.gradle   |  2 +-
 .../src/main/resources/archetype-resources/pom.xml | 28 ++++------------------
 .../maven-archetypes/gcp-bom-examples/build.gradle |  2 +-
 .../src/main/resources/archetype-resources/pom.xml | 24 +++----------------
 9 files changed, 15 insertions(+), 63 deletions(-)

diff --git a/examples/java/build.gradle b/examples/java/build.gradle
index 13b2518bf38..aa51dcfeae8 100644
--- a/examples/java/build.gradle
+++ b/examples/java/build.gradle
@@ -109,13 +109,8 @@ dependencies {
   }
   directRunnerPreCommit project(path: ":runners:direct-java", configuration: "shadow")
   flinkRunnerPreCommit project(":runners:flink:${project.ext.latestFlinkVersion}")
-  // TODO: Make the netty version used configurable, we add netty-all 4.1.17.Final so it appears on the classpath
-  // before 4.1.8.Final defined by Apache Beam
-  sparkRunnerPreCommit "io.netty:netty-all:4.1.17.Final"
-  sparkRunnerPreCommit project(":runners:spark:2")
+  sparkRunnerPreCommit project(":runners:spark:3")
   sparkRunnerPreCommit project(":sdks:java:io:hadoop-file-system")
-  sparkRunnerPreCommit library.java.spark_streaming
-  sparkRunnerPreCommit library.java.spark_core
 }
 
 /*
diff --git a/examples/kotlin/build.gradle b/examples/kotlin/build.gradle
index 0aa3dc257b0..79a1248712d 100644
--- a/examples/kotlin/build.gradle
+++ b/examples/kotlin/build.gradle
@@ -81,13 +81,8 @@ dependencies {
   }
   directRunnerPreCommit project(path: ":runners:direct-java", configuration: "shadow")
   flinkRunnerPreCommit project(":runners:flink:${project.ext.latestFlinkVersion}")
-  // TODO: Make the netty version used configurable, we add netty-all 4.1.17.Final so it appears on the classpath
-  // before 4.1.8.Final defined by Apache Beam
-  sparkRunnerPreCommit "io.netty:netty-all:4.1.17.Final"
-  sparkRunnerPreCommit project(":runners:spark:2")
+  sparkRunnerPreCommit project(":runners:spark:3")
   sparkRunnerPreCommit project(":sdks:java:io:hadoop-file-system")
-  sparkRunnerPreCommit library.java.spark_streaming
-  sparkRunnerPreCommit library.java.spark_core
 }
 
 /*
diff --git a/release/build.gradle.kts b/release/build.gradle.kts
index 7de4ab3af61..ce895af80f8 100644
--- a/release/build.gradle.kts
+++ b/release/build.gradle.kts
@@ -38,7 +38,7 @@ task("runJavaExamplesValidationTask") {
   description = "Run the Beam quickstart across all Java runners"
   dependsOn(":runners:direct-java:runQuickstartJavaDirect")
   dependsOn(":runners:google-cloud-dataflow-java:runQuickstartJavaDataflow")
-  dependsOn(":runners:spark:2:runQuickstartJavaSpark")
+  dependsOn(":runners:spark:3:runQuickstartJavaSpark")
   dependsOn(":runners:flink:1.13:runQuickstartJavaFlinkLocal")
   dependsOn(":runners:direct-java:runMobileGamingJavaDirect")
   dependsOn(":runners:google-cloud-dataflow-java:runMobileGamingJavaDataflow")
diff --git a/runners/spark/3/build.gradle b/runners/spark/3/build.gradle
index 3d59bd525c4..494d367131b 100644
--- a/runners/spark/3/build.gradle
+++ b/runners/spark/3/build.gradle
@@ -29,6 +29,9 @@ project.ext {
 // Load the main build script which contains all build logic.
 apply from: "$basePath/spark_runner.gradle"
 
+// Generates runQuickstartJavaSpark task (can only support 1 version of Spark)
+createJavaExamplesArchetypeValidationTask(type: 'Quickstart', runner: 'Spark')
+
 // Additional supported Spark versions (used in compatibility tests)
 def sparkVersions = [
     "330": "3.3.0",
diff --git a/runners/spark/spark_runner.gradle b/runners/spark/spark_runner.gradle
index 14a433162fb..1869f9c2174 100644
--- a/runners/spark/spark_runner.gradle
+++ b/runners/spark/spark_runner.gradle
@@ -385,9 +385,6 @@ tasks.register("validatesRunner") {
   //dependsOn validatesStructuredStreamingRunnerBatch
 }
 
-// Generates :runners:spark:*:runQuickstartJavaSpark task
-createJavaExamplesArchetypeValidationTask(type: 'Quickstart', runner: 'Spark')
-
 tasks.register("hadoopVersionsTest") {
   group = "Verification"
   dependsOn hadoopVersions.collect{k,v -> "hadoopVersion${k}Test"}
diff --git a/sdks/java/maven-archetypes/examples/build.gradle b/sdks/java/maven-archetypes/examples/build.gradle
index 148015f4389..6a034029f10 100644
--- a/sdks/java/maven-archetypes/examples/build.gradle
+++ b/sdks/java/maven-archetypes/examples/build.gradle
@@ -36,7 +36,7 @@ processResources {
     'libraries-bom.version': dependencies.create(project.library.java.google_cloud_platform_libraries_bom).getVersion(),
     'pubsub.version': dependencies.create(project.library.java.google_api_services_pubsub).getVersion(),
     'slf4j.version': dependencies.create(project.library.java.slf4j_api).getVersion(),
-    'spark.version': dependencies.create(project.library.java.spark_core).getVersion(),
+    'spark.version': dependencies.create(project.library.java.spark3_core).getVersion(),
     'nemo.version': dependencies.create(project.library.java.nemo_compiler_frontend_beam).getVersion(),
     'hadoop.version': dependencies.create(project.library.java.hadoop_client).getVersion(),
     'mockito.version': dependencies.create(project.library.java.mockito_core).getVersion(),
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
index 50515b81207..5560ca93257 100644
--- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
@@ -220,15 +220,11 @@
 
     <profile>
       <id>spark-runner</id>
-      <!-- Makes the SparkRunner available when running a pipeline. Additionally,
-           overrides some Spark dependencies to Beam-compatible versions. -->
-      <properties>
-        <netty.version>4.1.17.Final</netty.version>
-      </properties>
+      <!-- Makes the SparkRunner available when running a pipeline. -->
       <dependencies>
         <dependency>
           <groupId>org.apache.beam</groupId>
-          <artifactId>beam-runners-spark</artifactId>
+          <artifactId>beam-runners-spark-3</artifactId>
           <version>${beam.version}</version>
           <scope>runtime</scope>
           <exclusions>
@@ -246,7 +242,7 @@
         </dependency>
         <dependency>
           <groupId>org.apache.spark</groupId>
-          <artifactId>spark-streaming_2.11</artifactId>
+          <artifactId>spark-streaming_2.12</artifactId>
           <version>${spark.version}</version>
           <scope>runtime</scope>
           <exclusions>
@@ -258,26 +254,10 @@
         </dependency>
         <dependency>
           <groupId>com.fasterxml.jackson.module</groupId>
-          <artifactId>jackson-module-scala_2.11</artifactId>
+          <artifactId>jackson-module-scala_2.12</artifactId>
           <version>${jackson.version}</version>
           <scope>runtime</scope>
         </dependency>
-        <!-- [BEAM-3519] GCP IO exposes netty on its API surface, causing conflicts with runners -->
-        <dependency>
-          <groupId>org.apache.beam</groupId>
-          <artifactId>beam-sdks-java-io-google-cloud-platform</artifactId>
-          <version>${beam.version}</version>
-          <exclusions>
-            <exclusion>
-              <groupId>io.grpc</groupId>
-              <artifactId>grpc-netty</artifactId>
-            </exclusion>
-            <exclusion>
-              <groupId>io.netty</groupId>
-              <artifactId>netty-handler</artifactId>
-            </exclusion>
-          </exclusions>
-        </dependency>
       </dependencies>
     </profile>
     <profile>
diff --git a/sdks/java/maven-archetypes/gcp-bom-examples/build.gradle b/sdks/java/maven-archetypes/gcp-bom-examples/build.gradle
index 0e4f394170e..af06bfc41d8 100644
--- a/sdks/java/maven-archetypes/gcp-bom-examples/build.gradle
+++ b/sdks/java/maven-archetypes/gcp-bom-examples/build.gradle
@@ -35,7 +35,7 @@ processResources {
             'junit.version': dependencies.create(project.library.java.junit).getVersion(),
             'pubsub.version': dependencies.create(project.library.java.google_api_services_pubsub).getVersion(),
             'slf4j.version': dependencies.create(project.library.java.slf4j_api).getVersion(),
-            'spark.version': dependencies.create(project.library.java.spark_core).getVersion(),
+            'spark.version': dependencies.create(project.library.java.spark3_core).getVersion(),
             'nemo.version': dependencies.create(project.library.java.nemo_compiler_frontend_beam).getVersion(),
             'hadoop.version': dependencies.create(project.library.java.hadoop_client).getVersion(),
             'mockito.version': dependencies.create(project.library.java.mockito_core).getVersion(),
diff --git a/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml
index 863a465f0fd..c3fb0f26fcb 100644
--- a/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml
@@ -216,13 +216,10 @@
       <id>spark-runner</id>
       <!-- Makes the SparkRunner available when running a pipeline. Additionally,
            overrides some Spark dependencies to Beam-compatible versions. -->
-      <properties>
-        <netty.version>4.1.17.Final</netty.version>
-      </properties>
       <dependencies>
         <dependency>
           <groupId>org.apache.beam</groupId>
-          <artifactId>beam-runners-spark</artifactId>
+          <artifactId>beam-runners-spark-3</artifactId>
           <scope>runtime</scope>
           <exclusions>
             <exclusion>
@@ -238,7 +235,7 @@
         </dependency>
         <dependency>
           <groupId>org.apache.spark</groupId>
-          <artifactId>spark-streaming_2.11</artifactId>
+          <artifactId>spark-streaming_2.12</artifactId>
           <scope>runtime</scope>
           <exclusions>
             <exclusion>
@@ -249,25 +246,10 @@
         </dependency>
         <dependency>
           <groupId>com.fasterxml.jackson.module</groupId>
-          <artifactId>jackson-module-scala_2.11</artifactId>
+          <artifactId>jackson-module-scala_2.12</artifactId>
           <version>${jackson.version}</version>
           <scope>runtime</scope>
         </dependency>
-        <!-- [BEAM-3519] GCP IO exposes netty on its API surface, causing conflicts with runners -->
-        <dependency>
-          <groupId>org.apache.beam</groupId>
-          <artifactId>beam-sdks-java-io-google-cloud-platform</artifactId>
-          <exclusions>
-            <exclusion>
-              <groupId>io.grpc</groupId>
-              <artifactId>grpc-netty</artifactId>
-            </exclusion>
-            <exclusion>
-              <groupId>io.netty</groupId>
-              <artifactId>netty-handler</artifactId>
-            </exclusion>
-          </exclusions>
-        </dependency>
       </dependencies>
     </profile>
     <profile>