You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by mm...@apache.org on 2022/10/24 08:30:42 UTC
[beam] branch master updated: Migrate examples and maven-archetypes (including Java Quickstart) to Spark 3 (addresses #23728) (#23730)
This is an automated email from the ASF dual-hosted git repository.
mmack pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 760c83e9402 Migrate examples and maven-archetypes (including Java Quickstart) to Spark 3 (addresses #23728) (#23730)
760c83e9402 is described below
commit 760c83e94020971d84b904270b087d4c3d69fd00
Author: Moritz Mack <mm...@talend.com>
AuthorDate: Mon Oct 24 10:30:30 2022 +0200
Migrate examples and maven-archetypes (including Java Quickstart) to Spark 3 (addresses #23728) (#23730)
---
examples/java/build.gradle | 7 +-----
examples/kotlin/build.gradle | 7 +-----
release/build.gradle.kts | 2 +-
runners/spark/3/build.gradle | 3 +++
runners/spark/spark_runner.gradle | 3 ---
sdks/java/maven-archetypes/examples/build.gradle | 2 +-
.../src/main/resources/archetype-resources/pom.xml | 28 ++++------------------
.../maven-archetypes/gcp-bom-examples/build.gradle | 2 +-
.../src/main/resources/archetype-resources/pom.xml | 24 +++----------------
9 files changed, 15 insertions(+), 63 deletions(-)
diff --git a/examples/java/build.gradle b/examples/java/build.gradle
index 13b2518bf38..aa51dcfeae8 100644
--- a/examples/java/build.gradle
+++ b/examples/java/build.gradle
@@ -109,13 +109,8 @@ dependencies {
}
directRunnerPreCommit project(path: ":runners:direct-java", configuration: "shadow")
flinkRunnerPreCommit project(":runners:flink:${project.ext.latestFlinkVersion}")
- // TODO: Make the netty version used configurable, we add netty-all 4.1.17.Final so it appears on the classpath
- // before 4.1.8.Final defined by Apache Beam
- sparkRunnerPreCommit "io.netty:netty-all:4.1.17.Final"
- sparkRunnerPreCommit project(":runners:spark:2")
+ sparkRunnerPreCommit project(":runners:spark:3")
sparkRunnerPreCommit project(":sdks:java:io:hadoop-file-system")
- sparkRunnerPreCommit library.java.spark_streaming
- sparkRunnerPreCommit library.java.spark_core
}
/*
diff --git a/examples/kotlin/build.gradle b/examples/kotlin/build.gradle
index 0aa3dc257b0..79a1248712d 100644
--- a/examples/kotlin/build.gradle
+++ b/examples/kotlin/build.gradle
@@ -81,13 +81,8 @@ dependencies {
}
directRunnerPreCommit project(path: ":runners:direct-java", configuration: "shadow")
flinkRunnerPreCommit project(":runners:flink:${project.ext.latestFlinkVersion}")
- // TODO: Make the netty version used configurable, we add netty-all 4.1.17.Final so it appears on the classpath
- // before 4.1.8.Final defined by Apache Beam
- sparkRunnerPreCommit "io.netty:netty-all:4.1.17.Final"
- sparkRunnerPreCommit project(":runners:spark:2")
+ sparkRunnerPreCommit project(":runners:spark:3")
sparkRunnerPreCommit project(":sdks:java:io:hadoop-file-system")
- sparkRunnerPreCommit library.java.spark_streaming
- sparkRunnerPreCommit library.java.spark_core
}
/*
diff --git a/release/build.gradle.kts b/release/build.gradle.kts
index 7de4ab3af61..ce895af80f8 100644
--- a/release/build.gradle.kts
+++ b/release/build.gradle.kts
@@ -38,7 +38,7 @@ task("runJavaExamplesValidationTask") {
description = "Run the Beam quickstart across all Java runners"
dependsOn(":runners:direct-java:runQuickstartJavaDirect")
dependsOn(":runners:google-cloud-dataflow-java:runQuickstartJavaDataflow")
- dependsOn(":runners:spark:2:runQuickstartJavaSpark")
+ dependsOn(":runners:spark:3:runQuickstartJavaSpark")
dependsOn(":runners:flink:1.13:runQuickstartJavaFlinkLocal")
dependsOn(":runners:direct-java:runMobileGamingJavaDirect")
dependsOn(":runners:google-cloud-dataflow-java:runMobileGamingJavaDataflow")
diff --git a/runners/spark/3/build.gradle b/runners/spark/3/build.gradle
index 3d59bd525c4..494d367131b 100644
--- a/runners/spark/3/build.gradle
+++ b/runners/spark/3/build.gradle
@@ -29,6 +29,9 @@ project.ext {
// Load the main build script which contains all build logic.
apply from: "$basePath/spark_runner.gradle"
+// Generates runQuickstartJavaSpark task (can only support 1 version of Spark)
+createJavaExamplesArchetypeValidationTask(type: 'Quickstart', runner: 'Spark')
+
// Additional supported Spark versions (used in compatibility tests)
def sparkVersions = [
"330": "3.3.0",
diff --git a/runners/spark/spark_runner.gradle b/runners/spark/spark_runner.gradle
index 14a433162fb..1869f9c2174 100644
--- a/runners/spark/spark_runner.gradle
+++ b/runners/spark/spark_runner.gradle
@@ -385,9 +385,6 @@ tasks.register("validatesRunner") {
//dependsOn validatesStructuredStreamingRunnerBatch
}
-// Generates :runners:spark:*:runQuickstartJavaSpark task
-createJavaExamplesArchetypeValidationTask(type: 'Quickstart', runner: 'Spark')
-
tasks.register("hadoopVersionsTest") {
group = "Verification"
dependsOn hadoopVersions.collect{k,v -> "hadoopVersion${k}Test"}
diff --git a/sdks/java/maven-archetypes/examples/build.gradle b/sdks/java/maven-archetypes/examples/build.gradle
index 148015f4389..6a034029f10 100644
--- a/sdks/java/maven-archetypes/examples/build.gradle
+++ b/sdks/java/maven-archetypes/examples/build.gradle
@@ -36,7 +36,7 @@ processResources {
'libraries-bom.version': dependencies.create(project.library.java.google_cloud_platform_libraries_bom).getVersion(),
'pubsub.version': dependencies.create(project.library.java.google_api_services_pubsub).getVersion(),
'slf4j.version': dependencies.create(project.library.java.slf4j_api).getVersion(),
- 'spark.version': dependencies.create(project.library.java.spark_core).getVersion(),
+ 'spark.version': dependencies.create(project.library.java.spark3_core).getVersion(),
'nemo.version': dependencies.create(project.library.java.nemo_compiler_frontend_beam).getVersion(),
'hadoop.version': dependencies.create(project.library.java.hadoop_client).getVersion(),
'mockito.version': dependencies.create(project.library.java.mockito_core).getVersion(),
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
index 50515b81207..5560ca93257 100644
--- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
@@ -220,15 +220,11 @@
<profile>
<id>spark-runner</id>
- <!-- Makes the SparkRunner available when running a pipeline. Additionally,
- overrides some Spark dependencies to Beam-compatible versions. -->
- <properties>
- <netty.version>4.1.17.Final</netty.version>
- </properties>
+ <!-- Makes the SparkRunner available when running a pipeline. -->
<dependencies>
<dependency>
<groupId>org.apache.beam</groupId>
- <artifactId>beam-runners-spark</artifactId>
+ <artifactId>beam-runners-spark-3</artifactId>
<version>${beam.version}</version>
<scope>runtime</scope>
<exclusions>
@@ -246,7 +242,7 @@
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
- <artifactId>spark-streaming_2.11</artifactId>
+ <artifactId>spark-streaming_2.12</artifactId>
<version>${spark.version}</version>
<scope>runtime</scope>
<exclusions>
@@ -258,26 +254,10 @@
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
- <artifactId>jackson-module-scala_2.11</artifactId>
+ <artifactId>jackson-module-scala_2.12</artifactId>
<version>${jackson.version}</version>
<scope>runtime</scope>
</dependency>
- <!-- [BEAM-3519] GCP IO exposes netty on its API surface, causing conflicts with runners -->
- <dependency>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-sdks-java-io-google-cloud-platform</artifactId>
- <version>${beam.version}</version>
- <exclusions>
- <exclusion>
- <groupId>io.grpc</groupId>
- <artifactId>grpc-netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty-handler</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
</dependencies>
</profile>
<profile>
diff --git a/sdks/java/maven-archetypes/gcp-bom-examples/build.gradle b/sdks/java/maven-archetypes/gcp-bom-examples/build.gradle
index 0e4f394170e..af06bfc41d8 100644
--- a/sdks/java/maven-archetypes/gcp-bom-examples/build.gradle
+++ b/sdks/java/maven-archetypes/gcp-bom-examples/build.gradle
@@ -35,7 +35,7 @@ processResources {
'junit.version': dependencies.create(project.library.java.junit).getVersion(),
'pubsub.version': dependencies.create(project.library.java.google_api_services_pubsub).getVersion(),
'slf4j.version': dependencies.create(project.library.java.slf4j_api).getVersion(),
- 'spark.version': dependencies.create(project.library.java.spark_core).getVersion(),
+ 'spark.version': dependencies.create(project.library.java.spark3_core).getVersion(),
'nemo.version': dependencies.create(project.library.java.nemo_compiler_frontend_beam).getVersion(),
'hadoop.version': dependencies.create(project.library.java.hadoop_client).getVersion(),
'mockito.version': dependencies.create(project.library.java.mockito_core).getVersion(),
diff --git a/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml
index 863a465f0fd..c3fb0f26fcb 100644
--- a/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/gcp-bom-examples/src/main/resources/archetype-resources/pom.xml
@@ -216,13 +216,10 @@
<id>spark-runner</id>
<!-- Makes the SparkRunner available when running a pipeline. Additionally,
overrides some Spark dependencies to Beam-compatible versions. -->
- <properties>
- <netty.version>4.1.17.Final</netty.version>
- </properties>
<dependencies>
<dependency>
<groupId>org.apache.beam</groupId>
- <artifactId>beam-runners-spark</artifactId>
+ <artifactId>beam-runners-spark-3</artifactId>
<scope>runtime</scope>
<exclusions>
<exclusion>
@@ -238,7 +235,7 @@
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
- <artifactId>spark-streaming_2.11</artifactId>
+ <artifactId>spark-streaming_2.12</artifactId>
<scope>runtime</scope>
<exclusions>
<exclusion>
@@ -249,25 +246,10 @@
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
- <artifactId>jackson-module-scala_2.11</artifactId>
+ <artifactId>jackson-module-scala_2.12</artifactId>
<version>${jackson.version}</version>
<scope>runtime</scope>
</dependency>
- <!-- [BEAM-3519] GCP IO exposes netty on its API surface, causing conflicts with runners -->
- <dependency>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-sdks-java-io-google-cloud-platform</artifactId>
- <exclusions>
- <exclusion>
- <groupId>io.grpc</groupId>
- <artifactId>grpc-netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty-handler</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
</dependencies>
</profile>
<profile>