You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by lc...@apache.org on 2018/11/28 02:19:23 UTC
[beam] branch master updated: [BEAM-6102] Fix several packages that
were being bundled without relocation within the Dataflow worker. (#7145)
This is an automated email from the ASF dual-hosted git repository.
lcwik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new d922f70 [BEAM-6102] Fix several packages that were being bundled without relocation within the Dataflow worker. (#7145)
d922f70 is described below
commit d922f70b123a2d5f0926f8989d829ecd75360f47
Author: Lukasz Cwik <lc...@google.com>
AuthorDate: Tue Nov 27 18:19:16 2018 -0800
[BEAM-6102] Fix several packages that were being bundled without relocation within the Dataflow worker. (#7145)
This change:
* Parameterizes the shadow jar validation to allow for excludes to be on a per project basis instead.
* Cuts down the Dataflow worker jar by ~20mbs of non-relocated classes that are provided by the SDK during job submission.
* Fixes building the job management and fn execution model jars which were including the model pipeline classes.
* Adds several comments on how one is meant to shade the legacy worker jar correctly and enforces validation so classes don't leak out unexpectedly.
* Adds two follow-up TODOs to relocate DataflowRunnerHarness which requires fixing boot.go code and also relocating conscrypt.
---
.../org/apache/beam/gradle/BeamModulePlugin.groovy | 68 +++++++-----
model/fn-execution/build.gradle | 4 +-
model/job-management/build.gradle | 4 +-
model/pipeline/build.gradle | 2 +-
.../worker/legacy-worker/build.gradle | 118 +++++++++++----------
.../worker/windmill/build.gradle | 2 +-
6 files changed, 109 insertions(+), 89 deletions(-)
diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
index ba17983..38c52ad 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy
@@ -102,6 +102,14 @@ class BeamModulePlugin implements Plugin<Project> {
boolean validateShadowJar = true
/**
+ * The set of excludes that should be used during validation of the shadow jar. Projects should override
+ * the default with the most specific set of excludes that is valid for the contents of its shaded jar.
+ *
+ * By default we exclude any class underneath the org.apache.beam namespace.
+ */
+ List<String> shadowJarValidationExcludes = ["org/apache/beam/**"]
+
+ /**
* The shadowJar / shadowTestJar tasks execute the following closure to configure themselves.
* Users can compose their closure with the default closure via:
* DEFAULT_SHADOW_CLOSURE << {
@@ -117,6 +125,17 @@ class BeamModulePlugin implements Plugin<Project> {
boolean publish = true
}
+ /** A class defining the set of configurable properties accepted by applyPortabilityNature. */
+ class PortabilityNatureConfiguration {
+ /**
+ * The set of excludes that should be used during validation of the shadow jar. Projects should override
+ * the default with the most specific set of excludes that is valid for the contents of its shaded jar.
+ *
+ * By default we exclude any class underneath the org.apache.beam namespace.
+ */
+ List<String> shadowJarValidationExcludes = ["org/apache/beam/**"]
+ }
+
// A class defining the set of configurable properties for createJavaExamplesArchetypeValidationTask
class JavaExamplesArchetypeValidationConfiguration {
// Type [Quickstart, MobileGaming] for the postrelease validation is required.
@@ -759,7 +778,7 @@ class BeamModulePlugin implements Plugin<Project> {
}
if (configuration.validateShadowJar) {
- project.task('validateShadedJarDoesntLeakNonOrgApacheBeamClasses', dependsOn: 'shadowJar') {
+ project.task('validateShadedJarDoesntLeakNonProjectClasses', dependsOn: 'shadowJar') {
ext.outFile = project.file("${project.reportsDir}/${name}.out")
inputs.files project.configurations.shadow.artifacts.files
outputs.files outFile
@@ -767,19 +786,21 @@ class BeamModulePlugin implements Plugin<Project> {
project.configurations.shadow.artifacts.files.each {
FileTree exposedClasses = project.zipTree(it).matching {
include "**/*.class"
- exclude "org/apache/beam/**"
// BEAM-5919: Exclude paths for Java 9 multi-release jars.
exclude "META-INF/versions/*/module-info.class"
- exclude "META-INF/versions/*/org/apache/beam/**"
+ configuration.shadowJarValidationExcludes.each {
+ exclude "$it"
+ exclude "META-INF/versions/*/$it"
+ }
}
outFile.text = exposedClasses.files
if (exposedClasses.files) {
- throw new GradleException("$it exposed classes outside of org.apache.beam namespace: ${exposedClasses.files}")
+ throw new GradleException("$it exposed classes outside of ${configuration.shadowJarValidationExcludes}: ${exposedClasses.files}")
}
}
}
}
- project.tasks.check.dependsOn project.tasks.validateShadedJarDoesntLeakNonOrgApacheBeamClasses
+ project.tasks.check.dependsOn project.tasks.validateShadedJarDoesntLeakNonProjectClasses
}
if ((isRelease(project) || project.hasProperty('publishing')) &&
@@ -1358,14 +1379,19 @@ artifactId=${project.name}
project.ext.applyPortabilityNature = {
println "applyPortabilityNature with " + (it ? "$it" : "default configuration") + " for project $project.name"
- project.ext.applyJavaNature(enableFindbugs: false, shadowClosure: GrpcVendoring.shadowClosure() << {
- // We perform all the code relocations but don't include
- // any of the actual dependencies since they will be supplied
- // by org.apache.beam:beam-vendor-grpc-v1_13_1:0.1
- dependencies {
- exclude(dependency(".*:.*"))
- }
- })
+ PortabilityNatureConfiguration configuration = it ? it as PortabilityNatureConfiguration : new PortabilityNatureConfiguration()
+
+ project.ext.applyJavaNature(
+ enableFindbugs: false,
+ shadowJarValidationExcludes: it.shadowJarValidationExcludes,
+ shadowClosure: GrpcVendoring.shadowClosure() << {
+ // We perform all the code relocations but don't include
+ // any of the actual dependencies since they will be supplied
+ // by org.apache.beam:beam-vendor-grpc-v1_13_1:0.1
+ dependencies {
+ include(dependency { return false })
+ }
+ })
// Don't force modules here because we don't want to take the shared declarations in build_rules.gradle
// because we would like to have the freedom to choose which versions of dependencies we
@@ -1399,22 +1425,6 @@ artifactId=${project.name}
}
project.dependencies GrpcVendoring.dependenciesClosure() << { shadow 'org.apache.beam:beam-vendor-grpc-1_13_1:0.1' }
-
- project.task('validateShadedJarDoesntExportVendoredDependencies', dependsOn: 'shadowJar') {
- ext.outFile = project.file("${project.reportsDir}/${name}.out")
- inputs.files project.configurations.shadow.artifacts.files
- outputs.files outFile
- doLast {
- project.configurations.shadow.artifacts.files.each {
- FileTree exportedClasses = project.zipTree(it).matching { include "org/apache/beam/vendor/**" }
- outFile.text = exportedClasses.files
- if (exportedClasses.files) {
- throw new GradleException("$it exported classes inside of org.apache.beam.vendor namespace: ${exportedClasses.files}")
- }
- }
- }
- }
- project.tasks.check.dependsOn project.tasks.validateShadedJarDoesntExportVendoredDependencies
}
/** ***********************************************************************************************/
diff --git a/model/fn-execution/build.gradle b/model/fn-execution/build.gradle
index 3194fcf..faf674c 100644
--- a/model/fn-execution/build.gradle
+++ b/model/fn-execution/build.gradle
@@ -17,7 +17,7 @@
*/
apply plugin: org.apache.beam.gradle.BeamModulePlugin
-applyPortabilityNature()
+applyPortabilityNature(shadowJarValidationExcludes: ["org/apache/beam/model/fnexecution/v1/**"])
description = "Apache Beam :: Model :: Fn Execution"
ext.summary = "Portable definitions for execution user-defined functions."
@@ -25,6 +25,6 @@ ext.summary = "Portable definitions for execution user-defined functions."
dependencies {
// We purposely depend on the unshaded classes for protobuf compilation and
// export the shaded variant as the actual runtime dependency.
- protobuf project(path: ":beam-model-pipeline", configuration: "unshaded")
+ compile project(path: ":beam-model-pipeline", configuration: "unshaded")
runtime project(path: ":beam-model-pipeline", configuration: "shadow")
}
diff --git a/model/job-management/build.gradle b/model/job-management/build.gradle
index e453e33..4c50782 100644
--- a/model/job-management/build.gradle
+++ b/model/job-management/build.gradle
@@ -17,7 +17,7 @@
*/
apply plugin: org.apache.beam.gradle.BeamModulePlugin
-applyPortabilityNature()
+applyPortabilityNature(shadowJarValidationExcludes: ["org/apache/beam/model/jobmanagement/v1/**"])
description = "Apache Beam :: Model :: Job Management"
ext.summary = "Portable definitions for submitting pipelines."
@@ -25,6 +25,6 @@ ext.summary = "Portable definitions for submitting pipelines."
dependencies {
// We purposely depend on the unshaded classes for protobuf compilation and
// export the shaded variant as the actual runtime dependency.
- protobuf project(path: ":beam-model-pipeline", configuration: "unshaded")
+ compile project(path: ":beam-model-pipeline", configuration: "unshaded")
runtime project(path: ":beam-model-pipeline", configuration: "shadow")
}
diff --git a/model/pipeline/build.gradle b/model/pipeline/build.gradle
index 016a766..2a72cd6 100644
--- a/model/pipeline/build.gradle
+++ b/model/pipeline/build.gradle
@@ -17,7 +17,7 @@
*/
apply plugin: org.apache.beam.gradle.BeamModulePlugin
-applyPortabilityNature()
+applyPortabilityNature(shadowJarValidationExcludes: ["org/apache/beam/model/pipeline/v1/**"])
description = "Apache Beam :: Model :: Pipeline"
ext.summary = "Portable definitions for building pipelines"
diff --git a/runners/google-cloud-dataflow-java/worker/legacy-worker/build.gradle b/runners/google-cloud-dataflow-java/worker/legacy-worker/build.gradle
index 5718d11..7551758 100644
--- a/runners/google-cloud-dataflow-java/worker/legacy-worker/build.gradle
+++ b/runners/google-cloud-dataflow-java/worker/legacy-worker/build.gradle
@@ -78,20 +78,72 @@ def excluded_dependencies = [
library.java.junit, // Test only
]
-applyJavaNature(publish: false, enableFindbugs: false /* TODO(BEAM-5658): enable findbugs */, validateShadowJar: false, shadowClosure: DEFAULT_SHADOW_CLOSURE << {
+applyJavaNature(
+ publish: false,
+ enableFindbugs: false /* TODO(BEAM-5658): enable findbugs */,
+ shadowJarValidationExcludes: [
+ "org/apache/beam/runners/dataflow/worker/**",
+ "org/apache/beam/repackaged/beam_runners_google_cloud_dataflow_java_legacy_worker/**",
+ // TODO(BEAM-6137): Move DataflowRunnerHarness class under org.apache.beam.runners.dataflow.worker namespace
+ "com/google/cloud/dataflow/worker/DataflowRunnerHarness.class",
+ // TODO(BEAM-6136): Enable relocation for conscrypt
+ "org/conscrypt/**",
+ ],
+ shadowClosure: DEFAULT_SHADOW_CLOSURE << {
+ // Each included dependency must also include all of its necessary transitive dependencies
+ // or have them provided by the users pipeline during job submission. Typically a users
+ // pipeline includes :beam-runners-google-cloud-dataflow-java and its transitive dependencies
+ // so those dependencies don't need to be shaded (bundled and relocated) away. All other
+ // dependencies needed to run the worker must be shaded (bundled and relocated) to prevent
+ // ClassNotFound and/or MethodNotFound errors during pipeline execution.
+ //
+ // Each included dependency should have a matching relocation rule below that ensures
+ // that the shaded jar is correctly built.
+
+ dependencies {
+ include(project(path: ":beam-model-fn-execution", configuration: "shadow"))
+ }
+ relocate("org.apache.beam.model.fnexecution.v1", getWorkerRelocatedPath("org.apache.beam.model.fnexecution.v1"))
+
+ dependencies {
+ include(project(path: ":beam-runners-core-construction-java", configuration: "shadow"))
+ include(project(path: ":beam-runners-core-java", configuration: "shadow"))
+ }
+ relocate("org.apache.beam.runners.core", getWorkerRelocatedPath("org.apache.beam.runners.core"))
+ relocate("org.apache.beam.repackaged.beam_runners_core_construction_java", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_runners_core_construction_java"))
+ relocate("org.apache.beam.repackaged.beam_runners_core_java", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_runners_core_java"))
+
+ dependencies {
+ include(project(path: ":beam-runners-java-fn-execution", configuration: "shadow"))
+ }
+ relocate("org.apache.beam.runners.fnexecution", getWorkerRelocatedPath("org.apache.beam.runners.fnexecution"))
+ relocate("org.apache.beam.repackaged.beam_runners_java_fn_execution", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_runners_java_fn_execution"))
+
+ dependencies {
+ include(project(path: ":beam-sdks-java-fn-execution", configuration: "shadow"))
+ }
+ relocate("org.apache.beam.sdk.fn", getWorkerRelocatedPath("org.apache.beam.sdk.fn"))
+ relocate("org.apache.beam.repackaged.beam_sdks_java_fn_execution", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_sdks_java_fn_execution"))
+
+ // TODO(BEAM-6136): Enable relocation for conscrypt
+ dependencies {
+ include(dependency("org.conscrypt:conscrypt-openjdk:1.1.3:linux-x86_64"))
+ }
+
+ dependencies {
+ // We have to include jetty-server/jetty-servlet and all of its transitive dependencies
+ // which includes several org.eclipse.jetty artifacts + servlet-api
+ include(dependency("org.eclipse.jetty:.*:9.2.10.v20150310"))
+ include(dependency("javax.servlet:javax.servlet-api:3.1.0"))
+ }
+ relocate("org.eclipse.jetty", getWorkerRelocatedPath("org.eclipse.jetty"))
+ relocate("javax.servlet", getWorkerRelocatedPath("javax.servlet"))
+
+ // We don't relocate windmill since it is already underneath the org.apache.beam.runners.dataflow.worker namespace and never
+ // expect a user pipeline to include it. There is also a JNI component that windmill server relies on which makes
+ // arbitrary relocation more difficult.
dependencies {
include(project(path: ":beam-runners-google-cloud-dataflow-java-windmill", configuration: "shadow"))
- include(dependency(".*:.*"))
-
- sdk_provided_dependencies.each {
- exclude(dependency(it))
- }
- sdk_provided_project_dependencies.each {
- exclude(project(path: it, configuration: "shadow"))
- }
- excluded_dependencies.each {
- exclude(dependency(it))
- }
}
// Include original source files extracted under
@@ -100,47 +152,6 @@ applyJavaNature(publish: false, enableFindbugs: false /* TODO(BEAM-5658): enable
exclude "META-INF/LICENSE.txt"
exclude "about.html"
-
- relocate("com.", getWorkerRelocatedPath("com.")) {
- exclude "com.fasterxml.jackson.**"
- exclude "com.google.api.client.**"
- exclude "com.google.api.services.bigquery.**"
- exclude "com.google.api.services.clouddebugger.**"
- exclude "com.google.api.services.dataflow.**"
- exclude "com.google.api.services.datastore.**"
- exclude "com.google.api.services.pubsub.**"
- exclude "com.google.api.services.storage.**"
- exclude "com.google.auth.**"
- exclude "com.google.cloud.dataflow.**"
- exclude "com.sun.management*"
- exclude "com.sun.management.**"
- }
- relocate("javax.servlet", getWorkerRelocatedPath("javax.servlet"))
- relocate("io.", getWorkerRelocatedPath("io."))
- relocate("okio.", getWorkerRelocatedPath("okio."))
- relocate("org.", getWorkerRelocatedPath("org.")) {
- // Exclude netty-tcnative from shading since gRPC relies on Netty to be able
- // to load org.apache.tomcat.jni.SSL to provide an SSL context.
- exclude "org.apache.avro.**"
- exclude "org.apache.beam.**"
- exclude "org.apache.tomcat.jni.**"
- exclude "org.conscrypt.**"
- exclude "org.eclipse.jetty.alpn.**"
- exclude "org.eclipse.jetty.npn.**"
- exclude "org.hamcrest.**"
- exclude "org.joda.time.**"
- exclude "org.junit.**"
- exclude "org.slf4j.**"
- exclude "org.w3c.dom.**"
- }
- relocate("org.apache.beam.runners.core.construction.",
- getWorkerRelocatedPath("org.")) {
- // Exclude org.apache.beam.runners.core.construction.metrics
- // because it is referenced by
- // org.apache.beam.runners.core.metrics which is not relocated.
- exclude "org.apache.beam.runners.core.construction.metrics.**"
- }
-
})
/******************************************************************************/
@@ -184,7 +195,6 @@ dependencies {
compile project(path: ":beam-sdks-java-fn-execution", configuration: "shadow")
compile project(path: ":beam-runners-google-cloud-dataflow-java-windmill", configuration: "shadow")
compile library.java.guava
- compile "javax.servlet:javax.servlet-api:3.1.0"
compile "org.conscrypt:conscrypt-openjdk:1.1.3:linux-x86_64"
compile "org.eclipse.jetty:jetty-server:9.2.10.v20150310"
compile "org.eclipse.jetty:jetty-servlet:9.2.10.v20150310"
diff --git a/runners/google-cloud-dataflow-java/worker/windmill/build.gradle b/runners/google-cloud-dataflow-java/worker/windmill/build.gradle
index 7939921..cd982e5 100644
--- a/runners/google-cloud-dataflow-java/worker/windmill/build.gradle
+++ b/runners/google-cloud-dataflow-java/worker/windmill/build.gradle
@@ -17,7 +17,7 @@
*/
apply plugin: org.apache.beam.gradle.BeamModulePlugin
-applyPortabilityNature()
+applyPortabilityNature(shadowJarValidationExcludes: ["org/apache/beam/runners/dataflow/worker/windmill/**"])
description = "Apache Beam :: Runners :: Google Cloud Dataflow Java :: Windmill"
ext.summary = "Windmill proto specifications"