You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by lc...@apache.org on 2022/03/23 04:39:04 UTC
[beam] branch master updated: Add option to add modules to JDK add-open (#17110)
This is an automated email from the ASF dual-hosted git repository.
lcwik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 06f3519 Add option to add modules to JDK add-open (#17110)
06f3519 is described below
commit 06f3519af13d6d7a79e00a11fb6824e00c856a8e
Author: Kiley <ki...@google.com>
AuthorDate: Tue Mar 22 21:35:16 2022 -0700
Add option to add modules to JDK add-open (#17110)
Add jamm java args from option json
---
.../dataflow/options/DataflowPipelineOptions.java | 18 +++
sdks/java/container/Dockerfile | 5 +
sdks/java/container/boot.go | 149 ++++++++++++++++++++-
sdks/java/container/common.gradle | 8 ++
sdks/java/container/java17/option-jamm.json | 13 ++
.../apache/beam/sdk/options/SdkHarnessOptions.java | 19 +++
.../java/org/apache/beam/fn/harness/Caches.java | 1 -
7 files changed, 211 insertions(+), 2 deletions(-)
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
index 7d3be45..ab6e0ea 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
@@ -217,4 +217,22 @@ public interface DataflowPipelineOptions
boolean isHotKeyLoggingEnabled();
void setHotKeyLoggingEnabled(boolean value);
+
+ /**
+ * Open modules needed for reflection that access JDK internals with Java 9+
+ *
+ * <p>With JDK 16+, <a href="#{https://openjdk.java.net/jeps/403}">JDK internals are strongly
+ * encapsulated</a> and can result in an InaccessibleObjectException being thrown if a tool or
+ * library uses reflection that access JDK internals. If you see these errors in your worker logs,
+ * you can pass in modules to open using the format module/package=target-module(,target-module)*
+ * to allow access to the library. E.g. java.base/java.lang=jamm
+ *
+ * <p>You may see warnings that jamm, a library used to more accurately size objects, is unable to
+ * make a private field accessible. To resolve the warning, open the specified module/package to
+ * jamm.
+ */
+ @Description("Open modules needed for reflection with Java 17+.")
+ List<String> getJdkAddOpenModules();
+
+ void setJdkAddOpenModules(List<String> options);
}
diff --git a/sdks/java/container/Dockerfile b/sdks/java/container/Dockerfile
index 6e12ff1..179a7e2 100644
--- a/sdks/java/container/Dockerfile
+++ b/sdks/java/container/Dockerfile
@@ -41,6 +41,11 @@ COPY target/NOTICE /opt/apache/beam/
# copy third party licenses
ADD target/third_party_licenses /opt/apache/beam/third_party_licenses/
+# Copy Java options. Because the options directory may be empty and
+# COPY fails if there are no files, copy an extra LICENSE file then remove it.
+COPY target/LICENSE target/options/* /opt/apache/beam/options/
+RUN rm /opt/apache/beam/options/LICENSE
+
# Add golang licenses. Because the go-license directory may be empty if
# pull_licenses is false, and COPY fails if there are no files,
# copy an extra LICENSE file then remove it.
diff --git a/sdks/java/container/boot.go b/sdks/java/container/boot.go
index 20e2c7d..f378573 100644
--- a/sdks/java/container/boot.go
+++ b/sdks/java/container/boot.go
@@ -19,11 +19,14 @@ package main
import (
"context"
+ "encoding/json"
"flag"
"fmt"
+ "io/ioutil"
"log"
"os"
"path/filepath"
+ "sort"
"strconv"
"strings"
@@ -98,7 +101,6 @@ func main() {
log.Printf("Initializing java harness: %v", strings.Join(os.Args, " "))
// (1) Obtain the pipeline options
-
options, err := provision.ProtoToJSON(info.GetPipelineOptions())
if err != nil {
log.Fatalf("Failed to convert pipeline options: %v", err)
@@ -193,7 +195,35 @@ func main() {
} else {
args = append(args, jammAgentArgs)
}
+ // Apply meta options
+ const metaDir = "/opt/apache/beam/options"
+ metaOptions, err := LoadMetaOptions(metaDir)
+ javaOptions := BuildOptions(metaOptions)
+ // (1) Add custom jvm arguments: "-server -Xmx1324 -XXfoo .."
+ args = append(args, javaOptions.JavaArguments...)
+
+ // (2) Add classpath: "-cp foo.jar:bar.jar:.."
+ if len(javaOptions.Classpath) > 0 {
+ args = append(args, "-cp")
+ args = append(args, strings.Join(javaOptions.Classpath, ":"))
+ }
+ // (3) Add (sorted) properties: "-Dbar=baz -Dfoo=bar .."
+ var properties []string
+ for key, value := range javaOptions.Properties {
+ properties = append(properties, fmt.Sprintf("-D%s=%s", key, value))
+ }
+ sort.Strings(properties)
+ args = append(args, properties...)
+
+ // Open modules specified in pipeline options
+ if pipelineOptions, ok := info.GetPipelineOptions().GetFields()["options"]; ok {
+ if modules, ok := pipelineOptions.GetStructValue().GetFields()["jdkAddOpenModules"]; ok {
+ for _, module := range modules.GetListValue().GetValues() {
+ args = append(args, "--add-opens=" + module.GetStringValue())
+ }
+ }
+ }
args = append(args, "org.apache.beam.fn.harness.FnHarness")
log.Printf("Executing: java %v", strings.Join(args, " "))
@@ -211,3 +241,120 @@ func heapSizeLimit(info *fnpb.ProvisionInfo) uint64 {
}
return 1 << 30
}
+
+// Options represents java VM invocation options in a simple,
+// semi-structured way.
+type Options struct {
+ JavaArguments []string `json:"java_arguments,omitempty"`
+ Properties map[string]string `json:"properties,omitempty"`
+ Classpath []string `json:"classpath,omitempty"`
+}
+
+// MetaOption represents a jvm environment transformation or setup
+// that the launcher employs. The aim is to keep the service-side and
+// user-side required configuration simple and minimal, yet allow
+// numerous execution tweaks. Most tweaks are enabled by default and
+// require no input. Some setups, such as Cloud Debugging, are opt-in.
+//
+// Meta-options are usually included with the image and use supporting
+// files, usually jars. A few are intrinsic because they are require
+// additional input or complex computations, such as Cloud Debugging
+// and Cloud Profiling. Meta-options can be enabled or disabled by
+// name. For the most part, the meta-option names are not guaranteed
+// to be backwards compatible or stable. They are rather knobs that
+// can be tuned if some well-intended transformation cause trouble for
+// a customer. For tweaks, the expectation is that the default is
+// almost always correct.
+//
+// Meta-options are simple additive manipulations applied in priority
+// order (applied low to high) to allow jvm customization by adding
+// files, notably enabling customization by later docker layers. The
+// override semantics is prepend for lists and simple overwrite
+// otherwise. A common use case is adding a jar to the beginning of
+// the classpath, such as the shuffle or windmill jni jar, or adding
+// an agent.
+type MetaOption struct {
+ Name string `json:"name,omitempty"`
+ Description string `json:"description,omitempty"`
+ Enabled bool `json:"enabled,omitempty"`
+ Priority int `json:"priority,omitempty"`
+ Options Options `json:"options"`
+}
+
+// byPriority sorts MetaOptions by priority, highest first.
+type byPriority []*MetaOption
+
+func (f byPriority) Len() int { return len(f) }
+func (f byPriority) Swap(i, j int) { f[i], f[j] = f[j], f[i] }
+func (f byPriority) Less(i, j int) bool { return f[i].Priority > f[j].Priority }
+
+// LoadMetaOptions scans the directory tree for meta-option metadata
+// files and loads them. Any regular file named "option-XX.json" is
+// strictly assumed to be a meta-option file. This strictness allows
+// us to fail hard if such a file cannot be parsed.
+//
+// Loading meta-options from disk allows extra files and their
+// configuration be kept together and defined externally.
+func LoadMetaOptions(dir string) ([]*MetaOption, error) {
+ var meta []*MetaOption
+
+ worker := func(path string, info os.FileInfo, err error) error {
+ if err != nil {
+ return err
+ }
+ if !info.Mode().IsRegular() {
+ return nil
+ }
+ if !strings.HasPrefix(info.Name(), "option-") {
+ return nil
+ }
+ if !strings.HasSuffix(info.Name(), ".json") {
+ return nil
+ }
+
+ content, err := ioutil.ReadFile(path)
+ if err != nil {
+ return err
+ }
+
+ var option MetaOption
+ if err := json.Unmarshal(content, &option); err != nil {
+ return fmt.Errorf("failed to parse %s: %v", path, err)
+ }
+
+ log.Printf("Loaded meta-option '%s'", option.Name)
+
+ meta = append(meta, &option)
+ return nil
+ }
+
+ if err := filepath.Walk(dir, worker); err != nil {
+ return nil, err
+ }
+ return meta, nil
+}
+
+func BuildOptions(metaOptions []*MetaOption) *Options {
+ options := &Options{Properties: make(map[string]string)}
+
+ sort.Sort(byPriority(metaOptions))
+ for _, meta := range metaOptions {
+ if !meta.Enabled {
+ continue
+ }
+
+ options.JavaArguments = append(options.JavaArguments, meta.Options.JavaArguments...)
+
+ for key, value := range meta.Options.Properties {
+ _, exists := options.Properties[key]
+ if !exists {
+ options.Properties[key] = value
+ } else {
+ log.Printf("Warning: %s property -D%s=%s was redefined", meta.Name, key, value)
+ }
+ }
+
+ options.Classpath = append(options.Classpath, meta.Options.Classpath...)
+ }
+ return options
+}
diff --git a/sdks/java/container/common.gradle b/sdks/java/container/common.gradle
index bfccfdb..e58f19e 100644
--- a/sdks/java/container/common.gradle
+++ b/sdks/java/container/common.gradle
@@ -74,6 +74,13 @@ task copyGolangLicenses(type: Copy) {
dependsOn ':release:go-licenses:java:createLicenses'
}
+task copyJdkOptions(type: Copy) {
+ if (imageJavaVersion == "17") {
+ from "option-jamm.json"
+ into "build/target/options"
+ }
+}
+
task skipPullLicenses(type: Exec) {
executable "sh"
args "-c", "mkdir -p build/target/go-licenses build/target/third_party_licenses && touch build/target/third_party_licenses/skip"
@@ -108,3 +115,4 @@ if (project.rootProject.hasProperty(["docker-pull-licenses"]) ||
dockerPrepare.dependsOn copySdkHarnessLauncher
dockerPrepare.dependsOn copyDockerfileDependencies
dockerPrepare.dependsOn ":sdks:java:container:downloadCloudProfilerAgent"
+dockerPrepare.dependsOn copyJdkOptions
\ No newline at end of file
diff --git a/sdks/java/container/java17/option-jamm.json b/sdks/java/container/java17/option-jamm.json
new file mode 100644
index 0000000..e994648
--- /dev/null
+++ b/sdks/java/container/java17/option-jamm.json
@@ -0,0 +1,13 @@
+{
+ "name": "jamm",
+ "enabled": true,
+ "options": {
+ "java_arguments": [
+ "--add-modules=jamm",
+ "--module-path=/opt/apache/beam/jars/jamm.jar",
+ "--add-opens=java.base/java.lang=jamm",
+ "--add-opens=java.base/java.lang.ref=jamm",
+ "--add-opens=java.base/java.util=jamm"
+ ]
+ }
+}
\ No newline at end of file
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/SdkHarnessOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/SdkHarnessOptions.java
index 022d521..8c63106 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/SdkHarnessOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/SdkHarnessOptions.java
@@ -22,6 +22,7 @@ import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Prec
import com.fasterxml.jackson.annotation.JsonCreator;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.annotations.Experimental.Kind;
@@ -294,4 +295,22 @@ public interface SdkHarnessOptions extends PipelineOptions {
return overrides;
}
}
+
+ /**
+ * Open modules needed for reflection that access JDK internals with Java 9+
+ *
+ * <p>With JDK 16+, <a href="#{https://openjdk.java.net/jeps/403}">JDK internals are strongly
+ * encapsulated</a> and can result in an InaccessibleObjectException being thrown if a tool or
+ * library uses reflection that access JDK internals. If you see these errors in your worker logs,
+ * you can pass in modules to open using the format module/package=target-module(,target-module)*
+ * to allow access to the library. E.g. java.base/java.lang=jamm
+ *
+ * <p>You may see warnings that jamm, a library used to more accurately size objects, is unable to
+ * make a private field accessible. To resolve the warning, open the specified module/package to
+ * jamm.
+ */
+ @Description("Open modules needed for reflection with Java 17+.")
+ List<String> getJdkAddOpenModules();
+
+ void setJdkAddOpenModules(List<String> options);
}
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/Caches.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/Caches.java
index c4f1688..9fa658e 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/Caches.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/Caches.java
@@ -65,7 +65,6 @@ public final class Caches {
} catch (RuntimeException e) {
// Checking for RuntimeException since java.lang.reflect.InaccessibleObjectException is only
// available starting Java 9
- // TODO(BEAM-13695) Provide more accurate memory measurements for Java 17
LOG.warn("JVM prevents jamm from accessing subgraph - cache sizes may be underestimated", e);
return MEMORY_METER.measure(o);
}