You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by lc...@apache.org on 2022/03/23 04:39:04 UTC

[beam] branch master updated: Add option to add modules to JDK add-open (#17110)

This is an automated email from the ASF dual-hosted git repository.

lcwik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 06f3519  Add option to add modules to JDK add-open (#17110)
06f3519 is described below

commit 06f3519af13d6d7a79e00a11fb6824e00c856a8e
Author: Kiley <ki...@google.com>
AuthorDate: Tue Mar 22 21:35:16 2022 -0700

    Add option to add modules to JDK add-open (#17110)
    
    Add jamm java args from option json
---
 .../dataflow/options/DataflowPipelineOptions.java  |  18 +++
 sdks/java/container/Dockerfile                     |   5 +
 sdks/java/container/boot.go                        | 149 ++++++++++++++++++++-
 sdks/java/container/common.gradle                  |   8 ++
 sdks/java/container/java17/option-jamm.json        |  13 ++
 .../apache/beam/sdk/options/SdkHarnessOptions.java |  19 +++
 .../java/org/apache/beam/fn/harness/Caches.java    |   1 -
 7 files changed, 211 insertions(+), 2 deletions(-)

diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
index 7d3be45..ab6e0ea 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineOptions.java
@@ -217,4 +217,22 @@ public interface DataflowPipelineOptions
   boolean isHotKeyLoggingEnabled();
 
   void setHotKeyLoggingEnabled(boolean value);
+
+  /**
+   * Open modules needed for reflection that access JDK internals with Java 9+
+   *
+   * <p>With JDK 16+, <a href="#{https://openjdk.java.net/jeps/403}">JDK internals are strongly
+   * encapsulated</a> and can result in an InaccessibleObjectException being thrown if a tool or
+   * library uses reflection that access JDK internals. If you see these errors in your worker logs,
+   * you can pass in modules to open using the format module/package=target-module(,target-module)*
+   * to allow access to the library. E.g. java.base/java.lang=jamm
+   *
+   * <p>You may see warnings that jamm, a library used to more accurately size objects, is unable to
+   * make a private field accessible. To resolve the warning, open the specified module/package to
+   * jamm.
+   */
+  @Description("Open modules needed for reflection with Java 17+.")
+  List<String> getJdkAddOpenModules();
+
+  void setJdkAddOpenModules(List<String> options);
 }
diff --git a/sdks/java/container/Dockerfile b/sdks/java/container/Dockerfile
index 6e12ff1..179a7e2 100644
--- a/sdks/java/container/Dockerfile
+++ b/sdks/java/container/Dockerfile
@@ -41,6 +41,11 @@ COPY target/NOTICE /opt/apache/beam/
 # copy third party licenses
 ADD target/third_party_licenses /opt/apache/beam/third_party_licenses/
 
+# Copy Java options. Because the options directory may be empty and
+# COPY fails if there are no files, copy an extra LICENSE file then remove it.
+COPY target/LICENSE target/options/* /opt/apache/beam/options/
+RUN rm /opt/apache/beam/options/LICENSE
+
 # Add golang licenses. Because the go-license directory may be empty if
 # pull_licenses is false, and COPY fails if there are no files,
 # copy an extra LICENSE file then remove it.
diff --git a/sdks/java/container/boot.go b/sdks/java/container/boot.go
index 20e2c7d..f378573 100644
--- a/sdks/java/container/boot.go
+++ b/sdks/java/container/boot.go
@@ -19,11 +19,14 @@ package main
 
 import (
 	"context"
+	"encoding/json"
 	"flag"
 	"fmt"
+	"io/ioutil"
 	"log"
 	"os"
 	"path/filepath"
+	"sort"
 	"strconv"
 	"strings"
 
@@ -98,7 +101,6 @@ func main() {
 	log.Printf("Initializing java harness: %v", strings.Join(os.Args, " "))
 
 	// (1) Obtain the pipeline options
-
 	options, err := provision.ProtoToJSON(info.GetPipelineOptions())
 	if err != nil {
 		log.Fatalf("Failed to convert pipeline options: %v", err)
@@ -193,7 +195,35 @@ func main() {
 	} else {
 		args = append(args, jammAgentArgs)
 	}
+	// Apply meta options
+	const metaDir = "/opt/apache/beam/options"
+	metaOptions, err := LoadMetaOptions(metaDir)
+	javaOptions := BuildOptions(metaOptions)
+	// (1) Add custom jvm arguments: "-server -Xmx1324 -XXfoo .."
+	args = append(args, javaOptions.JavaArguments...)
+
+	// (2) Add classpath: "-cp foo.jar:bar.jar:.."
+	if len(javaOptions.Classpath) > 0 {
+		args = append(args, "-cp")
+		args = append(args, strings.Join(javaOptions.Classpath, ":"))
+	}
 
+	// (3) Add (sorted) properties: "-Dbar=baz -Dfoo=bar .."
+	var properties []string
+	for key, value := range javaOptions.Properties {
+		properties = append(properties, fmt.Sprintf("-D%s=%s", key, value))
+	}
+	sort.Strings(properties)
+	args = append(args, properties...)
+
+	// Open modules specified in pipeline options
+	if pipelineOptions, ok := info.GetPipelineOptions().GetFields()["options"]; ok {
+		if modules, ok := pipelineOptions.GetStructValue().GetFields()["jdkAddOpenModules"]; ok {
+			for _, module := range modules.GetListValue().GetValues() {
+				args = append(args, "--add-opens=" + module.GetStringValue())
+			}
+		}
+	}
 	args = append(args, "org.apache.beam.fn.harness.FnHarness")
 	log.Printf("Executing: java %v", strings.Join(args, " "))
 
@@ -211,3 +241,120 @@ func heapSizeLimit(info *fnpb.ProvisionInfo) uint64 {
 	}
 	return 1 << 30
 }
+
+// Options represents java VM invocation options in a simple,
+// semi-structured way.
+type Options struct {
+	JavaArguments []string          `json:"java_arguments,omitempty"`
+	Properties    map[string]string `json:"properties,omitempty"`
+	Classpath     []string          `json:"classpath,omitempty"`
+}
+
+// MetaOption represents a jvm environment transformation or setup
+// that the launcher employs. The aim is to keep the service-side and
+// user-side required configuration simple and minimal, yet allow
+// numerous execution tweaks. Most tweaks are enabled by default and
+// require no input. Some setups, such as Cloud Debugging, are opt-in.
+//
+// Meta-options are usually included with the image and use supporting
+// files, usually jars. A few are intrinsic because they are require
+// additional input or complex computations, such as Cloud Debugging
+// and Cloud Profiling. Meta-options can be enabled or disabled by
+// name. For the most part, the meta-option names are not guaranteed
+// to be backwards compatible or stable. They are rather knobs that
+// can be tuned if some well-intended transformation cause trouble for
+// a customer. For tweaks, the expectation is that the default is
+// almost always correct.
+//
+// Meta-options are simple additive manipulations applied in priority
+// order (applied low to high) to allow jvm customization by adding
+// files, notably enabling customization by later docker layers. The
+// override semantics is prepend for lists and simple overwrite
+// otherwise. A common use case is adding a jar to the beginning of
+// the classpath, such as the shuffle or windmill jni jar, or adding
+// an agent.
+type MetaOption struct {
+	Name        string  `json:"name,omitempty"`
+	Description string  `json:"description,omitempty"`
+	Enabled     bool    `json:"enabled,omitempty"`
+	Priority    int     `json:"priority,omitempty"`
+	Options     Options `json:"options"`
+}
+
+// byPriority sorts MetaOptions by priority, highest first.
+type byPriority []*MetaOption
+
+func (f byPriority) Len() int           { return len(f) }
+func (f byPriority) Swap(i, j int)      { f[i], f[j] = f[j], f[i] }
+func (f byPriority) Less(i, j int) bool { return f[i].Priority > f[j].Priority }
+
+// LoadMetaOptions scans the directory tree for meta-option metadata
+// files and loads them. Any regular file named "option-XX.json" is
+// strictly assumed to be a meta-option file. This strictness allows
+// us to fail hard if such a file cannot be parsed.
+//
+// Loading meta-options from disk allows extra files and their
+// configuration be kept together and defined externally.
+func LoadMetaOptions(dir string) ([]*MetaOption, error) {
+	var meta []*MetaOption
+
+	worker := func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if !info.Mode().IsRegular() {
+			return nil
+		}
+		if !strings.HasPrefix(info.Name(), "option-") {
+			return nil
+		}
+		if !strings.HasSuffix(info.Name(), ".json") {
+			return nil
+		}
+
+		content, err := ioutil.ReadFile(path)
+		if err != nil {
+			return err
+		}
+
+		var option MetaOption
+		if err := json.Unmarshal(content, &option); err != nil {
+			return fmt.Errorf("failed to parse %s: %v", path, err)
+		}
+
+		log.Printf("Loaded meta-option '%s'", option.Name)
+
+		meta = append(meta, &option)
+		return nil
+	}
+
+	if err := filepath.Walk(dir, worker); err != nil {
+		return nil, err
+	}
+	return meta, nil
+}
+
+func BuildOptions(metaOptions []*MetaOption) *Options {
+	options := &Options{Properties: make(map[string]string)}
+
+	sort.Sort(byPriority(metaOptions))
+	for _, meta := range metaOptions {
+		if !meta.Enabled {
+			continue
+		}
+
+		options.JavaArguments = append(options.JavaArguments, meta.Options.JavaArguments...)
+
+		for key, value := range meta.Options.Properties {
+			_, exists := options.Properties[key]
+			if !exists {
+				options.Properties[key] = value
+			} else {
+				log.Printf("Warning: %s property -D%s=%s was redefined", meta.Name, key, value)
+			}
+		}
+
+		options.Classpath = append(options.Classpath, meta.Options.Classpath...)
+	}
+	return options
+}
diff --git a/sdks/java/container/common.gradle b/sdks/java/container/common.gradle
index bfccfdb..e58f19e 100644
--- a/sdks/java/container/common.gradle
+++ b/sdks/java/container/common.gradle
@@ -74,6 +74,13 @@ task copyGolangLicenses(type: Copy) {
     dependsOn ':release:go-licenses:java:createLicenses'
 }
 
+task copyJdkOptions(type: Copy) {
+    if (imageJavaVersion == "17") {
+        from "option-jamm.json"
+        into "build/target/options"
+    }
+}
+
 task skipPullLicenses(type: Exec) {
     executable "sh"
     args "-c", "mkdir -p build/target/go-licenses build/target/third_party_licenses && touch build/target/third_party_licenses/skip"
@@ -108,3 +115,4 @@ if (project.rootProject.hasProperty(["docker-pull-licenses"]) ||
 dockerPrepare.dependsOn copySdkHarnessLauncher
 dockerPrepare.dependsOn copyDockerfileDependencies
 dockerPrepare.dependsOn ":sdks:java:container:downloadCloudProfilerAgent"
+dockerPrepare.dependsOn copyJdkOptions
\ No newline at end of file
diff --git a/sdks/java/container/java17/option-jamm.json b/sdks/java/container/java17/option-jamm.json
new file mode 100644
index 0000000..e994648
--- /dev/null
+++ b/sdks/java/container/java17/option-jamm.json
@@ -0,0 +1,13 @@
+{
+  "name": "jamm",
+  "enabled": true,
+  "options": {
+    "java_arguments": [
+      "--add-modules=jamm",
+      "--module-path=/opt/apache/beam/jars/jamm.jar",
+      "--add-opens=java.base/java.lang=jamm",
+      "--add-opens=java.base/java.lang.ref=jamm",
+      "--add-opens=java.base/java.util=jamm"
+    ]
+  }
+}
\ No newline at end of file
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/SdkHarnessOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/SdkHarnessOptions.java
index 022d521..8c63106 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/SdkHarnessOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/SdkHarnessOptions.java
@@ -22,6 +22,7 @@ import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Prec
 import com.fasterxml.jackson.annotation.JsonCreator;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import org.apache.beam.sdk.annotations.Experimental;
 import org.apache.beam.sdk.annotations.Experimental.Kind;
@@ -294,4 +295,22 @@ public interface SdkHarnessOptions extends PipelineOptions {
       return overrides;
     }
   }
+
+  /**
+   * Open modules needed for reflection that access JDK internals with Java 9+
+   *
+   * <p>With JDK 16+, <a href="#{https://openjdk.java.net/jeps/403}">JDK internals are strongly
+   * encapsulated</a> and can result in an InaccessibleObjectException being thrown if a tool or
+   * library uses reflection that access JDK internals. If you see these errors in your worker logs,
+   * you can pass in modules to open using the format module/package=target-module(,target-module)*
+   * to allow access to the library. E.g. java.base/java.lang=jamm
+   *
+   * <p>You may see warnings that jamm, a library used to more accurately size objects, is unable to
+   * make a private field accessible. To resolve the warning, open the specified module/package to
+   * jamm.
+   */
+  @Description("Open modules needed for reflection with Java 17+.")
+  List<String> getJdkAddOpenModules();
+
+  void setJdkAddOpenModules(List<String> options);
 }
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/Caches.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/Caches.java
index c4f1688..9fa658e 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/Caches.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/Caches.java
@@ -65,7 +65,6 @@ public final class Caches {
     } catch (RuntimeException e) {
       // Checking for RuntimeException since java.lang.reflect.InaccessibleObjectException is only
       // available starting Java 9
-      // TODO(BEAM-13695) Provide more accurate memory measurements for Java 17
       LOG.warn("JVM prevents jamm from accessing subgraph - cache sizes may be underestimated", e);
       return MEMORY_METER.measure(o);
     }