You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ke...@apache.org on 2017/11/17 20:30:55 UTC

[04/50] [abbrv] beam git commit: Dataflow: Add option to upload heap dumps to GCS

Dataflow: Add option to upload heap dumps to GCS

This flag needs to go in before backend runner code that reads it. It
will have no effect until that code is deployed.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2fc1c055
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2fc1c055
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2fc1c055

Branch: refs/heads/tez-runner
Commit: 2fc1c055396a7c16d2c7bcb3364e45877b55c5d3
Parents: e40e882
Author: bchambers <bc...@google.com>
Authored: Wed Nov 8 12:50:44 2017 -0800
Committer: bchambers <bc...@google.com>
Committed: Fri Nov 10 09:21:13 2017 -0800

----------------------------------------------------------------------
 .../options/DataflowPipelineDebugOptions.java        | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/2fc1c055/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java
index ec108da..ffc51d1 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java
@@ -23,6 +23,7 @@ import java.util.Map;
 import org.apache.beam.runners.dataflow.util.DataflowTransport;
 import org.apache.beam.runners.dataflow.util.GcsStager;
 import org.apache.beam.runners.dataflow.util.Stager;
+import org.apache.beam.sdk.annotations.Experimental;
 import org.apache.beam.sdk.options.Default;
 import org.apache.beam.sdk.options.DefaultValueFactory;
 import org.apache.beam.sdk.options.Description;
@@ -39,6 +40,7 @@ import org.apache.beam.sdk.util.InstanceBuilder;
     + "debugging and testing purposes.")
 @Hidden
 public interface DataflowPipelineDebugOptions extends ExperimentalOptions, PipelineOptions {
+
   /**
    * The root URL for the Dataflow API. {@code dataflowEndpoint} can override this value
    * if it contains an absolute URL, otherwise {@code apiRootUrl} will be combined with
@@ -178,6 +180,19 @@ public interface DataflowPipelineDebugOptions extends ExperimentalOptions, Pipel
   void setDumpHeapOnOOM(boolean dumpHeapBeforeExit);
 
   /**
+   * CAUTION: This option implies dumpHeapOnOOM, and has similar caveats. Specifically, heap
+   * dumps can be of comparable size to the default boot disk. Consider increasing the boot disk
+   * size before setting this option.
+   */
+  @Description(
+      "[EXPERIMENTAL] Set to a GCS bucket (directory) to upload heap dumps to the given location.\n"
+      + "Enabling this implies that heap dumps should be generated on OOM (--dumpHeapOnOOM=true)\n"
+      + "Uploads will continue until the pipeline is stopped or updated without this option.\n")
+  @Experimental
+  String getSaveHeapDumpsToGcsPath();
+  void setSaveHeapDumpsToGcsPath(String gcsPath);
+
+  /**
    * Creates a {@link Stager} object using the class specified in
    * {@link #getStagerClass()}.
    */