You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by go...@apache.org on 2019/03/29 19:38:51 UTC

[beam] branch master updated: [BEAM-6853] Add sdk-worker-parallelism and environment-cache-millis to python sdk

This is an automated email from the ASF dual-hosted git repository.

goenka pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 36e9d9c  [BEAM-6853] Add sdk-worker-parallelism and environment-cache-millis to python sdk
     new 8ffc058  Merge pull request #8082 from angoenka/portable_options
36e9d9c is described below

commit 36e9d9cc98ce8b4e024278cc0eb631606e4220aa
Author: Ankur Goenka <an...@gmail.com>
AuthorDate: Mon Mar 18 14:33:37 2019 -0700

    [BEAM-6853] Add sdk-worker-parallelism and environment-cache-millis to python sdk
---
 .../org/apache/beam/sdk/options/PortablePipelineOptions.java   |  3 ++-
 sdks/python/apache_beam/options/pipeline_options.py            | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/PortablePipelineOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/PortablePipelineOptions.java
index edd9c71..75c55fc 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/options/PortablePipelineOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/options/PortablePipelineOptions.java
@@ -75,7 +75,8 @@ public interface PortablePipelineOptions extends PipelineOptions {
 
   @Description(
       "Sets the number of sdk worker processes that will run on each worker node. Default is 1. If"
-          + " 0, it will be automatically set according to the number of CPU cores on the worker.")
+          + " 0, it will be automatically set by the runner by looking at different parameters "
+          + "(e.g. number of CPU cores on the worker machine).")
   @Nullable
   Long getSdkWorkerParallelism();
 
diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py
index 22a0c0a..d9e8164 100644
--- a/sdks/python/apache_beam/options/pipeline_options.py
+++ b/sdks/python/apache_beam/options/pipeline_options.py
@@ -737,6 +737,16 @@ class PortableOptions(PipelineOptions):
               '"<process to execute>", "env":{"<Environment variables 1>": '
               '"<ENV_VAL>"} }. All fields in the json are optional except '
               'command.'))
+    parser.add_argument(
+        '--sdk-worker-parallelism', default=None,
+        help=('Sets the number of sdk worker processes that will run on each '
+              'worker node. Default is 1. If 0, it will be automatically set '
+              'by the runner by looking at different parameters (e.g. number '
+              'of CPU cores on the worker machine).'))
+    parser.add_argument(
+        '--environment-cache-millis', default=0,
+        help=('Duration in milliseconds for environment cache within a job. '
+              '0 means no caching.'))
 
 
 class RunnerOptions(PipelineOptions):