You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2021/03/10 03:25:10 UTC

[GitHub] [beam] chamikaramj commented on a change in pull request #13730: [BEAM-11613] Updates Dataflow pipelines to always use SDK Harness containers hosted in GCR

chamikaramj commented on a change in pull request #13730:
URL: https://github.com/apache/beam/pull/13730#discussion_r590970909



##########
File path: sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
##########
@@ -687,18 +687,41 @@ def create_job(self, job):
         'A template was just created at location %s', template_location)
     return None
 
+  @staticmethod
+  def _update_container_image_for_dataflow(beam_container_image_url):
+    image_suffix = beam_container_image_url.rsplit('/', 1)[1]
+    return names.DATAFLOW_CONTAINER_IMAGE_REPOSITORY + '/' + image_suffix
+
   @staticmethod
   def _apply_sdk_environment_overrides(proto_pipeline, sdk_overrides):
-    # Update environments based on user provided overrides
-    if sdk_overrides:
-      for environment in proto_pipeline.components.environments.values():
-        docker_payload = proto_utils.parse_Bytes(
-            environment.payload, beam_runner_api_pb2.DockerPayload)
-        for pattern, override in sdk_overrides.items():
-          new_payload = copy(docker_payload)
-          new_payload.container_image = re.sub(
-              pattern, override, docker_payload.container_image)
-          environment.payload = new_payload.SerializeToString()
+    # Updates container image URLs for Dataflow.

Review comment:
       Not sure if I fully got this. I will leave this as is for now, but let me know if further updates are needed.

##########
File path: sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
##########
@@ -683,18 +683,41 @@ def create_job(self, job):
         'A template was just created at location %s', template_location)
     return None
 
+  @staticmethod
+  def _update_container_image_for_dataflow(beam_container_image_url):
+    image_suffix = beam_container_image_url.rsplit('/', 1)[1]

Review comment:
       Done.

##########
File path: sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
##########
@@ -560,8 +560,8 @@ def __init__(self, options):
   def _get_sdk_image_overrides(self, pipeline_options):
     worker_options = pipeline_options.view_as(WorkerOptions)
     sdk_overrides = worker_options.sdk_harness_container_image_overrides
-    if sdk_overrides:
-      return dict(override_str.split(',', 1) for override_str in sdk_overrides)
+    return dict(override_str.split(',', 1)

Review comment:
       Done.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org