You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2021/03/15 18:25:50 UTC

[GitHub] [beam] emilymye commented on a change in pull request #14224: [BEAM-11613] Limits Dataflow GCR container image overriding

emilymye commented on a change in pull request #14224:
URL: https://github.com/apache/beam/pull/14224#discussion_r594583372



##########
File path: sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
##########
@@ -696,24 +696,35 @@ def _update_container_image_for_dataflow(beam_container_image_url):
     return names.DATAFLOW_CONTAINER_IMAGE_REPOSITORY + '/' + image_suffix
 
   @staticmethod
-  def _apply_sdk_environment_overrides(proto_pipeline, sdk_overrides):
+  def _apply_sdk_environment_overrides(
+      proto_pipeline, sdk_overrides, pipeline_options):
     # Updates container image URLs for Dataflow.
     # For a given container image URL
     # * If a matching override has been provided that will be used.
     # * If not, container image URL will be updated to use the correct base
     #   repository (GRC) for Dataflow.
+
+    pipeline_sdk_container_image = get_container_image_from_options(
+        pipeline_options)
+
     for environment in proto_pipeline.components.environments.values():
       docker_payload = proto_utils.parse_Bytes(
           environment.payload, beam_runner_api_pb2.DockerPayload)
       overridden = False
-      new_container_image = None
+      new_container_image = docker_payload.container_image
       for pattern, override in sdk_overrides.items():
         new_container_image = re.sub(
             pattern, override, docker_payload.container_image)
         if new_container_image != docker_payload.container_image:
           overridden = True
 
-      if not overridden:
+      # For Dataflow, We replace external Apache Beam containers images hosted
+      # in Docker Hub with copies hosted in GCR for improved performance.
+      # We do not want to replace a container image if it's the pipeline SDK
+      # image or if it was explicitly overridden.
+      from apache_beam.transforms.environments import is_apache_beam_container

Review comment:
       Is there a reason we need to have this import here? Can we also renamed pipeline_sdk_container_image as like default_sdk_image or something? I don't think it's very clear what this step is doing. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org