You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2020/04/27 21:30:11 UTC

[GitHub] [airflow] aaltay commented on a change in pull request #8531: Support all RuntimeEnvironment parameters in DataflowTemplatedJobStartOperator

aaltay commented on a change in pull request #8531:
URL: https://github.com/apache/airflow/pull/8531#discussion_r416140436



##########
File path: airflow/providers/google/cloud/hooks/dataflow.py
##########
@@ -548,23 +554,17 @@ def start_template_dataflow(
         :type location: str
         """
         name = self._build_dataflow_job_name(job_name, append_job_name)
-        # Builds RuntimeEnvironment from variables dictionary
-        # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
-        environment = {}
-        for key in ['numWorkers', 'maxWorkers', 'zone', 'serviceAccountEmail',
-                    'tempLocation', 'bypassTempDirValidation', 'machineType',
-                    'additionalExperiments', 'network', 'subnetwork', 'additionalUserLabels']:
-            if key in variables:
-                environment.update({key: variables[key]})
-        body = {"jobName": name,
-                "parameters": parameters,
-                "environment": environment}
+
         service = self.get_conn()
         request = service.projects().locations().templates().launch(  # pylint: disable=no-member
             projectId=project_id,
             location=location,
             gcsPath=dataflow_template,
-            body=body
+            body={
+                "jobName": name,
+                "parameters": parameters,
+                "environment": variables

Review comment:
       What is the difference between parameters and variables?

##########
File path: airflow/providers/google/cloud/operators/dataflow.py
##########
@@ -377,6 +394,7 @@ def __init__(
         self.poll_sleep = poll_sleep
         self.job_id = None
         self.hook: Optional[DataflowHook] = None
+        self.options.update(dataflow_default_options)

Review comment:
       Would not this override user provided options with default options?

##########
File path: airflow/providers/google/cloud/hooks/dataflow.py
##########
@@ -532,7 +532,13 @@ def start_template_dataflow(
 
         :param job_name: The name of the job.

Review comment:
       Is this bug ( #8300) only affecting templates?

##########
File path: tests/providers/google/cloud/hooks/test_dataflow.py
##########
@@ -52,32 +54,32 @@
     'stagingLocation': 'gs://test/staging',
     'labels': {'foo': 'bar'}
 }
-DATAFLOW_VARIABLES_TEMPLATE = {
-    'project': 'test',
-    'tempLocation': 'gs://test/temp',
-    'zone': 'us-central1-f'
-}
 RUNTIME_ENV = {
-    'tempLocation': 'gs://test/temp',
-    'zone': 'us-central1-f',
-    'numWorkers': 2,
-    'maxWorkers': 10,
-    'serviceAccountEmail': 'test@apache.airflow',
-    'machineType': 'n1-standard-1',
     'additionalExperiments': ['exp_flag1', 'exp_flag2'],
-    'network': 'default',
-    'subnetwork': 'regions/REGION/subnetworks/SUBNETWORK',
     'additionalUserLabels': {
         'name': 'wrench',
         'mass': '1.3kg',
         'count': '3'
-    }
+    },
+    'bypassTempDirValidation': {},
+    'ipConfiguration': 'WORKER_IP_PRIVATE',
+    'kmsKeyName': (
+        'projects/TEST_PROJECT_ID/locations/TEST_LOCATIONS/keyRings/TEST_KEYRING/cryptoKeys/TEST_CRYPTOKEYS'
+    ),
+    'maxWorkers': 10,
+    'network': 'default',
+    'numWorkers': 2,
+    'serviceAccountEmail': 'test@apache.airflow',
+    'subnetwork': 'regions/REGION/subnetworks/SUBNETWORK',
+    'tempLocation': 'gs://test/temp',
+    'workerRegion': "test-region",
+    'workerZone': 'test-zone',
+    'zone': 'us-central1-f',
+    'machineType': 'n1-standard-1',

Review comment:
       maybe test private ip flag, since it was specifically mentioned in the issue.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org