You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2021/12/09 21:30:23 UTC

[GitHub] [beam] chamikaramj commented on a change in pull request #16186: [BEAM-13355] add Big Query parameter to enable users to specify load_…

chamikaramj commented on a change in pull request #16186:
URL: https://github.com/apache/beam/pull/16186#discussion_r766168570



##########
File path: sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
##########
@@ -459,6 +459,38 @@ def test_records_traverse_transform_with_mocks(self):
 
       assert_that(jobs, equal_to([job_reference]), label='CheckJobs')
 
+  def test_load_job_id_used(self):

Review comment:
       Can you also manually run a pipeline to confirm that this works end to end? (There is no need to add an integration test.)

##########
File path: sdks/python/apache_beam/io/gcp/bigquery_file_loads.py
##########
@@ -527,8 +532,11 @@ def process(self, element, job_name_prefix=None, unused_schema_mod_jobs=None):
 
     if not self.bq_io_metadata:
       self.bq_io_metadata = create_bigquery_io_metadata(self._step_name)
+
+    project_id = copy_to_reference.projectId \

Review comment:
       Nit: For formatting, I think we usually prefer wrapping the expression in parentheses over using a backslash (\) line continuation.

##########
File path: sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
##########
@@ -459,6 +459,38 @@ def test_records_traverse_transform_with_mocks(self):
 
       assert_that(jobs, equal_to([job_reference]), label='CheckJobs')
 
+  def test_load_job_id_used(self):
+    job_reference = bigquery_api.JobReference()
+    job_reference.projectId = 'loadJobId'
+    job_reference.jobId = 'job_name1'
+
+    result_job = bigquery_api.Job()
+    result_job.jobReference = job_reference
+
+    mock_job = mock.Mock()
+    mock_job.status.state = 'DONE'
+    mock_job.status.errorResult = None
+    mock_job.jobReference = job_reference
+
+    bq_client = mock.Mock()
+    bq_client.jobs.Get.return_value = mock_job
+
+    bq_client.jobs.Insert.return_value = result_job
+
+    transform = bqfl.BigQueryBatchFileLoads(
+        'project1:dataset1.table1',
+        custom_gcs_temp_location=self._new_tempdir(),
+        test_client=bq_client,
+        validate=False,
+        load_job_project_id='loadJobId')
+
+    with TestPipeline('DirectRunner') as p:
+      outputs = p | beam.Create(_ELEMENTS) | transform
+      jobs = outputs[bqfl.BigQueryBatchFileLoads.DESTINATION_JOBID_PAIRS] \
+             | "GetJobs" >> beam.Map(lambda x: x[1])
+
+      assert_that(jobs, equal_to([job_reference]), label='CheckJobProjectIds')

Review comment:
       I think you need a second test to confirm that the copy job also picks up the specified project.

##########
File path: sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
##########
@@ -459,6 +459,38 @@ def test_records_traverse_transform_with_mocks(self):
 
       assert_that(jobs, equal_to([job_reference]), label='CheckJobs')
 
+  def test_load_job_id_used(self):
+    job_reference = bigquery_api.JobReference()
+    job_reference.projectId = 'loadJobId'
+    job_reference.jobId = 'job_name1'
+
+    result_job = bigquery_api.Job()
+    result_job.jobReference = job_reference
+
+    mock_job = mock.Mock()
+    mock_job.status.state = 'DONE'
+    mock_job.status.errorResult = None
+    mock_job.jobReference = job_reference
+
+    bq_client = mock.Mock()
+    bq_client.jobs.Get.return_value = mock_job
+
+    bq_client.jobs.Insert.return_value = result_job
+
+    transform = bqfl.BigQueryBatchFileLoads(
+        'project1:dataset1.table1',
+        custom_gcs_temp_location=self._new_tempdir(),
+        test_client=bq_client,
+        validate=False,
+        load_job_project_id='loadJobId')

Review comment:
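       s/loadJobId/loadJobProject (this value is passed as load_job_project_id and set as the job reference's projectId, so it names a project rather than a job).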




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@beam.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org