You are viewing a plain text version of this content. The canonical link for it is available in the mailing-list archive (the original hyperlink was lost in plain-text conversion).
Posted to commits@airflow.apache.org by po...@apache.org on 2022/09/07 20:26:48 UTC

[airflow] branch main updated: GCSToBigQueryOperator allow for schema_object in alternate GCS Bucket (#26190)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 8cac96918b GCSToBigQueryOperator allow for schema_object in alternate GCS Bucket (#26190)
8cac96918b is described below

commit 8cac96918becf19a4a04eef1e5bcf175f815f204
Author: Peter Wicks <pw...@apartmentlist.com>
AuthorDate: Wed Sep 7 14:26:39 2022 -0600

    GCSToBigQueryOperator allow for schema_object in alternate GCS Bucket (#26190)
---
 airflow/providers/google/cloud/transfers/gcs_to_bigquery.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py b/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py
index 6821e78433..3f5f1d0edf 100644
--- a/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py
+++ b/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py
@@ -56,6 +56,8 @@ class GCSToBigQueryOperator(BaseOperator):
     :param schema_object: If set, a GCS object path pointing to a .json file that
         contains the schema for the table. (templated)
         Parameter must be defined if 'schema_fields' is null and autodetect is False.
+    :param schema_object_bucket: [Optional] If set, the GCS bucket where the schema object
+        template is stored. (templated) (Default: the value of ``bucket``)
     :param source_format: File format to export.
     :param compression: [Optional] The compression type of the data source.
         Possible values include GZIP and NONE.
@@ -133,6 +135,7 @@ class GCSToBigQueryOperator(BaseOperator):
         'bucket',
         'source_objects',
         'schema_object',
+        'schema_object_bucket',
         'destination_project_dataset_table',
         'impersonation_chain',
     )
@@ -147,6 +150,7 @@ class GCSToBigQueryOperator(BaseOperator):
         destination_project_dataset_table,
         schema_fields=None,
         schema_object=None,
+        schema_object_bucket=None,
         source_format='CSV',
         compression='NONE',
         create_disposition='CREATE_IF_NEEDED',
@@ -187,6 +191,10 @@ class GCSToBigQueryOperator(BaseOperator):
         self.source_objects = source_objects
         self.schema_object = schema_object
 
+        if schema_object_bucket is None:
+            schema_object_bucket = bucket
+        self.schema_object_bucket = schema_object_bucket
+
         # BQ config
         self.destination_project_dataset_table = destination_project_dataset_table
         self.schema_fields = schema_fields
@@ -236,7 +244,7 @@ class GCSToBigQueryOperator(BaseOperator):
                     impersonation_chain=self.impersonation_chain,
                 )
                 blob = gcs_hook.download(
-                    bucket_name=self.bucket,
+                    bucket_name=self.schema_object_bucket,
                     object_name=self.schema_object,
                 )
                 schema_fields = json.loads(blob.decode("utf-8"))