You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by cr...@apache.org on 2017/07/13 21:14:10 UTC

incubator-airflow git commit: [AIRFLOW-1394] Add quote_character param to GCS hook and operator

Repository: incubator-airflow
Updated Branches:
  refs/heads/master 38c86bbbc -> 9fd0beaac


[AIRFLOW-1394] Add quote_character param to GCS hook and operator

Closes #2428 from dclubb/master


Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/9fd0beaa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/9fd0beaa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/9fd0beaa

Branch: refs/heads/master
Commit: 9fd0beaacbe8943e81b50b4a88f6ffedb6b437f5
Parents: 38c86bb
Author: David Clubb <da...@imgix.com>
Authored: Thu Jul 13 14:13:39 2017 -0700
Committer: Chris Riccomini <cr...@apache.org>
Committed: Thu Jul 13 14:13:46 2017 -0700

----------------------------------------------------------------------
 airflow/contrib/hooks/bigquery_hook.py | 6 ++++++
 airflow/contrib/operators/gcs_to_bq.py | 5 +++++
 2 files changed, 11 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/9fd0beaa/airflow/contrib/hooks/bigquery_hook.py
----------------------------------------------------------------------
diff --git a/airflow/contrib/hooks/bigquery_hook.py b/airflow/contrib/hooks/bigquery_hook.py
index cc76953..0950b22 100644
--- a/airflow/contrib/hooks/bigquery_hook.py
+++ b/airflow/contrib/hooks/bigquery_hook.py
@@ -375,6 +375,7 @@ class BigQueryBaseCursor(object):
                  write_disposition='WRITE_EMPTY',
                  field_delimiter=',',
                  max_bad_records=0,
+                 quote_character=None,
                  schema_update_options=()):
         """
         Executes a BigQuery load command to load data from Google Cloud Storage
@@ -409,6 +410,8 @@ class BigQueryBaseCursor(object):
         :param max_bad_records: The maximum number of bad records that BigQuery can
             ignore when running the job.
         :type max_bad_records: int
+        :param quote_character: The value that is used to quote data sections in a CSV file.
+        :type quote_character: string
         :param schema_update_options: Allows the schema of the destination
             table to be updated as a side effect of the load job.
         :type schema_update_options: list
@@ -485,6 +488,9 @@ class BigQueryBaseCursor(object):
         if max_bad_records:
             configuration['load']['maxBadRecords'] = max_bad_records
 
+        if quote_character:
+            configuration['load']['quote'] = quote_character
+
         return self.run_with_configuration(configuration)
 
     def run_with_configuration(self, configuration):

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/9fd0beaa/airflow/contrib/operators/gcs_to_bq.py
----------------------------------------------------------------------
diff --git a/airflow/contrib/operators/gcs_to_bq.py b/airflow/contrib/operators/gcs_to_bq.py
index 44cf7b6..b65d135 100644
--- a/airflow/contrib/operators/gcs_to_bq.py
+++ b/airflow/contrib/operators/gcs_to_bq.py
@@ -44,6 +44,7 @@ class GoogleCloudStorageToBigQueryOperator(BaseOperator):
         write_disposition='WRITE_EMPTY',
         field_delimiter=',',
         max_bad_records=0,
+        quote_character=None,
         max_id_key=None,
         bigquery_conn_id='bigquery_default',
         google_cloud_storage_conn_id='google_cloud_storage_default',
@@ -84,6 +85,8 @@ class GoogleCloudStorageToBigQueryOperator(BaseOperator):
         :param max_bad_records: The maximum number of bad records that BigQuery can
             ignore when running the job.
         :type max_bad_records: int
+        :param quote_character: The value that is used to quote data sections in a CSV file.
+        :type quote_character: string
         :param max_id_key: If set, the name of a column in the BigQuery table
             that's to be loaded. This will be used to select the MAX value from
             BigQuery after the load occurs. The results will be returned by the
@@ -120,6 +123,7 @@ class GoogleCloudStorageToBigQueryOperator(BaseOperator):
         self.write_disposition = write_disposition
         self.field_delimiter = field_delimiter
         self.max_bad_records = max_bad_records
+        self.quote_character = quote_character
 
         self.max_id_key = max_id_key
         self.bigquery_conn_id = bigquery_conn_id
@@ -156,6 +160,7 @@ class GoogleCloudStorageToBigQueryOperator(BaseOperator):
             write_disposition=self.write_disposition,
             field_delimiter=self.field_delimiter,
             max_bad_records=self.max_bad_records,
+            quote_character=self.quote_character,
             schema_update_options=self.schema_update_options)
 
         if self.max_id_key: