You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by "ASF GitHub Bot (JIRA)" <ji...@apache.org> on 2018/12/20 15:15:00 UTC

[jira] [Commented] (AIRFLOW-3163) Add set table description operator to BigQuery operators

    [ https://issues.apache.org/jira/browse/AIRFLOW-3163?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16725931#comment-16725931 ] 

ASF GitHub Bot commented on AIRFLOW-3163:
-----------------------------------------

stale[bot] closed pull request #4003: [AIRFLOW-3163] add operator to enable setting table description in BigQuery table
URL: https://github.com/apache/incubator-airflow/pull/4003
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/airflow/contrib/hooks/bigquery_hook.py b/airflow/contrib/hooks/bigquery_hook.py
index dd77df1283..ccbb36dbd4 100644
--- a/airflow/contrib/hooks/bigquery_hook.py
+++ b/airflow/contrib/hooks/bigquery_hook.py
@@ -135,6 +135,34 @@ def table_exists(self, project_id, dataset_id, table_id):
                 return False
             raise
 
def set_table_description(self, dataset_id, table_id, description, project_id=None):
    """
    Set the description on an existing BigQuery table.

    :param dataset_id: The name of the dataset in which to look for the
        table.
    :type dataset_id: string
    :param table_id: The name of the table to set the description for.
    :type table_id: string
    :param description: The description text to store on the table.
    :type description: string
    :param project_id: The Google cloud project in which to look for the
        table. Defaults to the project configured on the hook's
        connection. The connection supplied to the hook must provide
        access to the specified project.
    :type project_id: string
    """
    service = self.get_service()
    # Fall back to the project configured on the connection when the
    # caller does not supply one explicitly.
    if project_id is None:
        project_id = self._get_field('project')
    # tables.patch only updates the fields present in the request body,
    # so there is no need to fetch the full table resource first; a
    # get-then-full-patch would also race with concurrent modifications
    # of the same table.
    service.tables().patch(
        projectId=project_id,
        datasetId=dataset_id,
        tableId=table_id,
        body={'description': description}).execute()
 
 class BigQueryPandasConnector(GbqConnector):
     """
diff --git a/airflow/contrib/operators/bigquery_operator.py b/airflow/contrib/operators/bigquery_operator.py
index 9386e57c07..1ad19a7aa0 100644
--- a/airflow/contrib/operators/bigquery_operator.py
+++ b/airflow/contrib/operators/bigquery_operator.py
@@ -629,3 +629,57 @@ def execute(self, context):
             project_id=self.project_id,
             dataset_id=self.dataset_id,
             dataset_reference=self.dataset_reference)
+
+
class BigQuerySetTableDescriptionOperator(BaseOperator):
    """
    Sets the description on an existing BigQuery table.

    :param project_id: The Google cloud project in which to look for the
        table. The connection supplied must provide access to
        the specified project.
    :type project_id: string
    :param dataset_id: The name of the dataset in which to look for the
        table.
    :type dataset_id: string
    :param table_id: The name of the table to set the description for.
    :type table_id: string
    :param description: The description to set.
    :type description: string
    :param bigquery_conn_id: The connection ID to use when
        connecting to BigQuery.
    :type bigquery_conn_id: string
    :param delegate_to: The account to impersonate, if any. For this to
        work, the service account making the request must have domain-wide
        delegation enabled.
    :type delegate_to: string
    """
    # All identifying fields plus the description are templated so the
    # description can embed e.g. the execution date.
    template_fields = ('project_id', 'dataset_id', 'table_id', 'description')
    ui_color = '#f0eee4'

    @apply_defaults
    def __init__(self,
                 project_id=None,
                 dataset_id=None,
                 table_id=None,
                 description=None,
                 bigquery_conn_id='bigquery_default',
                 delegate_to=None,
                 *args,
                 **kwargs):
        super(BigQuerySetTableDescriptionOperator, self).__init__(*args, **kwargs)
        self.project_id = project_id
        self.dataset_id = dataset_id
        self.table_id = table_id
        self.description = description
        self.bigquery_conn_id = bigquery_conn_id
        self.delegate_to = delegate_to

    def execute(self, context):
        """Delegate to ``BigQueryHook.set_table_description``."""
        bq_hook = BigQueryHook(
            bigquery_conn_id=self.bigquery_conn_id,
            delegate_to=self.delegate_to)
        bq_hook.set_table_description(project_id=self.project_id,
                                      dataset_id=self.dataset_id,
                                      table_id=self.table_id,
                                      description=self.description)


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> Add set table description operator to BigQuery operators
> --------------------------------------------------------
>
>                 Key: AIRFLOW-3163
>                 URL: https://issues.apache.org/jira/browse/AIRFLOW-3163
>             Project: Apache Airflow
>          Issue Type: Improvement
>          Components: operators
>    Affects Versions: 1.10.0
>            Reporter: Anthony Brown
>            Assignee: Anthony Brown
>            Priority: Minor
>
> When populating many tables with the results of BigQuery SQL statements, it would be useful to set a description on each table that includes something like the last updated date
>  
> This will add an operator to the BigQuery class to include it



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)