You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by cr...@apache.org on 2017/12/04 22:10:47 UTC

incubator-airflow git commit: [AIRFLOW-1883] Get File Size for objects in Google Cloud Storage

Repository: incubator-airflow
Updated Branches:
  refs/heads/master 1359d8735 -> 8d2f43073


[AIRFLOW-1883] Get File Size for objects in Google Cloud Storage

Closes #2840 from kaxil/Get_File_Size


Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/8d2f4307
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/8d2f4307
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/8d2f4307

Branch: refs/heads/master
Commit: 8d2f430732331c003d2c82f9a0c435e013281fe9
Parents: 1359d87
Author: Kaxil Naik <ka...@gmail.com>
Authored: Mon Dec 4 14:10:31 2017 -0800
Committer: Chris Riccomini <cr...@apache.org>
Committed: Mon Dec 4 14:10:37 2017 -0800

----------------------------------------------------------------------
 airflow/contrib/hooks/gcs_hook.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/8d2f4307/airflow/contrib/hooks/gcs_hook.py
----------------------------------------------------------------------
diff --git a/airflow/contrib/hooks/gcs_hook.py b/airflow/contrib/hooks/gcs_hook.py
index f6ad39f..3103a5a 100644
--- a/airflow/contrib/hooks/gcs_hook.py
+++ b/airflow/contrib/hooks/gcs_hook.py
@@ -269,3 +269,31 @@ class GoogleCloudStorageHook(GoogleCloudBaseHook):
                 # empty next page token
                 break
         return ids
+
+    def get_size(self, bucket, object):
+        """
+        Gets the size of a file in Google Cloud Storage.
+        :param bucket: The Google cloud storage bucket where the object is.
+        :type bucket: string
+        :param object: The name of the object to check in the Google cloud
+            storage bucket.
+        :type object: string
+        """
+        self.log.info('Checking the file size of object: %s in bucket: %s', object, bucket)
+        service = self.get_conn()
+        try:
+            response = service.objects().get(
+                bucket=bucket,
+                object=object
+            ).execute()
+
+            if 'name' in response and response['name'][-1] != '/':
+                # Remove Directories & Just check size of files
+                size = response['size']
+                self.log.info('The file size of %s is %s', object, size)
+                return size
+            else:
+                raise ValueError('Object is not a file')
+        except errors.HttpError as ex:
+            if ex.resp['status'] == '404':
+                raise ValueError('Object Not Found')