You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by cr...@apache.org on 2017/12/04 22:10:47 UTC
incubator-airflow git commit: [AIRFLOW-1883] Get File Size for
objects in Google Cloud Storage
Repository: incubator-airflow
Updated Branches:
refs/heads/master 1359d8735 -> 8d2f43073
[AIRFLOW-1883] Get File Size for objects in Google Cloud Storage
Closes #2840 from kaxil/Get_File_Size
Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/8d2f4307
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/8d2f4307
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/8d2f4307
Branch: refs/heads/master
Commit: 8d2f430732331c003d2c82f9a0c435e013281fe9
Parents: 1359d87
Author: Kaxil Naik <ka...@gmail.com>
Authored: Mon Dec 4 14:10:31 2017 -0800
Committer: Chris Riccomini <cr...@apache.org>
Committed: Mon Dec 4 14:10:37 2017 -0800
----------------------------------------------------------------------
airflow/contrib/hooks/gcs_hook.py | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/8d2f4307/airflow/contrib/hooks/gcs_hook.py
----------------------------------------------------------------------
diff --git a/airflow/contrib/hooks/gcs_hook.py b/airflow/contrib/hooks/gcs_hook.py
index f6ad39f..3103a5a 100644
--- a/airflow/contrib/hooks/gcs_hook.py
+++ b/airflow/contrib/hooks/gcs_hook.py
@@ -269,3 +269,31 @@ class GoogleCloudStorageHook(GoogleCloudBaseHook):
# empty next page token
break
return ids
+
+    def get_size(self, bucket, object):
+        """
+        Gets the size of a file in Google Cloud Storage.
+
+        :param bucket: The Google cloud storage bucket where the object is.
+        :type bucket: string
+        :param object: The name of the object to check in the bucket.
+        :type object: string
+        :return: The value of the ``size`` field in the API response.
+        :raises ValueError: If the object is missing or is a directory.
+        """
+        self.log.info('Checking the file size of object: %s in bucket: %s', object, bucket)
+        service = self.get_conn()
+        try:
+            response = service.objects().get(bucket=bucket, object=object).execute()
+        except errors.HttpError as ex:
+            if ex.resp['status'] == '404':
+                raise ValueError('Object Not Found')
+            # Any other API failure must surface instead of returning None.
+            raise
+
+        # Names ending in '/' are treated as directories, not files.
+        if 'name' not in response or response['name'].endswith('/'):
+            raise ValueError('Object is not a file')
+        size = response['size']
+        self.log.info('The file size of %s is %s', object, size)
+        return size