You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by le...@apache.org on 2018/03/13 17:14:00 UTC
[2/3] climate git commit: CLIMATE-316 Add ESGF Download Script to
repository
CLIMATE-316 Add ESGF Download Script to repository
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/848cdb69
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/848cdb69
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/848cdb69
Branch: refs/heads/master
Commit: 848cdb692774cba27fe435e2730b08118cb9daf3
Parents: 48a18fc
Author: michaelarthuranderson <mi...@gmail.com>
Authored: Sun Feb 25 16:08:01 2018 -0500
Committer: michaelarthuranderson <mi...@gmail.com>
Committed: Sun Feb 25 16:08:01 2018 -0500
----------------------------------------------------------------------
ocw/data_source/esgf.py | 66 ++++++++++++++++++++++----------------------
1 file changed, 33 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/climate/blob/848cdb69/ocw/data_source/esgf.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/esgf.py b/ocw/data_source/esgf.py
index 0dcc2e0..6b2f042 100644
--- a/ocw/data_source/esgf.py
+++ b/ocw/data_source/esgf.py
@@ -16,9 +16,26 @@
# specific language governing permissions and limitations
# under the License.
#
+"""
+A set of functions to wrap downloading ESGF datasets into an OCW dataset object.
+*** Note *** The ESGF data source requires that the user have certain credentials downloaded from
+the ESGF. The current version of the module should download these automatically. Older versions of
+the library will not download them. The solution is to use the WGET script from the ESGF to download
+a test dataset to get the credentials. The data source should work as expected then.
+
+"""
import os
import sys
+
+import requests
+from bs4 import BeautifulSoup
+
+import ocw.data_source.local as local
+from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
+from ocw.esgf.download import download
+from ocw.esgf.logon import logon
+
if sys.version_info[0] >= 3:
from urllib.error import HTTPError
else:
@@ -27,15 +44,6 @@ else:
# might be around one day
from urllib2 import HTTPError
-from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
-from ocw.esgf.download import download
-from ocw.esgf.logon import logon
-from ocw.esgf.search import SearchClient
-import ocw.data_source.local as local
-
-from bs4 import BeautifulSoup
-import requests
-
def load_dataset(dataset_id,
variable_name,
@@ -44,9 +52,8 @@ def load_dataset(dataset_id,
search_url=DEFAULT_ESGF_SEARCH,
elevation_index=0,
name='',
- save_path='/tmp',
- **additional_constraints):
- ''' Load an ESGF dataset.
+ save_path='/tmp'):
+ """ Load an ESGF dataset.
:param dataset_id: The ESGF ID of the dataset to load.
:type dataset_id: :mod:`string`
@@ -74,32 +81,24 @@ def load_dataset(dataset_id,
:param save_path: (Optional) Path to where downloaded files should be saved.
:type save_path: :mod:`string`
- :param additional_constraints: (Optional) Additional key,value pairs to
- pass as constraints to the search wrapper. These can be anything found
- on the ESGF metadata page for a dataset.
-
:returns: A :class:`list` of :class:`dataset.Dataset` containing the
requested dataset. If the dataset is stored in multiple files each will
be loaded into a separate :class:`dataset.Dataset`.
:raises ValueError: If no dataset can be found for the supplied ID and
variable, or if the requested dataset is a multi-file dataset.
- '''
- download_data = _get_file_download_data(url=search_url,
- dataset_id=dataset_id,
- variable=variable_name)
+ """
+ download_data = \
+ _get_file_download_data(url=search_url, dataset_id=dataset_id, variable=variable_name)
datasets = []
+
for url, var in download_data:
- _download_files([url],
- esgf_username,
- esgf_password,
- download_directory=save_path)
+ _download_files([url], esgf_username, esgf_password, download_directory=save_path)
file_save_path = os.path.join(save_path, url.split('/')[-1])
- datasets.append(local.load_file(file_save_path,
- var,
- name=name,
+
+ datasets.append(local.load_file(file_save_path, var, name=name,
elevation_index=elevation_index))
origin = {
@@ -107,19 +106,20 @@ def load_dataset(dataset_id,
'dataset_id': dataset_id,
'variable': variable_name
}
- for ds in datasets:
- ds.origin = origin
+
+ for dataset in datasets:
+ dataset.origin = origin
return datasets
def _get_file_download_data(dataset_id, variable, url=DEFAULT_ESGF_SEARCH):
- ''''''
+ """"""
url += '?type=File&dataset_id={}&variable={}'
url = url.format(dataset_id, variable)
- r = requests.get(url)
- xml = BeautifulSoup(r.content, "html.parser")
+ raw_data = requests.get(url)
+ xml = BeautifulSoup(raw_data.content, "html.parser")
dont_have_results = not bool(xml.response.result['numfound'])
@@ -141,7 +141,7 @@ def _get_file_download_data(dataset_id, variable, url=DEFAULT_ESGF_SEARCH):
def _download_files(file_urls, username, password, download_directory='/tmp'):
- ''''''
+ """"""
try:
logon(username, password)
except HTTPError: