You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by le...@apache.org on 2018/03/13 17:13:59 UTC
[1/3] climate git commit: CLIMATE-316 Add ESGF Download Script to
repository
Repository: climate
Updated Branches:
refs/heads/master 513dcc438 -> 5058b3898
CLIMATE-316 Add ESGF Download Script to repository
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/48a18fc6
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/48a18fc6
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/48a18fc6
Branch: refs/heads/master
Commit: 48a18fc6dd9719035a0e2d20d662bf1804bce3c9
Parents: e8d8d42
Author: michaelarthuranderson <mi...@gmail.com>
Authored: Sun Feb 25 15:20:12 2018 -0500
Committer: michaelarthuranderson <mi...@gmail.com>
Committed: Sun Feb 25 15:20:12 2018 -0500
----------------------------------------------------------------------
examples/esgf_integration_example.py | 58 +++++++++++--------
ocw/esgf/constants.py | 2 +-
ocw/esgf/download.py | 53 ++++++++++-------
ocw/esgf/logon.py | 16 +++---
ocw/esgf/main.py | 96 ++++++++++++++++---------------
ocw/esgf/search.py | 22 ++++---
6 files changed, 138 insertions(+), 109 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/examples/esgf_integration_example.py
----------------------------------------------------------------------
diff --git a/examples/esgf_integration_example.py b/examples/esgf_integration_example.py
index e939927..e541273 100644
--- a/examples/esgf_integration_example.py
+++ b/examples/esgf_integration_example.py
@@ -30,36 +30,46 @@
"""
-import ocw.data_source.esgf as esgf
-from getpass import getpass
+from __future__ import print_function
+
import ssl
import sys
+from getpass import getpass
+
+import ocw.data_source.esgf as esgf
+
+
+def main():
+ """
+ An example of using the OCW ESGF library. Connects to an ESGF
+ server and downloads a dataset.
+ """
+ if hasattr(ssl, '_create_unverified_context'):
+ ssl._create_default_https_context = ssl._create_unverified_context
+
+ dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
+ variable = 'zosStderr'
-if hasattr(ssl, '_create_unverified_context'):
- ssl._create_default_https_context = ssl._create_unverified_context
+ if sys.version_info[0] >= 3:
+ username = input('Enter your ESGF OpenID:\n')
+ else:
+ username = raw_input('Enter your ESGF OpenID:\n')
-dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
-variable = 'zosStderr'
+ password = getpass(prompt='Enter your ESGF Password:\n')
-if sys.version_info[0] >= 3:
- username = input('Enter your ESGF OpenID:\n')
-else:
- username = raw_input('Enter your ESGF OpenID:\n')
+ # Multiple datasets are returned in a list if the ESGF dataset is
+ # divided into multiple files.
+ datasets = esgf.load_dataset(dataset_id, variable, username, password)
-password = getpass(prompt='Enter your ESGF Password:\n')
+ # For this example, our dataset is only stored in a single file so
+ # we only need to look at the 0-th value in the returned list.
+ dataset = datasets[0]
-# Multiple datasets are returned in a list if the ESGF dataset is
-# divided into multiple files.
-datasets = esgf.load_dataset(dataset_id,
- variable,
- username,
- password)
+ print('\n--------\n')
+ print('Variable: ', dataset.variable)
+ print('Shape: ', dataset.values.shape)
+ print('A Value: ', dataset.values[100][100][100])
-# For this example, our dataset is only stored in a single file so
-# we only need to look at the 0-th value in the returned list.
-ds = datasets[0]
-print('\n--------\n')
-print('Variable: ', ds.variable)
-print('Shape: ', ds.values.shape)
-print('A Value: ', ds.values[100][100][100])
+if __name__ == '__main__':
+ main()
http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/ocw/esgf/constants.py
----------------------------------------------------------------------
diff --git a/ocw/esgf/constants.py b/ocw/esgf/constants.py
index 8d30848..90218fd 100644
--- a/ocw/esgf/constants.py
+++ b/ocw/esgf/constants.py
@@ -16,7 +16,7 @@
# specific language governing permissions and limitations
# under the License.
#
-'''Module containing constant parameters for ESGF RCMES integration.'''
+"""Module containing constant parameters for ESGF RCMES integration."""
# default location of ESGF user credentials
ESGF_CREDENTIALS = "~/.esg/credentials.pem"
http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/ocw/esgf/download.py
----------------------------------------------------------------------
diff --git a/ocw/esgf/download.py b/ocw/esgf/download.py
index 690915c..951a341 100644
--- a/ocw/esgf/download.py
+++ b/ocw/esgf/download.py
@@ -16,12 +16,18 @@
# specific language governing permissions and limitations
# under the License.
#
-'''
+"""
OCW module to download a file from ESGF.
-'''
+"""
+
+from __future__ import print_function
import sys
+from os.path import expanduser, join
+
+from ocw.esgf.constants import ESGF_CREDENTIALS
+
if sys.version_info[0] >= 3:
from http.client import HTTPSConnection
from urllib.request import build_opener
@@ -35,15 +41,12 @@ else:
from urllib2 import build_opener
from urllib2 import HTTPCookieProcessor
from urllib2 import HTTPSHandler
-from os.path import expanduser, join
-
-from ocw.esgf.constants import ESGF_CREDENTIALS
class HTTPSClientAuthHandler(HTTPSHandler):
- '''
+ """
HTTP handler that transmits an X509 certificate as part of the request
- '''
+ """
def __init__(self, key, cert):
HTTPSHandler.__init__(self)
@@ -51,34 +54,44 @@ class HTTPSClientAuthHandler(HTTPSHandler):
self.cert = cert
def https_open(self, req):
+ """
+ Opens the https connection.
+ :param req: The https request object.
+ :return: An addinfourl object for the request.
+ """
return self.do_open(self.getConnection, req)
def getConnection(self, host, timeout=300):
- return HTTPSConnection(host, key_file=self.key, cert_file=self.cert)
+ """
+ Create an HTTPSConnection object.
+ :param host: The ESGF server to connect to.
+ :param timeout: Connection timeout in seconds.
+ :return: An HTTPSConnection to the given host that uses the stored key and certificate files.
+ """
+ return HTTPSConnection(host, key_file=self.key, cert_file=self.cert, timeout=timeout)
def download(url, toDirectory="/tmp"):
- '''
+ """
Function to download a single file from ESGF.
-
:param url: the URL of the file to download
:param toDirectory: target directory where the file will be written
- '''
+ """
# setup HTTP handler
- certFile = expanduser(ESGF_CREDENTIALS)
- opener = build_opener(HTTPSClientAuthHandler(certFile, certFile))
+ cert_file = expanduser(ESGF_CREDENTIALS)
+ opener = build_opener(HTTPSClientAuthHandler(cert_file, cert_file))
opener.add_handler(HTTPCookieProcessor())
# download file
- localFilePath = join(toDirectory, url.split('/')[-1])
- print("\nDownloading url: %s to local path: %s ..." % (url, localFilePath))
- localFile = open(localFilePath, 'w')
- webFile = opener.open(url)
- localFile.write(webFile.read())
+ local_file_path = join(toDirectory, url.split('/')[-1])
+ print("\nDownloading url: %s to local path: %s ..." % (url, local_file_path))
+ local_file = open(local_file_path, 'w')
+ web_file = opener.open(url)
+ local_file.write(web_file.read())
# cleanup
- localFile.close()
- webFile.close()
+ local_file.close()
+ web_file.close()
opener.close()
print("... done")
http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/ocw/esgf/logon.py
----------------------------------------------------------------------
diff --git a/ocw/esgf/logon.py b/ocw/esgf/logon.py
index b792cfa..a49335d 100644
--- a/ocw/esgf/logon.py
+++ b/ocw/esgf/logon.py
@@ -16,28 +16,28 @@
# specific language governing permissions and limitations
# under the License.
#
-'''
+"""
RCMES module to log on to the ESGF.
-'''
+"""
import os
from pyesgf.logon import LogonManager
-from ocw.esgf.constants import JPL_MYPROXY_SERVER_DN, JPL_HOSTNAME
+from ocw.esgf.constants import JPL_HOSTNAME, JPL_MYPROXY_SERVER_DN
def logon(openid, password):
- '''
+ """
Function to retrieve a short-term X.509 certificate that can be used to authenticate with ESGF.
The certificate is written in the location ~/.esg/credentials.pem.
The trusted CA certificates are written in the directory ~/.esg/certificates.
- '''
+ """
# Must configure the DN of the JPL MyProxy server if using a JPL openid
if JPL_HOSTNAME in openid:
os.environ['MYPROXY_SERVER_DN'] = JPL_MYPROXY_SERVER_DN
- lm = LogonManager()
+ logon_manager = LogonManager()
- lm.logon_with_openid(openid, password, bootstrap=True)
+ logon_manager.logon_with_openid(openid, password, bootstrap=True)
- return lm.is_logged_on()
+ return logon_manager.is_logged_on()
http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/ocw/esgf/main.py
----------------------------------------------------------------------
diff --git a/ocw/esgf/main.py b/ocw/esgf/main.py
index 5c90042..0fb4656 100644
--- a/ocw/esgf/main.py
+++ b/ocw/esgf/main.py
@@ -16,21 +16,23 @@
# specific language governing permissions and limitations
# under the License.
#
-'''
+"""
Example main program for ESGF-RCMES integration.
-
-'''
-# constant parameters
-DATA_DIRECTORY = "/tmp"
+"""
+
+from __future__ import print_function
+from ocw.esgf.download import download
from ocw.esgf.logon import logon
from ocw.esgf.search import SearchClient
-from ocw.esgf.download import download
+
+# constant parameters
+DATA_DIRECTORY = "/tmp"
def main():
- '''Example driver program'''
+ """Example driver program"""
username = raw_input('Enter your ESGF Username:\n')
password = raw_input('Enter your ESGF Password:\n')
@@ -42,8 +44,8 @@ def main():
print("...done.")
# step 2: execute faceted search for files
- urls = main_obs4mips()
- #urls = main_cmip5()
+ # urls = main_obs4mips()
+ urls = main_cmip5()
# step 3: download file(s)
for i, url in enumerate(urls):
@@ -53,66 +55,66 @@ def main():
def main_cmip5():
- '''
+ """
Example workflow to search for CMIP5 files
- '''
+ """
- searchClient = SearchClient(
+ search_client = SearchClient(
searchServiceUrl="http://pcmdi9.llnl.gov/esg-search/search", distrib=False)
- print('\nAvailable projects=%s' % searchClient.getFacets('project'))
- searchClient.setConstraint(project='CMIP5')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable projects=%s' % search_client.getFacets('project'))
+ search_client.setConstraint(project='CMIP5')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
+
+ print('\nAvailable models=%s' % search_client.getFacets('model'))
+ search_client.setConstraint(model='INM-CM4')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable models=%s' % searchClient.getFacets('model'))
- searchClient.setConstraint(model='INM-CM4')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable experiments=%s' % search_client.getFacets('experiment'))
+ search_client.setConstraint(experiment='historical')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable experiments=%s' % searchClient.getFacets('experiment'))
- searchClient.setConstraint(experiment='historical')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable time frequencies=%s' % search_client.getFacets('time_frequency'))
+ search_client.setConstraint(time_frequency='mon')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable time frequencies=%s' %
- searchClient.getFacets('time_frequency'))
- searchClient.setConstraint(time_frequency='mon')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable CF standard names=%s' % search_client.getFacets('cf_standard_name'))
+ search_client.setConstraint(cf_standard_name='air_temperature')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable CF standard names=%s' %
- searchClient.getFacets('cf_standard_name'))
- searchClient.setConstraint(cf_standard_name='air_temperature')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ urls = search_client.getFiles()
- urls = searchClient.getFiles()
return urls
def main_obs4mips():
- '''
+ """
Example workflow to search for obs4MIPs files.
- '''
+ """
- searchClient = SearchClient(distrib=False)
+ search_client = SearchClient(distrib=False)
# obs4MIPs
- print('\nAvailable projects=%s' % searchClient.getFacets('project'))
- searchClient.setConstraint(project='obs4MIPs')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable projects=%s' % search_client.getFacets('project'))
+ search_client.setConstraint(project='obs4MIPs')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable variables=%s' % searchClient.getFacets('variable'))
- searchClient.setConstraint(variable='hus')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable variables=%s' % search_client.getFacets('variable'))
+ search_client.setConstraint(variable='hus')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable time frequencies=%s' %
- searchClient.getFacets('time_frequency'))
- searchClient.setConstraint(time_frequency='mon')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+ print('\nAvailable time frequencies=%s' % search_client.getFacets('time_frequency'))
+ search_client.setConstraint(time_frequency='mon')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
- print('\nAvailable models=%s' % searchClient.getFacets('model'))
- searchClient.setConstraint(model='Obs-MLS')
- print("Number of Datasets=%d" % searchClient.getNumberOfDatasetsi())
+ print('\nAvailable models=%s' % search_client.getFacets('model'))
+ search_client.setConstraint(model='Obs-MLS')
+ print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
+
+ urls = search_client.getFiles()
- urls = searchClient.getFiles()
return urls
+
if __name__ == '__main__':
main()
http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/ocw/esgf/search.py
----------------------------------------------------------------------
diff --git a/ocw/esgf/search.py b/ocw/esgf/search.py
index c2f4e12..a807c42 100644
--- a/ocw/esgf/search.py
+++ b/ocw/esgf/search.py
@@ -16,17 +16,19 @@
# specific language governing permissions and limitations
# under the License.
#
-'''
+"""
RCMES module to execute a faceted search for ESGF files.
-'''
+"""
+
+from __future__ import print_function
from pyesgf.search import SearchConnection
from ocw.esgf.constants import JPL_SEARCH_SERVICE_URL
-class SearchClient():
+class SearchClient(object):
"""
Simple ESGF search client for RCMES.
This class is a thin layer on top of the esgfpy-client package.
@@ -36,7 +38,7 @@ class SearchClient():
def __init__(self, searchServiceUrl=JPL_SEARCH_SERVICE_URL, distrib=True):
"""
:param searchServiceUrl: URL of ESGF search service to query
- :param distrib: True to execute a federation-wide search,
+ :param distrib: True to execute a federation-wide search,
False to search only the specified search service
"""
connection = SearchConnection(searchServiceUrl, distrib=distrib)
@@ -66,8 +68,10 @@ class SearchClient():
def getFacets(self, facet):
"""
- :return: a dictionary of (facet value, facet count) for the specified facet and current constraints.
- Example (for facet='project'): {u'COUND': 4, u'CMIP5': 2657, u'obs4MIPs': 7}
+ :return: a dictionary of (facet value, facet count) for the specified facet
+ and current constraints.
+
+ Example (for facet='project'): {u'COUND': 4, u'CMIP5': 2657, u'obs4MIPs': 7}
"""
return self.context.facet_counts[facet]
@@ -82,7 +86,7 @@ class SearchClient():
print("\nSearching files for dataset=%s with constraints: %s" %
(dataset.dataset_id, self.constraints))
files = dataset.file_context().search(**self.constraints)
- for file in files:
- print('Found file=%s' % file.download_url)
- urls.append(file.download_url)
+ for current_file in files:
+ print('Found file=%s' % current_file.download_url)
+ urls.append(current_file.download_url)
return urls
[3/3] climate git commit: Merge branch 'CLIMATE-316' of
https://github.com/MichaelArthurAnderson/climate
Posted by le...@apache.org.
Merge branch 'CLIMATE-316' of https://github.com/MichaelArthurAnderson/climate
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/5058b389
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/5058b389
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/5058b389
Branch: refs/heads/master
Commit: 5058b38985a81e338817f9b52d251fe342b3928c
Parents: 513dcc4 848cdb6
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Tue Mar 13 10:13:48 2018 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Tue Mar 13 10:13:48 2018 -0700
----------------------------------------------------------------------
examples/esgf_integration_example.py | 58 +++++++++++--------
ocw/data_source/esgf.py | 66 ++++++++++-----------
ocw/esgf/constants.py | 2 +-
ocw/esgf/download.py | 53 ++++++++++-------
ocw/esgf/logon.py | 16 +++---
ocw/esgf/main.py | 96 ++++++++++++++++---------------
ocw/esgf/search.py | 22 ++++---
7 files changed, 171 insertions(+), 142 deletions(-)
----------------------------------------------------------------------
[2/3] climate git commit: CLIMATE-316 Add ESGF Download Script to
repository
Posted by le...@apache.org.
CLIMATE-316 Add ESGF Download Script to repository
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/848cdb69
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/848cdb69
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/848cdb69
Branch: refs/heads/master
Commit: 848cdb692774cba27fe435e2730b08118cb9daf3
Parents: 48a18fc
Author: michaelarthuranderson <mi...@gmail.com>
Authored: Sun Feb 25 16:08:01 2018 -0500
Committer: michaelarthuranderson <mi...@gmail.com>
Committed: Sun Feb 25 16:08:01 2018 -0500
----------------------------------------------------------------------
ocw/data_source/esgf.py | 66 ++++++++++++++++++++++----------------------
1 file changed, 33 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/climate/blob/848cdb69/ocw/data_source/esgf.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/esgf.py b/ocw/data_source/esgf.py
index 0dcc2e0..6b2f042 100644
--- a/ocw/data_source/esgf.py
+++ b/ocw/data_source/esgf.py
@@ -16,9 +16,26 @@
# specific language governing permissions and limitations
# under the License.
#
+"""
+A set of functions to wrap downloading ESGF datasets into an OCW dataset object.
+*** Note *** The ESGF data source requires that the user have certain credentials downloaded from
+the ESG. The current version of the module should download these automatically. Older versions of
+the library will not download them. The solution is to use the WGET script from the ESGF to download
+a test dataset to get the credentials. The data source should work as expected then.
+
+"""
import os
import sys
+
+import requests
+from bs4 import BeautifulSoup
+
+import ocw.data_source.local as local
+from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
+from ocw.esgf.download import download
+from ocw.esgf.logon import logon
+
if sys.version_info[0] >= 3:
from urllib.error import HTTPError
else:
@@ -27,15 +44,6 @@ else:
# might be around one day
from urllib2 import HTTPError
-from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
-from ocw.esgf.download import download
-from ocw.esgf.logon import logon
-from ocw.esgf.search import SearchClient
-import ocw.data_source.local as local
-
-from bs4 import BeautifulSoup
-import requests
-
def load_dataset(dataset_id,
variable_name,
@@ -44,9 +52,8 @@ def load_dataset(dataset_id,
search_url=DEFAULT_ESGF_SEARCH,
elevation_index=0,
name='',
- save_path='/tmp',
- **additional_constraints):
- ''' Load an ESGF dataset.
+ save_path='/tmp'):
+ """ Load an ESGF dataset.
:param dataset_id: The ESGF ID of the dataset to load.
:type dataset_id: :mod:`string`
@@ -74,32 +81,24 @@ def load_dataset(dataset_id,
:param save_path: (Optional) Path to where downloaded files should be saved.
:type save_path: :mod:`string`
- :param additional_constraints: (Optional) Additional key,value pairs to
- pass as constraints to the search wrapper. These can be anything found
- on the ESGF metadata page for a dataset.
-
:returns: A :class:`list` of :class:`dataset.Dataset` containing the
requested dataset. If the dataset is stored in multiple files each will
be loaded into a separate :class:`dataset.Dataset`.
:raises ValueError: If no dataset can be found for the supplied ID and
variable, or if the requested dataset is a multi-file dataset.
- '''
- download_data = _get_file_download_data(url=search_url,
- dataset_id=dataset_id,
- variable=variable_name)
+ """
+ download_data = \
+ _get_file_download_data(url=search_url, dataset_id=dataset_id, variable=variable_name)
datasets = []
+
for url, var in download_data:
- _download_files([url],
- esgf_username,
- esgf_password,
- download_directory=save_path)
+ _download_files([url], esgf_username, esgf_password, download_directory=save_path)
file_save_path = os.path.join(save_path, url.split('/')[-1])
- datasets.append(local.load_file(file_save_path,
- var,
- name=name,
+
+ datasets.append(local.load_file(file_save_path, var, name=name,
elevation_index=elevation_index))
origin = {
@@ -107,19 +106,20 @@ def load_dataset(dataset_id,
'dataset_id': dataset_id,
'variable': variable_name
}
- for ds in datasets:
- ds.origin = origin
+
+ for dataset in datasets:
+ dataset.origin = origin
return datasets
def _get_file_download_data(dataset_id, variable, url=DEFAULT_ESGF_SEARCH):
- ''''''
+ """"""
url += '?type=File&dataset_id={}&variable={}'
url = url.format(dataset_id, variable)
- r = requests.get(url)
- xml = BeautifulSoup(r.content, "html.parser")
+ raw_data = requests.get(url)
+ xml = BeautifulSoup(raw_data.content, "html.parser")
dont_have_results = not bool(xml.response.result['numfound'])
@@ -141,7 +141,7 @@ def _get_file_download_data(dataset_id, variable, url=DEFAULT_ESGF_SEARCH):
def _download_files(file_urls, username, password, download_directory='/tmp'):
- ''''''
+ """"""
try:
logon(username, password)
except HTTPError: