You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by go...@apache.org on 2016/07/29 17:39:45 UTC
climate git commit: Revert "Merge branch 'CLIMATE-825'"
Repository: climate
Updated Branches:
refs/heads/master 577dfb787 -> d30d8146f
Revert "Merge branch 'CLIMATE-825'"
This reverts commit 577dfb78704a340b187ee746bef15b91a1fb5414, reversing
changes made to 70cd2b5006d1b0dfe1ff97f1cb2913c3d4ad913e.
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/d30d8146
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/d30d8146
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/d30d8146
Branch: refs/heads/master
Commit: d30d8146fa9c91844411d41f15f5e64423f2bfa2
Parents: 577dfb7
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Fri Jul 29 10:28:58 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Fri Jul 29 10:28:58 2016 -0700
----------------------------------------------------------------------
docs/source/index.rst | 2 +-
docs/source/ocw/dataset_loader.rst | 5 -
ocw/dataset_loader.py | 206 --------------------------------
ocw/tests/test_dataset_loader.py | 185 ----------------------------
4 files changed, 1 insertion(+), 397 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/docs/source/index.rst
----------------------------------------------------------------------
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 2834ee6..1485385 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -13,7 +13,6 @@ Contents:
ocw/overview
ocw/dataset
- ocw/dataset_loader
ocw/dataset_processor
ocw/evaluation
ocw/metrics
@@ -34,3 +33,4 @@ Indices and tables
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
+
http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/docs/source/ocw/dataset_loader.rst
----------------------------------------------------------------------
diff --git a/docs/source/ocw/dataset_loader.rst b/docs/source/ocw/dataset_loader.rst
deleted file mode 100644
index 833b7f9..0000000
--- a/docs/source/ocw/dataset_loader.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Dataset Loader Module
-*********************
-
-.. automodule:: dataset_loader
- :members:
http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
deleted file mode 100644
index be43c05..0000000
--- a/ocw/dataset_loader.py
+++ /dev/null
@@ -1,206 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-'''
-Classes:
- DatasetLoader - Generate OCW Dataset objects from a variety of sources.
-'''
-
-import ocw.data_source.local as local
-import ocw.data_source.esgf as esgf
-import ocw.data_source.rcmed as rcmed
-import ocw.data_source.dap as dap
-
-class DatasetLoader:
- '''Generate OCW Dataset objects from a variety of sources.'''
-
- def __init__(self, reference, targets):
- '''Generate OCW Dataset objects from a variety of sources.
-
- Each keyword argument can be information for a dataset in dictionary
- form. For example:
- ``
- >>> reference = {'data_source':'rcmed', 'name':'cru', 'dataset_id':10,
- 'parameter_id':34}
- >>> targets = {'data_source':'local_multiple',
- 'path':'./data/CORDEX-Africa_data/AFRICA*pr.nc',
- 'variable':'pr'}
- >>> loader = DatasetLoader(reference, targets)
- ``
-
- Or more conveniently if the loader configuration is defined in a
- yaml file named config_file (see RCMES examples):
- ``
- >>> import yaml
- >>> config = yaml.load(open(config_file))
- >>> loader = DatasetLoader(**config['datasets'])
- ``
-
- As shown in the first example, the dictionary for each keyword argument
- should contain a data source and parameters specific to the loader for
- that data source. Once the configuration is entered, the datasets may be
- loaded using:
- ``
- >>> loader.load_datasets()
- >>> target_datasets = loader.target_datasets
- ``
-
- If ``reference`` is entered as a keyword argument, then it may be
- accessed from:
- ``
- >>> reference_dataset = loader.reference_dataset
- ``
-
- Additionally, each dataset must have a ``data_source`` keyword. This may
- be one of the following:
- * ``'local'`` - A single dataset file in a local directory
- * ``'local_split'`` - A single dataset split across multiple files in a
- local directory
- * ``'local_multiple'`` - Multiple datasets in a local directory
- * ``'esgf'`` - Download the dataset from the Earth System Grid
- Federation
- * ``'rcmed'`` - Download the dataset from the Regional Climate Model
- Evaluation System Database
- * ``'dap'`` - Download the dataset from an OPeNDAP URL
-
- Users who wish to download datasets from sources not described above
- may define their own custom dataset loader function and incorporate it
- as follows:
- >>> loader.add_source_loader('my_source_name', my_loader_func)
-
- :param reference: The reference dataset loader configuration.
- :type reference: :mod:`dict`
-
- :param targets: The target dataset loader configurations.
- :type targets: :mod:`dict` or list of :mod:`dict`
-
- :raises KeyError: If an invalid argument is passed to a data source
- loader function.
- '''
- # Reference dataset config
- self.set_reference(**reference)
-
- # Target dataset(s) config
- self.set_targets(targets)
-
- # Default loaders
- self._source_loaders = {
- 'local':local.load_file,
- 'local_split':local.load_dataset_from_multiple_netcdf_files,
- 'local_multiple':local.load_multiple_files,
- 'esgf':esgf.load_dataset,
- 'rcmed':rcmed.parameter_dataset,
- 'dap':dap.load
- }
-
- def add_source_loader(self, source_name, loader_func):
- '''
- Add a custom source loader.
-
- :param source_name: The name of the data source.
- :type source_name: :mod:`string`
-
- :param loader_func: Reference to a custom defined function. This should
- return an OCW Dataset object.
- :type loader_func: :class:`callable`
- '''
- self._source_loaders[source_name] = loader_func
-
- def add_target(self, **kwargs):
- '''
- A convenient means of adding a target dataset to the loader.
- :raises KeyError: If data_source is not specified.
- '''
- if 'data_source' not in kwargs:
- raise KeyError('Dataset configuration must contain a data_source.')
- self._target_config.append(kwargs)
-
- def add_targets(self, targets):
- '''
- A convenient means of adding multiple target datasets to the loader.
-
- :param targets: List of loader configurations for each target
- :type targets: List of :mod:`dict`
-
- :raises KeyError: If data_source is not specified.
- '''
- for target_config in targets:
- self.add_target(**target_config)
-
- def set_targets(self, targets):
- '''
- Reset the target dataset config.
-
- :param targets: List of loader configurations for each target
- :type targets: List of :mod:`dict`
-
- :raises KeyError: If data_source is not specified.
- '''
- # This check allows for the user to enter targets as one block or
- # as a list of separate blocks in their config files
- if not isinstance(targets, list):
- targets = [targets]
- self._target_config = []
- self.add_targets(targets)
-
- def set_reference(self, **kwargs):
- '''
- Reset the reference dataset config.
- :raises KeyError: If data_source is not specified.
- '''
- if 'data_source' not in kwargs:
- raise KeyError('Dataset configuration must contain a data_source.')
- self._reference_config = kwargs
-
- def load_datasets(self):
- '''
- Loads the datasets from the given loader configurations.
- '''
- # Load the reference dataset
- self.reference_dataset = self._load(**self._reference_config)
-
- # Ensure output is clear if loading is performed more than once to
- # prevent duplicates.
- self.target_datasets = []
-
- # Load the target datasets
- for loader_params in self._target_config:
- output = self._load(**loader_params)
-
- # Need to account for the fact that some loaders return lists
- # of OCW Dataset objects instead of just one
- if isinstance(output, list):
- self.target_datasets.extend(output)
- else:
- self.target_datasets.append(output)
-
- def _load(self, **kwargs):
- '''
- Generic dataset loading method.
- '''
- # Extract the data source
- data_source = kwargs.pop('data_source')
-
- # Find the correct loader function for the given data source
- loader_func = self._source_loaders[data_source]
-
- # The remaining kwargs should be specific to the loader
- output = loader_func(**kwargs)
-
- # Preserve data_source info for later use
- kwargs['data_source'] = data_source
- return output
http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/ocw/tests/test_dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_dataset_loader.py b/ocw/tests/test_dataset_loader.py
deleted file mode 100644
index 2d192c1..0000000
--- a/ocw/tests/test_dataset_loader.py
+++ /dev/null
@@ -1,185 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import unittest
-import os
-import copy
-import netCDF4
-import numpy as np
-from ocw.dataset import Dataset
-from ocw.dataset_loader import DatasetLoader
-
-class TestDatasetLoader(unittest.TestCase):
- def setUp(self):
- # Read netCDF file
- self.file_path = create_netcdf_object()
- self.netCDF_file = netCDF4.Dataset(self.file_path, 'r')
- self.latitudes = self.netCDF_file.variables['latitude'][:]
- self.longitudes = self.netCDF_file.variables['longitude'][:]
- self.times = self.netCDF_file.variables['time'][:]
- self.alt_lats = self.netCDF_file.variables['alt_lat'][:]
- self.alt_lons = self.netCDF_file.variables['alt_lon'][:]
- self.values = self.netCDF_file.variables['value'][:]
- self.values2 = self.values + 1
-
- # Set up config
- self.reference_config = {'data_source': 'local',
- 'file_path': self.file_path,
- 'variable_name': 'value'}
- self.target_config = copy.deepcopy(self.reference_config)
- self.no_data_source_config = {'file_path': self.file_path,
- 'variable_name': 'value'}
- self.new_data_source_config = {'data_source': 'foo',
- 'lats': self.latitudes,
- 'lons': self.longitudes,
- 'times': self.times,
- 'values': self.values2,
- 'variable': 'value'}
-
- def tearDown(self):
- os.remove(self.file_path)
-
- def testInputHasDataSource(self):
- '''
- Make sure input data source is specified for each dataset to be loaded
- '''
- with self.assertRaises(KeyError):
- self.loader = DatasetLoader(self.reference_config,
- self.no_data_source_config)
-
- def testReferenceHasDataSource(self):
- '''
- Make sure ref data source is specified for each dataset to be loaded
- '''
- with self.assertRaises(KeyError):
- self.loader = DatasetLoader(self.reference_config,
- self.target_config)
- self.loader.set_reference(**self.no_data_source_config)
-
- def testTargetHasDataSource(self):
- '''
- Make sure target data source is specified for each dataset to be loaded
- '''
- with self.assertRaises(KeyError):
- self.loader = DatasetLoader(self.reference_config,
- self.target_config)
- self.loader.add_target(**self.no_data_source_config)
-
- def testNewDataSource(self):
- '''
- Ensures that custom data source loaders can be added
- '''
- self.loader = DatasetLoader(self.new_data_source_config,
- self.target_config)
-
- # Here the data_source "foo" represents the Dataset constructor
- self.loader.add_source_loader('foo', build_dataset)
- self.loader.load_datasets()
- self.assertEqual(self.loader.reference_dataset.origin['source'],
- 'foo')
- np.testing.assert_array_equal(self.loader.reference_dataset.values,
- self.values2)
-
- def testExistingDataSource(self):
- '''
- Ensures that existing data source loaders can be used
- '''
- self.loader = DatasetLoader(self.reference_config,
- self.target_config)
- self.loader.load_datasets()
- self.assertEqual(self.loader.reference_dataset.origin['source'],
- 'local')
- np.testing.assert_array_equal(self.loader.reference_dataset.values,
- self.values)
-
- def testMultipleTargets(self):
- '''
- Test for when multiple target dataset configs are specified
- '''
- self.loader = DatasetLoader(self.reference_config,
- [self.target_config,
- self.new_data_source_config])
-
- # Here the data_source "foo" represents the Dataset constructor
- self.loader.add_source_loader('foo', build_dataset)
- self.loader.load_datasets()
- self.assertEqual(self.loader.target_datasets[0].origin['source'],
- 'local')
- self.assertEqual(self.loader.target_datasets[1].origin['source'],
- 'foo')
- np.testing.assert_array_equal(self.loader.target_datasets[0].values,
- self.values)
- np.testing.assert_array_equal(self.loader.target_datasets[1].values,
- self.values2)
-
-def build_dataset(*args, **kwargs):
- '''
- Wrapper to Dataset constructor from fictitious 'foo' data_source.
- '''
- origin = {'source': 'foo'}
- return Dataset(*args, origin=origin, **kwargs)
-
-def create_netcdf_object():
- # To create the temporary netCDF file
- file_path = '/tmp/temporaryNetcdf.nc'
- netCDF_file = netCDF4.Dataset(file_path, 'w', format='NETCDF4')
- # To create dimensions
- netCDF_file.createDimension('lat_dim', 5)
- netCDF_file.createDimension('lon_dim', 5)
- netCDF_file.createDimension('time_dim', 3)
- # To create variables
- latitudes = netCDF_file.createVariable('latitude', 'd', ('lat_dim',))
- longitudes = netCDF_file.createVariable('longitude', 'd', ('lon_dim',))
- times = netCDF_file.createVariable('time', 'd', ('time_dim',))
- # unusual variable names to test optional arguments for Dataset constructor
- alt_lats = netCDF_file.createVariable('alt_lat', 'd', ('lat_dim',))
- alt_lons = netCDF_file.createVariable('alt_lon', 'd', ('lon_dim',))
- alt_times = netCDF_file.createVariable('alt_time', 'd', ('time_dim',))
- values = netCDF_file.createVariable('value', 'd',
- ('time_dim',
- 'lat_dim',
- 'lon_dim')
- )
-
- # To set latitudes and longitudes for five values
- latitudes_data = np.arange(5.)
- longitudes_data = np.arange(150., 155.)
- # Three months of data.
- times_data = np.arange(3)
- # Create 75 values (3 times x 5 lats x 5 lons)
- values_data = np.array([i for i in range(75)])
- # Reshape values to 3D array (time, lats, lons)
- values_data = values_data.reshape(len(times_data), len(latitudes_data),
- len(longitudes_data))
-
- # Ingest values to netCDF file
- latitudes[:] = latitudes_data
- longitudes[:] = longitudes_data
- times[:] = times_data
- alt_lats[:] = latitudes_data + 10
- alt_lons[:] = longitudes_data - 10
- alt_times[:] = times_data
- values[:] = values_data
- # Assign time info to time variable
- netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00'
- netCDF_file.variables['alt_time'].units = 'months since 2001-04-01 00:00:00'
- netCDF_file.variables['value'].units = 'foo_units'
- netCDF_file.close()
- return file_path
-
-if __name__ == '__main__':
- unittest.main()