Posted to commits@climate.apache.org by go...@apache.org on 2016/07/29 17:39:45 UTC

climate git commit: Revert "Merge branch 'CLIMATE-825'"

Repository: climate
Updated Branches:
  refs/heads/master 577dfb787 -> d30d8146f


Revert "Merge branch 'CLIMATE-825'"

This reverts commit 577dfb78704a340b187ee746bef15b91a1fb5414, reversing
changes made to 70cd2b5006d1b0dfe1ff97f1cb2913c3d4ad913e.


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/d30d8146
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/d30d8146
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/d30d8146

Branch: refs/heads/master
Commit: d30d8146fa9c91844411d41f15f5e64423f2bfa2
Parents: 577dfb7
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Fri Jul 29 10:28:58 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Fri Jul 29 10:28:58 2016 -0700

----------------------------------------------------------------------
 docs/source/index.rst              |   2 +-
 docs/source/ocw/dataset_loader.rst |   5 -
 ocw/dataset_loader.py              | 206 --------------------------------
 ocw/tests/test_dataset_loader.py   | 185 ----------------------------
 4 files changed, 1 insertion(+), 397 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/docs/source/index.rst
----------------------------------------------------------------------
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 2834ee6..1485385 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -13,7 +13,6 @@ Contents:
 
    ocw/overview
    ocw/dataset
-   ocw/dataset_loader
    ocw/dataset_processor
    ocw/evaluation
    ocw/metrics
@@ -34,3 +33,4 @@ Indices and tables
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
+

http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/docs/source/ocw/dataset_loader.rst
----------------------------------------------------------------------
diff --git a/docs/source/ocw/dataset_loader.rst b/docs/source/ocw/dataset_loader.rst
deleted file mode 100644
index 833b7f9..0000000
--- a/docs/source/ocw/dataset_loader.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Dataset Loader Module
-*********************
-
-.. automodule:: dataset_loader
-    :members:

http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
deleted file mode 100644
index be43c05..0000000
--- a/ocw/dataset_loader.py
+++ /dev/null
@@ -1,206 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-'''
-Classes:
-    DatasetLoader - Generate OCW Dataset objects from a variety of sources.
-'''
-
-import ocw.data_source.local as local
-import ocw.data_source.esgf as esgf
-import ocw.data_source.rcmed as rcmed
-import ocw.data_source.dap as dap
-
-class DatasetLoader:
-    '''Generate OCW Dataset objects from a variety of sources.'''
-
-    def __init__(self, reference, targets):
-        '''Generate OCW Dataset objects from a variety of sources.
-
-        Each keyword argument holds the configuration for a dataset in
-        dictionary form. For example:
-        ``
-        >>> reference = {'data_source':'rcmed', 'name':'cru', 'dataset_id':10,
-                         'parameter_id':34}
-        >>> targets = {'data_source':'local_multiple',
-                       'path':'./data/CORDEX-Africa_data/AFRICA*pr.nc',
-                       'variable':'pr'}
-        >>> loader = DatasetLoader(reference, targets)
-        ``
-
-        Or, more conveniently, if the loader configuration is defined in a
-        YAML file named config_file (see the RCMES examples):
-        ``
-        >>> import yaml
-        >>> config = yaml.load(open(config_file))
-        >>> loader = DatasetLoader(**config['datasets'])
-        ``
-
-        As shown in the first example, the dictionary for each keyword argument
-        should contain a data source and parameters specific to the loader for
-        that data source. Once the configuration is entered, the datasets may be
-        loaded using:
-        ``
-        >>> loader.load_datasets()
-        >>> target_datasets = loader.target_datasets
-        ``
-
-        If ``reference`` is entered as a keyword argument, then it may be
-        accessed from:
-        ``
-        >>> reference_dataset = loader.reference_dataset
-        ``
-
-        Additionally, each dataset must have a ``data_source`` keyword. This may
-        be one of the following:
-        * ``'local'`` - A single dataset file in a local directory
-        * ``'local_split'`` - A single dataset split across multiple files in a
-                              local directory
-        * ``'local_multiple'`` - Multiple datasets in a local directory
-        * ``'esgf'`` - Download the dataset from the Earth System Grid
-                       Federation
-        * ``'rcmed'`` - Download the dataset from the Regional Climate Model
-                        Evaluation System Database
-        * ``'dap'`` - Download the dataset from an OPeNDAP URL
-
-        Users who wish to download datasets from sources not described above
-        may define their own custom dataset loader function and incorporate it
-        as follows:
-        >>> loader.add_source_loader('my_source_name', my_loader_func)
-
-        :param reference: The reference dataset loader configuration.
-        :type reference: :mod:`dict`
-
-        :param targets: The target dataset loader configurations.
-        :type targets: :mod:`dict` or list of :mod:`dict`
-
-        :raises KeyError: If an invalid argument is passed to a data source
-            loader function.
-        '''
-        # Reference dataset config
-        self.set_reference(**reference)
-
-        # Target dataset(s) config
-        self.set_targets(targets)
-
-        # Default loaders
-        self._source_loaders = {
-                    'local':local.load_file,
-                    'local_split':local.load_dataset_from_multiple_netcdf_files,
-                    'local_multiple':local.load_multiple_files,
-                    'esgf':esgf.load_dataset,
-                    'rcmed':rcmed.parameter_dataset,
-                    'dap':dap.load
-                    }
-
-    def add_source_loader(self, source_name, loader_func):
-        '''
-        Add a custom source loader.
-
-        :param source_name: The name of the data source.
-        :type source_name: :mod:`string`
-
-        :param loader_func: Reference to a custom-defined loader function.
-            This should return an OCW Dataset object.
-        :type loader_func: :class:`callable`
-        '''
-        self._source_loaders[source_name] = loader_func
-
-    def add_target(self, **kwargs):
-        '''
-        A convenient means of adding a target dataset to the loader.
-        :raises KeyError: If data_source is not specified.
-        '''
-        if 'data_source' not in kwargs:
-            raise KeyError('Dataset configuration must contain a data_source.')
-        self._target_config.append(kwargs)
-
-    def add_targets(self, targets):
-        '''
-        A convenient means of adding multiple target datasets to the loader.
-
-        :param targets: List of loader configurations for each target
-        :type targets: List of :mod:`dict`
-
-        :raises KeyError: If data_source is not specified.
-        '''
-        for target_config in targets:
-            self.add_target(**target_config)
-
-    def set_targets(self, targets):
-        '''
-        Reset the target dataset config.
-
-        :param targets: List of loader configurations for each target
-        :type targets: List of :mod:`dict`
-
-        :raises KeyError: If data_source is not specified.
-        '''
-        # This check allows the user to enter targets as one block or
-        # as a list of separate blocks in their config files
-        if not isinstance(targets, list):
-            targets = [targets]
-        self._target_config = []
-        self.add_targets(targets)
-
-    def set_reference(self, **kwargs):
-        '''
-        Reset the reference dataset config.
-        :raises KeyError: If data_source is not specified.
-        '''
-        if 'data_source' not in kwargs:
-            raise KeyError('Dataset configuration must contain a data_source.')
-        self._reference_config = kwargs
-
-    def load_datasets(self):
-        '''
-        Loads the datasets from the given loader configurations.
-        '''
-        # Load the reference dataset
-        self.reference_dataset = self._load(**self._reference_config)
-
-        # Ensure output is clear if loading is performed more than once to
-        # prevent duplicates.
-        self.target_datasets = []
-
-        # Load the target datasets
-        for loader_params in self._target_config:
-            output = self._load(**loader_params)
-
-            # Need to account for the fact that some loaders return lists
-            # of OCW Dataset objects instead of just one
-            if isinstance(output, list):
-                self.target_datasets.extend(output)
-            else:
-                self.target_datasets.append(output)
-
-    def _load(self, **kwargs):
-        '''
-        Generic dataset loading method.
-        '''
-        # Extract the data source
-        data_source = kwargs.pop('data_source')
-
-        # Find the correct loader function for the given data source
-        loader_func = self._source_loaders[data_source]
-
-        # The remaining kwargs should be specific to the loader
-        output = loader_func(**kwargs)
-
-        # Preserve data_source info for later use
-        kwargs['data_source'] = data_source
-        return output

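For reference, the module deleted above documented the usage pattern below. This is a minimal sketch reconstructed from its own docstring, runnable only against a checkout that still contains ocw/dataset_loader.py; the RCMED IDs and the local glob path are the docstring's illustrative values, not verified ones.

    # Sketch of the reverted DatasetLoader API, reconstructed from the
    # docstring above. Requires a tree that still has ocw/dataset_loader.py.
    from ocw.dataset_loader import DatasetLoader

    # Illustrative configurations copied from the docstring; dataset_id,
    # parameter_id, and path are placeholders, not verified values.
    reference = {'data_source': 'rcmed', 'name': 'cru',
                 'dataset_id': 10, 'parameter_id': 34}
    targets = {'data_source': 'local_multiple',
               'path': './data/CORDEX-Africa_data/AFRICA*pr.nc',
               'variable': 'pr'}

    loader = DatasetLoader(reference, targets)
    loader.load_datasets()
    reference_dataset = loader.reference_dataset
    target_datasets = loader.target_datasets
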
http://git-wip-us.apache.org/repos/asf/climate/blob/d30d8146/ocw/tests/test_dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_dataset_loader.py b/ocw/tests/test_dataset_loader.py
deleted file mode 100644
index 2d192c1..0000000
--- a/ocw/tests/test_dataset_loader.py
+++ /dev/null
@@ -1,185 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import unittest
-import os
-import copy
-import netCDF4
-import numpy as np
-from ocw.dataset import Dataset
-from ocw.dataset_loader import DatasetLoader
-
-class TestDatasetLoader(unittest.TestCase):
-    def setUp(self):
-        # Read netCDF file
-        self.file_path = create_netcdf_object()
-        self.netCDF_file = netCDF4.Dataset(self.file_path, 'r')
-        self.latitudes = self.netCDF_file.variables['latitude'][:]
-        self.longitudes = self.netCDF_file.variables['longitude'][:]
-        self.times = self.netCDF_file.variables['time'][:]
-        self.alt_lats = self.netCDF_file.variables['alt_lat'][:]
-        self.alt_lons = self.netCDF_file.variables['alt_lon'][:]
-        self.values = self.netCDF_file.variables['value'][:]
-        self.values2 = self.values + 1
-
-        # Set up config
-        self.reference_config = {'data_source': 'local',
-                                 'file_path': self.file_path,
-                                 'variable_name': 'value'}
-        self.target_config = copy.deepcopy(self.reference_config)
-        self.no_data_source_config = {'file_path': self.file_path,
-                                      'variable_name': 'value'}
-        self.new_data_source_config = {'data_source': 'foo',
-                                       'lats': self.latitudes,
-                                       'lons': self.longitudes,
-                                       'times': self.times,
-                                       'values': self.values2,
-                                       'variable': 'value'}
-
-    def tearDown(self):
-        os.remove(self.file_path)
-
-    def testInputHasDataSource(self):
-        '''
-        Make sure input data source is specified for each dataset to be loaded
-        '''
-        with self.assertRaises(KeyError):
-            self.loader = DatasetLoader(self.reference_config,
-                                        self.no_data_source_config)
-
-    def testReferenceHasDataSource(self):
-        '''
-        Make sure ref data source is specified for each dataset to be loaded
-        '''
-        with self.assertRaises(KeyError):
-            self.loader = DatasetLoader(self.reference_config,
-                                        self.target_config)
-            self.loader.set_reference(**self.no_data_source_config)
-
-    def testTargetHasDataSource(self):
-        '''
-        Make sure target data source is specified for each dataset to be loaded
-        '''
-        with self.assertRaises(KeyError):
-            self.loader = DatasetLoader(self.reference_config,
-                                        self.target_config)
-            self.loader.add_target(**self.no_data_source_config)
-
-    def testNewDataSource(self):
-        '''
-        Ensures that custom data source loaders can be added
-        '''
-        self.loader = DatasetLoader(self.new_data_source_config,
-                                    self.target_config)
-
-        # Here the data_source "foo" represents the Dataset constructor
-        self.loader.add_source_loader('foo', build_dataset)
-        self.loader.load_datasets()
-        self.assertEqual(self.loader.reference_dataset.origin['source'],
-                         'foo')
-        np.testing.assert_array_equal(self.loader.reference_dataset.values,
-                                      self.values2)
-
-    def testExistingDataSource(self):
-        '''
-        Ensures that existing data source loaders can be added
-        '''
-        self.loader = DatasetLoader(self.reference_config,
-                                    self.target_config)
-        self.loader.load_datasets()
-        self.assertEqual(self.loader.reference_dataset.origin['source'],
-                         'local')
-        np.testing.assert_array_equal(self.loader.reference_dataset.values,
-                                      self.values)
-
-    def testMultipleTargets(self):
-        '''
-        Test for when multiple target dataset configs are specified
-        '''
-        self.loader = DatasetLoader(self.reference_config,
-                                    [self.target_config,
-                                     self.new_data_source_config])
-
-        # Here the data_source "foo" represents the Dataset constructor
-        self.loader.add_source_loader('foo', build_dataset)
-        self.loader.load_datasets()
-        self.assertEqual(self.loader.target_datasets[0].origin['source'],
-                         'local')
-        self.assertEqual(self.loader.target_datasets[1].origin['source'],
-                         'foo')
-        np.testing.assert_array_equal(self.loader.target_datasets[0].values,
-                                      self.values)
-        np.testing.assert_array_equal(self.loader.target_datasets[1].values,
-                                      self.values2)
-
-def build_dataset(*args, **kwargs):
-    '''
-    Wraps the Dataset constructor for the fictitious 'foo' data_source.
-    '''
-    origin = {'source': 'foo'}
-    return Dataset(*args, origin=origin, **kwargs)
-
-def create_netcdf_object():
-    # Create the temporary netCDF file
-    file_path = '/tmp/temporaryNetcdf.nc'
-    netCDF_file = netCDF4.Dataset(file_path, 'w', format='NETCDF4')
-    # Create dimensions
-    netCDF_file.createDimension('lat_dim', 5)
-    netCDF_file.createDimension('lon_dim', 5)
-    netCDF_file.createDimension('time_dim', 3)
-    # Create variables
-    latitudes = netCDF_file.createVariable('latitude', 'd', ('lat_dim',))
-    longitudes = netCDF_file.createVariable('longitude', 'd', ('lon_dim',))
-    times = netCDF_file.createVariable('time', 'd', ('time_dim',))
-    # unusual variable names to test optional arguments for Dataset constructor
-    alt_lats = netCDF_file.createVariable('alt_lat', 'd', ('lat_dim',))
-    alt_lons = netCDF_file.createVariable('alt_lon', 'd', ('lon_dim',))
-    alt_times = netCDF_file.createVariable('alt_time', 'd', ('time_dim',))
-    values = netCDF_file.createVariable('value', 'd',
-                                        ('time_dim',
-                                         'lat_dim',
-                                         'lon_dim')
-                                        )
-
-    # Create latitude and longitude arrays with five values each
-    latitudes_data = np.arange(5.)
-    longitudes_data = np.arange(150., 155.)
-    # Three months of data.
-    times_data = np.arange(3)
-    # Create 75 values (3 times x 5 lats x 5 lons)
-    values_data = np.array([i for i in range(75)])
-    # Reshape values to a 3D array (time, lats, lons)
-    values_data = values_data.reshape(len(times_data), len(latitudes_data),
-                                      len(longitudes_data))
-
-    # Ingest values to netCDF file
-    latitudes[:] = latitudes_data
-    longitudes[:] = longitudes_data
-    times[:] = times_data
-    alt_lats[:] = latitudes_data + 10
-    alt_lons[:] = longitudes_data - 10
-    alt_times[:] = times_data
-    values[:] = values_data
-    # Assign time info to time variable
-    netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00'
-    netCDF_file.variables['alt_time'].units = 'months since 2001-04-01 00:00:00'
-    netCDF_file.variables['value'].units = 'foo_units'
-    netCDF_file.close()
-    return file_path
-
-if __name__ == '__main__':
-    unittest.main()
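
The deleted tests above also exercise the loader's extension hook. Below is a minimal sketch mirroring testNewDataSource: the 'foo' source name and build_dataset wrapper come from the test itself, the small in-memory grid stands in for its temporary netCDF fixture, and it likewise assumes a checkout that still contains ocw/dataset_loader.py.

    import numpy as np

    from ocw.dataset import Dataset
    from ocw.dataset_loader import DatasetLoader

    def build_dataset(*args, **kwargs):
        # Wrap the Dataset constructor as the loader for the fictitious
        # 'foo' data source, as in the deleted test above.
        return Dataset(*args, origin={'source': 'foo'}, **kwargs)

    # Small in-memory grid shaped like the test fixture:
    # 3 times x 5 lats x 5 lons.
    reference = {'data_source': 'foo',
                 'lats': np.arange(5.),
                 'lons': np.arange(150., 155.),
                 'times': np.arange(3),
                 'values': np.arange(75).reshape(3, 5, 5),
                 'variable': 'value'}

    loader = DatasetLoader(reference, [])
    loader.add_source_loader('foo', build_dataset)
    loader.load_datasets()
    print(loader.reference_dataset.origin['source'])  # prints 'foo'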