You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@climate.apache.org by go...@apache.org on 2016/07/29 18:31:38 UTC

[01/11] climate git commit: Added DatasetLoader class

Repository: climate
Updated Branches:
  refs/heads/master 70cd2b500 -> 7ab014106


Added DatasetLoader class


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/ecea6210
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/ecea6210
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/ecea6210

Branch: refs/heads/master
Commit: ecea6210b717e9464ce527c883286a38e41a7a08
Parents: ffd2159
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Mon Jul 18 10:11:13 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Mon Jul 18 10:11:13 2016 -0700

----------------------------------------------------------------------
 ocw/dataset_loader.py | 131 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/ecea6210/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
new file mode 100644
index 0000000..bbcb43c
--- /dev/null
+++ b/ocw/dataset_loader.py
@@ -0,0 +1,131 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+'''
+Classes:
+    DatasetLoader - Generate OCW Dataset objects from a variety of sources.
+'''
+
+import ocw.data_source.local as local
+import ocw.data_source.esgf as esgf
+import ocw.data_source.rcmed as rcmed
+import ocw.data_source.dap as dap
+
+class DatasetLoader:
+    '''Generate OCW Dataset objects from a variety of sources.'''
+
+    def __init__(self, **kwargs):
+        '''Generate OCW Dataset objects from a variety of sources.
+
+        Each keyword argument can be information for a dataset in dictionary
+        form. For example:
+        ``
+        >>> reference = {'data_source':'rcmed', 'name':'cru', 'dataset_id':10,
+                         'parameter_id':34}
+        >>> targets = {'data_source':'local_multiple',
+                       'path':'./data/CORDEX-Africa_data/AFRICA*pr.nc',
+                       'variable':'pr'}
+        >>> loader = DatasetLoader(reference=reference, targets=targets)
+        ``
+
+        Or more conveniently if the loader configuration is defined in a
+        yaml file named config_file (see RCMES examples):
+        ``
+        >>> import yaml
+        >>> config = yaml.load(open(config_file))
+        >>> loader = DatasetLoader(**config['datasets'])
+        ``
+
+        As shown in the first example, the dictionary for each keyword argument
+        should contain a data source and parameters specific to the loader for
+        that data source. Once the configuration is entered, the datasets may be
+        loaded using:
+        ``
+        >>> loader.load_datasets()
+        >>> target_datasets = loader.target_datasets
+        ``
+
+        If ``reference`` is entered as a keyword argument, then it may be
+        accessed from:
+        ``
+        >>> reference_dataset = loader.reference_dataset
+        ``
+
+        Additionally, each dataset must have a ``data_source`` keyword. This may
+        be one of the following:
+        * ``'local'`` - A single dataset file in a local directory
+        * ``'local_split'`` - A single dataset split across multiple files in a
+                              local directory
+        * ``'local_multiple'`` - Multiple datasets in a local directory
+        * ``'esgf'`` - Download the dataset from the Earth System Grid
+                       Federation
+        * ``'rcmed'`` - Download the dataset from the Regional Climate Model
+                        Evaluation System Database
+        * ``'dap'`` - Download the dataset from an OPeNDAP URL
+
+        Users who wish to download datasets from sources not described above
+        may define their own custom dataset loader function and incorporate it
+        as follows:
+        >>> loader.add_source_loader('my_source_name', my_loader_func)
+
+        :raises KeyError: If an invalid argument is passed to a data source
+        loader function.
+        '''
+        self.reference_dataset = None
+        self.target_datasets = []
+        self._config = kwargs
+        self._source_loaders = {
+                    'local':local.load,
+                    'local_split':local.load_dataset_from_multiple_netcdf_files
+                    'local_multiple':local.load_multiple_files,
+                    'esgf':esgf.load_dataset,
+                    'rcmed':parameter_dataset,
+                    'dap':dap.load
+                    }
+
+    def add_source_loader(self, source_name, loader_func):
+        '''
+        Add a custom source loader.
+
+        :param source_name: The name of the data source.
+        :type source_name: :mod:`string`
+
+        :param loader_func: Reference to a custom defined function. This should
+        return an OCW Dataset object.
+        :type loader_func: :mod:`callable`
+        '''
+        self._source_loader[source_name] = loader_func
+
+
+    def set_config(self, **kwargs):
+        '''
+        Change loader config if necessary. See class docstring for more info.
+        '''
+        self._config = kwargs
+
+    def load_datasets(self):
+        '''
+        Loads the datasets from the given loader configuration.
+        '''
+        for dataset_evaltype, dataset_params in self._config.iteritems():
+            data_source = dataset_params.pop('data_source'):
+            load_func = self._source_loaders[data_source]
+            if dataset_evaltype == 'reference':
+                self.reference_dataset = load_func(**dataset_params)
+            else:
+                target_dataset = load_func(**dataset_params)
+                self.target_datasets.extend(target_dataset)

[11/11] climate git commit: Merge branch 'CLIMATE-825'

Posted by go...@apache.org.

Merge branch 'CLIMATE-825'


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/7ab01410
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/7ab01410
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/7ab01410

Branch: refs/heads/master
Commit: 7ab0141061b9a11638452e8e60c3400a92057390
Parents: 70cd2b5 fa8356c
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Fri Jul 29 11:31:12 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Fri Jul 29 11:31:12 2016 -0700

----------------------------------------------------------------------
 docs/source/index.rst              |   2 +-
 docs/source/ocw/dataset_loader.rst |   5 +
 ocw/dataset_loader.py              | 207 ++++++++++++++++++++++++++++++++
 ocw/tests/test_dataset_loader.py   | 185 ++++++++++++++++++++++++++++
 4 files changed, 398 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

[08/11] climate git commit: Add ASF Licence header to test_dataset_loader.py

Posted by go...@apache.org.

Add ASF Licence header to test_dataset_loader.py


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/b56b690c
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/b56b690c
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/b56b690c

Branch: refs/heads/master
Commit: b56b690cb21b260d146a6080fff33b9c0c77319e
Parents: 98a67d1
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Mon Jul 25 16:21:38 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Mon Jul 25 16:21:38 2016 -0700

----------------------------------------------------------------------
 ocw/tests/test_dataset_loader.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/b56b690c/ocw/tests/test_dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_dataset_loader.py b/ocw/tests/test_dataset_loader.py
index da1e8e8..2d192c1 100644
--- a/ocw/tests/test_dataset_loader.py
+++ b/ocw/tests/test_dataset_loader.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import unittest
 import os
 import copy

[07/11] climate git commit: Added unit tests

Posted by go...@apache.org.

Added unit tests


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/98a67d13
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/98a67d13
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/98a67d13

Branch: refs/heads/master
Commit: 98a67d130ad9b2c946f18dba90da6b68c51594de
Parents: 198de48
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Mon Jul 25 16:14:14 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Mon Jul 25 16:14:14 2016 -0700

----------------------------------------------------------------------
 ocw/tests/test_dataset_loader.py | 168 ++++++++++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/98a67d13/ocw/tests/test_dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_dataset_loader.py b/ocw/tests/test_dataset_loader.py
new file mode 100644
index 0000000..da1e8e8
--- /dev/null
+++ b/ocw/tests/test_dataset_loader.py
@@ -0,0 +1,168 @@
+import unittest
+import os
+import copy
+import netCDF4
+import numpy as np
+from ocw.dataset import Dataset
+from ocw.dataset_loader import DatasetLoader
+
+class TestDatasetLoader(unittest.TestCase):
+    def setUp(self):
+        # Read netCDF file
+        self.file_path = create_netcdf_object()
+        self.netCDF_file = netCDF4.Dataset(self.file_path, 'r')
+        self.latitudes = self.netCDF_file.variables['latitude'][:]
+        self.longitudes = self.netCDF_file.variables['longitude'][:]
+        self.times = self.netCDF_file.variables['time'][:]
+        self.alt_lats = self.netCDF_file.variables['alt_lat'][:]
+        self.alt_lons = self.netCDF_file.variables['alt_lon'][:]
+        self.values = self.netCDF_file.variables['value'][:]
+        self.values2 = self.values + 1
+
+        # Set up config
+        self.reference_config = {'data_source': 'local',
+                                 'file_path': self.file_path,
+                                 'variable_name': 'value'}
+        self.target_config = copy.deepcopy(self.reference_config)
+        self.no_data_source_config = {'file_path': self.file_path,
+                                      'variable_name': 'value'}
+        self.new_data_source_config = {'data_source': 'foo',
+                                       'lats': self.latitudes,
+                                       'lons': self.longitudes,
+                                       'times': self.times,
+                                       'values': self.values2,
+                                       'variable': 'value'}
+
+    def tearDown(self):
+        os.remove(self.file_path)
+
+    def testInputHasDataSource(self):
+        '''
+        Make sure input data source is specified for each dataset to be loaded
+        '''
+        with self.assertRaises(KeyError):
+            self.loader = DatasetLoader(self.reference_config,
+                                        self.no_data_source_config)
+
+    def testReferenceHasDataSource(self):
+        '''
+        Make sure ref data source is specified for each dataset to be loaded
+        '''
+        with self.assertRaises(KeyError):
+            self.loader = DatasetLoader(self.reference_config,
+                                        self.target_config)
+            self.loader.set_reference(**self.no_data_source_config)
+
+    def testTargetHasDataSource(self):
+        '''
+        Make sure target data source is specified for each dataset to be loaded
+        '''
+        with self.assertRaises(KeyError):
+            self.loader = DatasetLoader(self.reference_config,
+                                        self.target_config)
+            self.loader.add_target(**self.no_data_source_config)
+
+    def testNewDataSource(self):
+        '''
+        Ensures that custom data source loaders can be added
+        '''
+        self.loader = DatasetLoader(self.new_data_source_config,
+                                    self.target_config)
+
+        # Here the data_source "foo" represents the Dataset constructor
+        self.loader.add_source_loader('foo', build_dataset)
+        self.loader.load_datasets()
+        self.assertEqual(self.loader.reference_dataset.origin['source'],
+                         'foo')
+        np.testing.assert_array_equal(self.loader.reference_dataset.values,
+                                      self.values2)
+
+    def testExistingDataSource(self):
+        '''
+        Ensures that existing data source loaders can be used
+        '''
+        self.loader = DatasetLoader(self.reference_config,
+                                    self.target_config)
+        self.loader.load_datasets()
+        self.assertEqual(self.loader.reference_dataset.origin['source'],
+                         'local')
+        np.testing.assert_array_equal(self.loader.reference_dataset.values,
+                                      self.values)
+
+    def testMultipleTargets(self):
+        '''
+        Test for when multiple target dataset configs are specified
+        '''
+        self.loader = DatasetLoader(self.reference_config,
+                                    [self.target_config,
+                                     self.new_data_source_config])
+
+        # Here the data_source "foo" represents the Dataset constructor
+        self.loader.add_source_loader('foo', build_dataset)
+        self.loader.load_datasets()
+        self.assertEqual(self.loader.target_datasets[0].origin['source'],
+                         'local')
+        self.assertEqual(self.loader.target_datasets[1].origin['source'],
+                         'foo')
+        np.testing.assert_array_equal(self.loader.target_datasets[0].values,
+                                      self.values)
+        np.testing.assert_array_equal(self.loader.target_datasets[1].values,
+                                      self.values2)
+
+def build_dataset(*args, **kwargs):
+    '''
+    Wrapper to Dataset constructor from fictitious 'foo' data_source.
+    '''
+    origin = {'source': 'foo'}
+    return Dataset(*args, origin=origin, **kwargs)
+
+def create_netcdf_object():
+    # To create the temporary netCDF file
+    file_path = '/tmp/temporaryNetcdf.nc'
+    netCDF_file = netCDF4.Dataset(file_path, 'w', format='NETCDF4')
+    # To create dimensions
+    netCDF_file.createDimension('lat_dim', 5)
+    netCDF_file.createDimension('lon_dim', 5)
+    netCDF_file.createDimension('time_dim', 3)
+    # To create variables
+    latitudes = netCDF_file.createVariable('latitude', 'd', ('lat_dim',))
+    longitudes = netCDF_file.createVariable('longitude', 'd', ('lon_dim',))
+    times = netCDF_file.createVariable('time', 'd', ('time_dim',))
+    # unusual variable names to test optional arguments for Dataset constructor
+    alt_lats = netCDF_file.createVariable('alt_lat', 'd', ('lat_dim',))
+    alt_lons = netCDF_file.createVariable('alt_lon', 'd', ('lon_dim',))
+    alt_times = netCDF_file.createVariable('alt_time', 'd', ('time_dim',))
+    values = netCDF_file.createVariable('value', 'd',
+                                        ('time_dim',
+                                         'lat_dim',
+                                         'lon_dim')
+                                        )
+
+    # Create five latitude values and five longitude values
+    latitudes_data = np.arange(5.)
+    longitudes_data = np.arange(150., 155.)
+    # Three months of data.
+    times_data = np.arange(3)
+    # Create 75 values (3 times x 5 lats x 5 lons)
+    values_data = np.array([i for i in range(75)])
+    # Reshape values to a 3D array (time, lats, lons)
+    values_data = values_data.reshape(len(times_data), len(latitudes_data),
+                                      len(longitudes_data))
+
+    # Ingest values to netCDF file
+    latitudes[:] = latitudes_data
+    longitudes[:] = longitudes_data
+    times[:] = times_data
+    alt_lats[:] = latitudes_data + 10
+    alt_lons[:] = longitudes_data - 10
+    alt_times[:] = times_data
+    values[:] = values_data
+    # Assign time info to time variable
+    netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00'
+    netCDF_file.variables['alt_time'].units = 'months since 2001-04-01 00:00:00'
+    netCDF_file.variables['value'].units = 'foo_units'
+    netCDF_file.close()
+    return file_path
+
+if __name__ == '__main__':
+    unittest.main()

[10/11] climate git commit: Fixed PEP8 violations in dataset_loader.py

Posted by go...@apache.org.

Fixed PEP8 violations in dataset_loader.py


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/fa8356c5
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/fa8356c5
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/fa8356c5

Branch: refs/heads/master
Commit: fa8356c5492df5827020be9500d8d2375e777038
Parents: 541dbe5
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Tue Jul 26 21:48:38 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Tue Jul 26 21:48:38 2016 -0700

----------------------------------------------------------------------
 ocw/dataset_loader.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/fa8356c5/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
index be43c05..8ee1b93 100644
--- a/ocw/dataset_loader.py
+++ b/ocw/dataset_loader.py
@@ -25,6 +25,7 @@ import ocw.data_source.esgf as esgf
 import ocw.data_source.rcmed as rcmed
 import ocw.data_source.dap as dap
 
+
 class DatasetLoader:
     '''Generate OCW Dataset objects from a variety of sources.'''
 
@@ -99,13 +100,13 @@ class DatasetLoader:
 
         # Default loaders
         self._source_loaders = {
-                    'local':local.load_file,
-                    'local_split':local.load_dataset_from_multiple_netcdf_files,
-                    'local_multiple':local.load_multiple_files,
-                    'esgf':esgf.load_dataset,
-                    'rcmed':rcmed.parameter_dataset,
-                    'dap':dap.load
-                    }
+            'local': local.load_file,
+            'local_split': local.load_dataset_from_multiple_netcdf_files,
+            'local_multiple': local.load_multiple_files,
+            'esgf': esgf.load_dataset,
+            'rcmed': rcmed.parameter_dataset,
+            'dap': dap.load
+        }
 
     def add_source_loader(self, source_name, loader_func):
         '''

[04/11] climate git commit: Minor docstring fix

Posted by go...@apache.org.

Minor docstring fix


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/d937675d
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/d937675d
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/d937675d

Branch: refs/heads/master
Commit: d937675d0ad1da865d5dab57d895cf77e471f646
Parents: bf5b6eb
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Tue Jul 19 13:42:25 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Tue Jul 19 13:42:25 2016 -0700

----------------------------------------------------------------------
 ocw/dataset_loader.py | 4 ----
 1 file changed, 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/d937675d/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
index d81f869..32a7ae6 100644
--- a/ocw/dataset_loader.py
+++ b/ocw/dataset_loader.py
@@ -160,10 +160,6 @@ class DatasetLoader:
     def set_reference(self, **kwargs):
         '''
         Reset the reference dataset config.
-
-        :param targets: List of loader configurations for each target
-        :type targets: List of :mod:`dict`
-
         :raises KeyError: If data_source is not specified.
         '''
         if 'data_source' not in kwargs:

[09/11] climate git commit: Added dataset_loader.py to Sphinx build

Posted by go...@apache.org.

Added dataset_loader.py to Sphinx build


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/541dbe53
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/541dbe53
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/541dbe53

Branch: refs/heads/master
Commit: 541dbe53fcb4d014161d9a64f7b8f890f3360069
Parents: b56b690
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Mon Jul 25 16:29:28 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Mon Jul 25 16:29:28 2016 -0700

----------------------------------------------------------------------
 docs/source/index.rst              | 2 +-
 docs/source/ocw/dataset_loader.rst | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/541dbe53/docs/source/index.rst
----------------------------------------------------------------------
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 1485385..2834ee6 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -13,6 +13,7 @@ Contents:
 
    ocw/overview
    ocw/dataset
+   ocw/dataset_loader
    ocw/dataset_processor
    ocw/evaluation
    ocw/metrics
@@ -33,4 +34,3 @@ Indices and tables
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
-

http://git-wip-us.apache.org/repos/asf/climate/blob/541dbe53/docs/source/ocw/dataset_loader.rst
----------------------------------------------------------------------
diff --git a/docs/source/ocw/dataset_loader.rst b/docs/source/ocw/dataset_loader.rst
new file mode 100644
index 0000000..833b7f9
--- /dev/null
+++ b/docs/source/ocw/dataset_loader.rst
@@ -0,0 +1,5 @@
+Dataset Loader Module
+*********************
+
+.. automodule:: dataset_loader
+    :members:

[03/11] climate git commit: Minor fixes to dataset_loader

Posted by go...@apache.org.

Minor fixes to dataset_loader


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/bf5b6eba
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/bf5b6eba
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/bf5b6eba

Branch: refs/heads/master
Commit: bf5b6eba6251c54317693cc1df2a05e3e8401dfb
Parents: a3764c0
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Tue Jul 19 13:38:14 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Tue Jul 19 13:38:14 2016 -0700

----------------------------------------------------------------------
 ocw/dataset_loader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/bf5b6eba/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
index 719cb9b..d81f869 100644
--- a/ocw/dataset_loader.py
+++ b/ocw/dataset_loader.py
@@ -192,9 +192,9 @@ class DatasetLoader:
                 else:
                     self.target_datasets.append(output)
 
-    def _load(**kwargs):
+    def _load(self, **kwargs):
         '''
-        Generic dataset loading method
+        Generic dataset loading method.
         '''
         # Extract the data source
         data_source = kwargs.pop('data_source')

[05/11] climate git commit: Preserve data_source info after each load

Posted by go...@apache.org.

Preserve data_source info after each load


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/4add58b2
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/4add58b2
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/4add58b2

Branch: refs/heads/master
Commit: 4add58b2e5dd4e6b28509402e35adffbc03692e7
Parents: d937675
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Mon Jul 25 15:14:42 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Mon Jul 25 15:14:42 2016 -0700

----------------------------------------------------------------------
 ocw/dataset_loader.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/4add58b2/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
index 32a7ae6..2eb9ef0 100644
--- a/ocw/dataset_loader.py
+++ b/ocw/dataset_loader.py
@@ -199,4 +199,8 @@ class DatasetLoader:
         loader_func = self._source_loaders[data_source]
 
         # The remaining kwargs should be specific to the loader
-        return loader_func(**kwargs)
+        output = loader_func(**kwargs)
+
+        # Preserve data_source info for later use
+        kwargs['data_source'] = data_source
+        return output

[06/11] climate git commit: Fixing some bugs found from testing

Posted by go...@apache.org.

Fixing some bugs found from testing


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/198de483
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/198de483
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/198de483

Branch: refs/heads/master
Commit: 198de48332c84586dafcaf033fa27a8eb8818ebe
Parents: 4add58b
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Mon Jul 25 16:13:48 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Mon Jul 25 16:13:48 2016 -0700

----------------------------------------------------------------------
 ocw/dataset_loader.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/198de483/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
index 2eb9ef0..be43c05 100644
--- a/ocw/dataset_loader.py
+++ b/ocw/dataset_loader.py
@@ -92,18 +92,18 @@ class DatasetLoader:
         loader function.
         '''
         # Reference dataset config
-        self.set_reference(reference)
+        self.set_reference(**reference)
 
         # Target dataset(s) config
         self.set_targets(targets)
 
         # Default loaders
         self._source_loaders = {
-                    'local':local.load,
-                    'local_split':local.load_dataset_from_multiple_netcdf_files
+                    'local':local.load_file,
+                    'local_split':local.load_dataset_from_multiple_netcdf_files,
                     'local_multiple':local.load_multiple_files,
                     'esgf':esgf.load_dataset,
-                    'rcmed':parameter_dataset,
+                    'rcmed':rcmed.parameter_dataset,
                     'dap':dap.load
                     }
 
@@ -118,7 +118,7 @@ class DatasetLoader:
         return an OCW Dataset object.
         :type loader_func: :class:`callable`
         '''
-        self._source_loader[source_name] = loader_func
+        self._source_loaders[source_name] = loader_func
 
     def add_target(self, **kwargs):
         '''
@@ -152,7 +152,7 @@ class DatasetLoader:
         '''
         # This check allows for the user to enter targets as one block or
         # as a list of separate blocks in their config files
-        if not instanceof(targets, list):
+        if not isinstance(targets, list):
             targets = [targets]
         self._target_config = []
         self.add_targets(targets)
@@ -178,15 +178,15 @@ class DatasetLoader:
         self.target_datasets = []
 
         # Load the target datasets
-        for loader_params in self._target_config
+        for loader_params in self._target_config:
             output = self._load(**loader_params)
 
-                # Need to account for the fact that some loaders return lists
-                # of OCW Dataset objects instead of just one
-                if isinstance(target_dataset, list):
-                    self.target_datasets.extend(output)
-                else:
-                    self.target_datasets.append(output)
+            # Need to account for the fact that some loaders return lists
+            # of OCW Dataset objects instead of just one
+            if isinstance(output, list):
+                self.target_datasets.extend(output)
+            else:
+                self.target_datasets.append(output)
 
     def _load(self, **kwargs):
         '''

[02/11] climate git commit: Added additional functionality to DatasetLoader

Posted by go...@apache.org.

Added additional functionality to DatasetLoader


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/a3764c04
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/a3764c04
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/a3764c04

Branch: refs/heads/master
Commit: a3764c046adb506740fc7e3abdbabedfea917ad5
Parents: ecea621
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Mon Jul 18 15:11:41 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Mon Jul 18 15:11:41 2016 -0700

----------------------------------------------------------------------
 ocw/dataset_loader.py | 111 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 93 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/a3764c04/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
index bbcb43c..719cb9b 100644
--- a/ocw/dataset_loader.py
+++ b/ocw/dataset_loader.py
@@ -28,7 +28,7 @@ import ocw.data_source.dap as dap
 class DatasetLoader:
     '''Generate OCW Dataset objects from a variety of sources.'''
 
-    def __init__(self, **kwargs):
+    def __init__(self, reference, targets):
         '''Generate OCW Dataset objects from a variety of sources.
 
         Each keyword argument can be information for a dataset in dictionary
@@ -39,7 +39,7 @@ class DatasetLoader:
         >>> targets = {'data_source':'local_multiple',
                        'path':'./data/CORDEX-Africa_data/AFRICA*pr.nc',
                        'variable':'pr'}
-        >>> loader = DatasetLoader(reference=reference, targets=targets)
+        >>> loader = DatasetLoader(reference, targets)
         ``
 
         Or more conveniently if the loader configuration is defined in a
@@ -82,12 +82,22 @@ class DatasetLoader:
         as follows:
         >>> loader.add_source_loader('my_source_name', my_loader_func)
 
+        :param reference: The reference dataset loader configuration.
+        :type reference: :mod:`dict`
+
+        :param targets: The target dataset loader configurations.
+        :type targets: :mod:`dict` or list of mod:`dict`
+
         :raises KeyError: If an invalid argument is passed to a data source
         loader function.
         '''
-        self.reference_dataset = None
-        self.target_datasets = []
-        self._config = kwargs
+        # Reference dataset config
+        self.set_reference(reference)
+
+        # Target dataset(s) config
+        self.set_targets(targets)
+
+        # Default loaders
         self._source_loaders = {
                     'local':local.load,
                     'local_split':local.load_dataset_from_multiple_netcdf_files
@@ -106,26 +116,91 @@ class DatasetLoader:
 
         :param loader_func: Reference to a custom defined function. This should
         return an OCW Dataset object.
-        :type loader_func: :mod:`callable`
+        :type loader_func: :class:`callable`
         '''
         self._source_loader[source_name] = loader_func
 
+    def add_target(self, **kwargs):
+        '''
+        A convenient means of adding a target dataset to the loader.
+        :raises KeyError: If data_source is not specified.
+        '''
+        if 'data_source' not in kwargs:
+            raise KeyError('Dataset configuration must contain a data_source.')
+        self._target_config.append(kwargs)
+
+    def add_targets(self, targets):
+        '''
+        A convenient means of adding multiple target datasets to the loader.
+
+        :param targets: List of loader configurations for each target
+        :type targets: List of :mod:`dict`
 
-    def set_config(self, **kwargs):
+        :raises KeyError: If data_source is not specified.
         '''
-        Change loader config if necessary. See class docstring for more info.
+        for target_config in targets:
+            self.add_target(**target_config)
+
+    def set_targets(self, targets):
         '''
-        self._config = kwargs
+        Reset the target dataset config.
+
+        :param targets: List of loader configurations for each target
+        :type targets: List of :mod:`dict`
+
+        :raises KeyError: If data_source is not specified.
+        '''
+        # This check allows for the user to enter targets as one block or
+        # as a list of separate blocks in their config files
+        if not instanceof(targets, list):
+            targets = [targets]
+        self._target_config = []
+        self.add_targets(targets)
+
+    def set_reference(self, **kwargs):
+        '''
+        Reset the reference dataset config.
+
+        :param targets: List of loader configurations for each target
+        :type targets: List of :mod:`dict`
+
+        :raises KeyError: If data_source is not specified.
+        '''
+        if 'data_source' not in kwargs:
+            raise KeyError('Dataset configuration must contain a data_source.')
+        self._reference_config = kwargs
 
     def load_datasets(self):
         '''
-        Loads the datasets from the given loader configuration.
+        Loads the datasets from the given loader configurations.
         '''
-        for dataset_evaltype, dataset_params in self._config.iteritems():
-            data_source = dataset_params.pop('data_source'):
-            load_func = self._source_loaders[data_source]
-            if dataset_evaltype == 'reference':
-                self.reference_dataset = load_func(**dataset_params)
-            else:
-                target_dataset = load_func(**dataset_params)
-                self.target_datasets.extend(target_dataset)
+        # Load the reference dataset
+        self.reference_dataset = self._load(**self._reference_config)
+
+        # Ensure output is clear if loading is performed more than once to
+        # prevent duplicates.
+        self.target_datasets = []
+
+        # Load the target datasets
+        for loader_params in self._target_config
+            output = self._load(**loader_params)
+
+                # Need to account for the fact that some loaders return lists
+                # of OCW Dataset objects instead of just one
+                if isinstance(target_dataset, list):
+                    self.target_datasets.extend(output)
+                else:
+                    self.target_datasets.append(output)
+
+    def _load(**kwargs):
+        '''
+        Generic dataset loading method
+        '''
+        # Extract the data source
+        data_source = kwargs.pop('data_source')
+
+        # Find the correct loader function for the given data source
+        loader_func = self._source_loaders[data_source]
+
+        # The remaining kwargs should be specific to the loader
+        return loader_func(**kwargs)