You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by go...@apache.org on 2016/07/29 15:21:07 UTC

[02/10] climate git commit: Added additional functionality to DatasetLoader

Added additional functionality to DatasetLoader


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/a3764c04
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/a3764c04
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/a3764c04

Branch: refs/heads/master
Commit: a3764c046adb506740fc7e3abdbabedfea917ad5
Parents: ecea621
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Mon Jul 18 15:11:41 2016 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Mon Jul 18 15:11:41 2016 -0700

----------------------------------------------------------------------
 ocw/dataset_loader.py | 111 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 93 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/a3764c04/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
index bbcb43c..719cb9b 100644
--- a/ocw/dataset_loader.py
+++ b/ocw/dataset_loader.py
@@ -28,7 +28,7 @@ import ocw.data_source.dap as dap
 class DatasetLoader:
     '''Generate OCW Dataset objects from a variety of sources.'''
 
-    def __init__(self, **kwargs):
+    def __init__(self, reference, targets):
         '''Generate OCW Dataset objects from a variety of sources.
 
         Each keyword argument can be information for a dataset in dictionary
@@ -39,7 +39,7 @@ class DatasetLoader:
         >>> targets = {'data_source':'local_multiple',
                        'path':'./data/CORDEX-Africa_data/AFRICA*pr.nc',
                        'variable':'pr'}
-        >>> loader = DatasetLoader(reference=reference, targets=targets)
+        >>> loader = DatasetLoader(reference, targets)
         ``
 
         Or more conveniently if the loader configuration is defined in a
@@ -82,12 +82,22 @@ class DatasetLoader:
         as follows:
         >>> loader.add_source_loader('my_source_name', my_loader_func)
 
+        :param reference: The reference dataset loader configuration.
+        :type reference: :mod:`dict`
+
+        :param targets: The target dataset loader configurations.
+        :type targets: :mod:`dict` or list of :mod:`dict`
+
         :raises KeyError: If an invalid argument is passed to a data source
         loader function.
         '''
-        self.reference_dataset = None
-        self.target_datasets = []
-        self._config = kwargs
+        # Reference dataset config
+        self.set_reference(reference)
+
+        # Target dataset(s) config
+        self.set_targets(targets)
+
+        # Default loaders
         self._source_loaders = {
                     'local':local.load,
                     'local_split':local.load_dataset_from_multiple_netcdf_files
@@ -106,26 +116,91 @@ class DatasetLoader:
 
         :param loader_func: Reference to a custom defined function. This should
         return an OCW Dataset object.
-        :type loader_func: :mod:`callable`
+        :type loader_func: :class:`callable`
         '''
         self._source_loader[source_name] = loader_func
 
+    def add_target(self, **kwargs):
+        '''
+        Append one target dataset config, passed as keyword arguments.
+        :raises KeyError: If data_source is not specified.
+        '''
+        if 'data_source' not in kwargs:
+            raise KeyError('Dataset configuration must contain a data_source.')
+        self._target_config.append(kwargs)
+
+    def add_targets(self, targets):
+        '''
+        Add several target dataset configs by calling add_target on each one.
+
+        :param targets: Loader configurations, one per target dataset
+        :type targets: list of :mod:`dict`
 
-    def set_config(self, **kwargs):
+        :raises KeyError: If data_source is missing from any configuration.
         '''
-        Change loader config if necessary. See class docstring for more info.
+        for target_config in targets:
+            self.add_target(**target_config)
+
+    def set_targets(self, targets):
         '''
-        self._config = kwargs
+        Reset the target dataset config, discarding any existing targets.
+
+        :param targets: Loader configuration(s) for the target dataset(s)
+        :type targets: :mod:`dict` or list of :mod:`dict`
+
+        :raises KeyError: If data_source is not specified.
+        '''
+        # This check allows for the user to enter targets as one block or
+        # as a list of separate blocks in their config files
+        if not isinstance(targets, list):
+            targets = [targets]
+        self._target_config = []
+        self.add_targets(targets)
+
+    def set_reference(self, **kwargs):
+        '''
+        Reset the reference dataset config.
+
+        :param kwargs: Loader configuration for the reference dataset, given
+        as keyword arguments; it must include data_source.
+
+        :raises KeyError: If data_source is not specified.
+        '''
+        if 'data_source' not in kwargs:
+            raise KeyError('Dataset configuration must contain a data_source.')
+        self._reference_config = kwargs
 
     def load_datasets(self):
         '''
-        Loads the datasets from the given loader configuration.
+        Loads the reference and target datasets from the loader configurations.
         '''
-        for dataset_evaltype, dataset_params in self._config.iteritems():
-            data_source = dataset_params.pop('data_source'):
-            load_func = self._source_loaders[data_source]
-            if dataset_evaltype == 'reference':
-                self.reference_dataset = load_func(**dataset_params)
-            else:
-                target_dataset = load_func(**dataset_params)
-                self.target_datasets.extend(target_dataset)
+        # Load the reference dataset
+        self.reference_dataset = self._load(**self._reference_config)
+
+        # Ensure output is clear if loading is performed more than once to
+        # prevent duplicates.
+        self.target_datasets = []
+
+        # Load the target datasets
+        for loader_params in self._target_config:
+            output = self._load(**loader_params)
+
+            # Need to account for the fact that some loaders return lists
+            # of OCW Dataset objects instead of just one
+            if isinstance(output, list):
+                self.target_datasets.extend(output)
+            else:
+                self.target_datasets.append(output)
+
+    def _load(self, **kwargs):
+        '''
+        Load a dataset using the loader registered for kwargs['data_source'].
+        '''
+        # Extract the data source
+        data_source = kwargs.pop('data_source')
+
+        # Find the correct loader function for the given data source
+        loader_func = self._source_loaders[data_source]
+
+        # The remaining kwargs should be specific to the loader
+        return loader_func(**kwargs)