You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by jo...@apache.org on 2014/11/07 22:39:40 UTC

[3/6] climate git commit: CLIMATE-552 - Comments and minor code cleanup

CLIMATE-552 - Comments and minor code cleanup

- Cleanup doc strings and in-line documentation.
- Remove unneeded list conversions of sets.


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/32a9bd19
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/32a9bd19
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/32a9bd19

Branch: refs/heads/master
Commit: 32a9bd192dcd7e96f7e71061bffe86f20db3d563
Parents: 3992e0c
Author: Michael Joyce <jo...@apache.org>
Authored: Fri Nov 7 12:00:08 2014 -0800
Committer: Michael Joyce <jo...@apache.org>
Committed: Fri Nov 7 12:00:08 2014 -0800

----------------------------------------------------------------------
 ocw/data_source/local.py | 68 +++++++++++++++++++++++++++++--------------
 1 file changed, 46 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/32a9bd19/ocw/data_source/local.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index 85e27b3..0875d0c 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -32,46 +32,61 @@ TIME_NAMES = ['time', 'times', 'date', 'dates', 'julian']
 
 
 def _get_netcdf_variable_name(valid_var_names, netcdf, netcdf_var):
-    '''Return valid variable from given netCDF object.
+    ''' Determine if one of a set of variable names is in a NetCDF Dataset.
 
-    Looks for an occurrence of a valid_var_name in the netcdf variable data.
-    If multiple possible matches are found a ValueError is raised. If no
-    matching variable names are found a Value is raised.
+    Looks for an occurrence of a valid_var_name in the NetCDF variable data.
+    This is useful for automatically determining the names of the lat, lon,
+    and time variable names inside of a dataset object.
 
     :param valid_var_names: The possible variable names to search for in 
         the netCDF object.
     :type valid_var_names: List of Strings
-    :param netcdf: The netCDF object in which to check for valid_var_names.
+    :param netcdf: The netCDF Dataset object in which to check for
+        valid_var_names.
     :type netcdf: netcdf4.Dataset
     :param netcdf_var: The relevant variable name to search over in the 
-        netcdf object.
+        netcdf object. This is used to narrow down the search for valid
+        variable names by first checking the desired variable's dimension
+        values for one or more of the valid variable names.
 
     :returns: The variable from valid_var_names that it locates in 
         the netCDF object.
 
-    :raises: ValueError
+    :raises ValueError: When unable to locate a single matching variable
+        name in the NetCDF Dataset from the supplied list of valid variable
+        names.
     '''
 
-    # Check for valid variable names in netCDF value variable dimensions
+    # Check for valid variable names in netCDF variable dimensions
     dimensions = netcdf.variables[netcdf_var].dimensions
     dims_lower = [dim.encode().lower() for dim in dimensions]
 
-    intersect = list(set(valid_var_names).intersection(dims_lower))
+    intersect = set(valid_var_names).intersection(dims_lower)
 
     if len(intersect) == 1:
-        index = dims_lower.index(intersect[0])
+        # Retrieve the name of the dimension where we found the matching
+        # variable name
+        index = dims_lower.index(intersect.pop())
         dimension_name = dimensions[index].encode()
 
+        # Locate all of the variables that share the dimension that we matched
+        # earlier. If the dimension's name matches then that variable is
+        # potentially what we want to return to the user.
         possible_vars = []
         for var in netcdf.variables.keys():
             var_dimensions = netcdf.variables[var].dimensions
 
+            # Skip any variables that aren't 1D
             if len(var_dimensions) != 1:
                 continue
 
             if var_dimensions[0].encode() == dimension_name:
                 possible_vars.append(var)
 
+        # If there are multiple variables with matching dimension names then we
+        # aren't able to determine the correct variable name using the
+        # variable dimensions. We need to try a different approach. Otherwise,
+        # we're done!
         if len(possible_vars) == 1:
             return possible_vars[0]
 
@@ -79,31 +94,41 @@ def _get_netcdf_variable_name(valid_var_names, netcdf, netcdf_var):
     variables = netcdf.variables.keys()
     vars_lower = [var.encode().lower() for var in variables]
 
-    intersect = list(set(valid_var_names).intersection(vars_lower))
+    intersect = set(valid_var_names).intersection(vars_lower)
 
     if len(intersect) == 1:
-        index = vars_lower.index(intersect[0])
+        index = vars_lower.index(intersect.pop())
         return variables[index]
 
-    # If we couldn't find a single matching valid variable name, we're
-    # unable to load the file properly.
+    # If we couldn't locate a single matching valid variable then we're unable
+    # to automatically determine the variable names for the user.
     error = (
-        "Unable to locate a single matching variable name in NetCDF object. "
+        "Unable to locate a single matching variable name from the "
+        "supplied list of valid variable names. "
     )
     raise ValueError(error)
 
 def load_file(file_path, variable_name, elevation_index=0):
-    '''Load netCDF file, get the all variables name and get the data.
+    ''' Load a NetCDF file into a Dataset.
 
-    :param file_path: NetCDF directory with file name
+    :param file_path: Path to the NetCDF file to load.
     :type file_path: String
-    :param variable_name: The given (by user) value variable name
+    :param variable_name: The variable name to load from the NetCDF file.
     :type variable_name: String
-
-    :returns: An OCW Dataset object containing the requested parameter data.
+    :param elevation_index: The elevation index for which data should be
+        returned. Climate data is often times 4 dimensional data. Some datasets
+        will have readings at different height/elevation levels. OCW expects
+        3D data so a single layer needs to be stripped out when loading. By
+        default, the first elevation layer is used. If desired you may specify
+        the elevation value to use.
+
+    :returns: An OCW Dataset object with the requested variable's data from
+        the NetCDF file.
     :rtype: ocw.dataset.Dataset object
 
-    :raises: ValueError
+    :raises ValueError: When the specified file path cannot be loaded by netCDF4
+        or when the lat/lon/time variable name cannot be determined
+        automatically.
     '''
 
     try:
@@ -123,7 +148,6 @@ def load_file(file_path, variable_name, elevation_index=0):
     times = numpy.array(times)
     values = ma.array(netcdf.variables[variable_name][:])
 
-
     if len(values.shape) == 4:
         value_dimensions_names = [dim_name.encode() for dim_name in netcdf.variables[variable_name].dimensions]
         lat_lon_time_var_names = [lat_name, lon_name, time_name]