Posted to commits@climate.apache.org by hu...@apache.org on 2016/06/27 00:00:13 UTC
[1/2] climate git commit: CLIMATE-812 - Fix PEP8 violations in dataset processor
Repository: climate
Updated Branches:
refs/heads/master 54f211084 -> 3ce4e2094
CLIMATE-812 - Fix PEP8 violations in dataset processor
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/9fa0e4c5
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/9fa0e4c5
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/9fa0e4c5
Branch: refs/heads/master
Commit: 9fa0e4c5a8ed0223e5ee2a9494214a92ad22013f
Parents: 705403a
Author: Ibrahim Jarif <ja...@gmail.com>
Authored: Sat Jun 18 15:21:50 2016 +0530
Committer: Ibrahim Jarif <ja...@gmail.com>
Committed: Sat Jun 18 15:21:50 2016 +0530
----------------------------------------------------------------------
ocw/dataset_processor.py | 762 ++++++++++++++++++++++++------------------
1 file changed, 439 insertions(+), 323 deletions(-)
----------------------------------------------------------------------
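For context (not part of the patch): the check this commit is meant to satisfy can be reproduced with the third-party pycodestyle package (published under the name "pep8" at the time of this commit). The snippet below is a minimal sketch under that assumption; it simply counts the violations remaining in ocw/dataset_processor.py when run from the repository root.

    # Hypothetical verification, not part of this commit.
    # Assumes `pip install pycodestyle` (or the older `pep8` package with the
    # same StyleGuide API) and that the relative path resolves.
    import pycodestyle

    style = pycodestyle.StyleGuide(max_line_length=79)
    report = style.check_files(['ocw/dataset_processor.py'])

    if report.total_errors:
        print('%d PEP8 violation(s) remain' % report.total_errors)
    else:
        print('ocw/dataset_processor.py is PEP8 clean')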
http://git-wip-us.apache.org/repos/asf/climate/blob/9fa0e4c5/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index bab78e8..9ab4e50 100755
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -31,7 +31,9 @@ import logging
logger = logging.getLogger(__name__)
-def temporal_subset(month_start, month_end, target_dataset, average_each_year=False):
+
+def temporal_subset(month_start, month_end, target_dataset,
+ average_each_year=False):
""" Temporally subset data given month_index.
:param month_start: An integer for beginning month (Jan=1)
@@ -51,21 +53,16 @@ def temporal_subset(month_start, month_end, target_dataset, average_each_year=Fa
"""
if month_start > month_end:
- month_index = range(month_start,13)
- month_index.extend(range(1, month_end+1))
+ month_index = range(month_start, 13)
+ month_index.extend(range(1, month_end + 1))
else:
- month_index = range(month_start, month_end+1)
+ month_index = range(month_start, month_end + 1)
dates = target_dataset.times
months = np.array([d.month for d in dates])
time_index = []
for m_value in month_index:
time_index = np.append(time_index, np.where(months == m_value)[0])
- if m_value == month_index[0]:
- time_index_first = np.min(np.where(months == m_value)[0])
- if m_value == month_index[-1]:
- time_index_last = np.max(np.where(months == m_value)[0])
-
time_index = np.sort(time_index)
time_index = list(time_index)
@@ -73,25 +70,27 @@ def temporal_subset(month_start, month_end, target_dataset, average_each_year=Fa
new_dataset = ds.Dataset(target_dataset.lats,
target_dataset.lons,
target_dataset.times[time_index],
- target_dataset.values[time_index,:],
+ target_dataset.values[time_index, :],
variable=target_dataset.variable,
units=target_dataset.units,
name=target_dataset.name)
if average_each_year:
nmonth = len(month_index)
- ntime = new_dataset.times.size
- nyear = ntime/nmonth
- averaged_time = []
+ ntime = new_dataset.times.size
+ nyear = ntime / nmonth
+ averaged_time = []
ny, nx = target_dataset.values.shape[1:]
- averaged_values =ma.zeros([nyear, ny, nx])
+ averaged_values = ma.zeros([nyear, ny, nx])
for iyear in np.arange(nyear):
- # centered time index of the season between month_start and month_end in each year
- center_index = int(nmonth/2)+iyear*nmonth
+ # centered time index of the season between month_start and
+ # month_end in each year
+ center_index = int(nmonth / 2) + iyear * nmonth
if nmonth == 1:
center_index = iyear
- averaged_time.append(new_dataset.times[center_index])
- averaged_values[iyear,:] = ma.average(new_dataset.values[nmonth*iyear:nmonth*iyear+nmonth,:], axis=0)
+ averaged_time.append(new_dataset.times[center_index])
+ averaged_values[iyear, :] = ma.average(new_dataset.values[
+ nmonth * iyear: nmonth * iyear + nmonth, :], axis=0)
new_dataset = ds.Dataset(target_dataset.lats,
target_dataset.lons,
np.array(averaged_time),
@@ -99,59 +98,69 @@ def temporal_subset(month_start, month_end, target_dataset, average_each_year=Fa
variable=target_dataset.variable,
units=target_dataset.units,
name=target_dataset.name)
-
return new_dataset
-def temporal_rebin(target_dataset, temporal_resolution):
+
+def temporal_rebin(target_dataset, temporal_resolution):
""" Rebin a Dataset to a new temporal resolution
-
+
:param target_dataset: Dataset object that needs temporal rebinned
:type target_dataset: :class:`dataset.Dataset`
:param temporal_resolution: The new temporal resolution
- :type temporal_resolution: :mod:`string`
-
+ :type temporal_resolution: :mod:`string`
+
:returns: A new temporally rebinned Dataset
:rtype: :class:`dataset.Dataset`
"""
- # Decode the temporal resolution into a string format that
+ # Decode the temporal resolution into a string format that
# _rcmes_calc_average_on_new_time_unit_K() can understand
- binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit(target_dataset.values, target_dataset.times, temporal_resolution)
+ binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit(
+ target_dataset.values, target_dataset.times, temporal_resolution)
binned_dates = np.array(binned_dates)
- new_dataset = ds.Dataset(target_dataset.lats,
- target_dataset.lons,
- binned_dates,
+ new_dataset = ds.Dataset(target_dataset.lats,
+ target_dataset.lons,
+ binned_dates,
binned_values,
variable=target_dataset.variable,
units=target_dataset.units,
name=target_dataset.name,
origin=target_dataset.origin)
-
return new_dataset
+
def temporal_rebin_with_time_index(target_dataset, nt_average):
""" Rebin a Dataset to a new temporal resolution
:param target_dataset: Dataset object that needs temporal rebinned
:type target_dataset: :class:`dataset.Dataset`
- :param nt_average: Time resolution for the output datasets.
- It is the same as the number of time indicies to be averaged. (length of time dimension in the rebinned dataset) = (original time dimension length/nt_average)
+ :param nt_average: Time resolution for the output datasets.
+    It is the same as the number of time indices to be averaged.
+    (length of time dimension in the rebinned dataset) =
+ (original time dimension length/nt_average)
:type temporal_resolution: integer
:returns: A new temporally rebinned Dataset
:rtype: :class:`dataset.Dataset`
"""
nt = target_dataset.times.size
- if nt % nt_average !=0:
- print 'Warning: length of time dimension must be a multiple of nt_average'
+ if nt % nt_average != 0:
+ msg = ('Warning: length of time dimension must '
+ 'be a multiple of nt_average')
+ print msg
# nt2 is the length of time dimension in the rebinned dataset
- nt2 = nt/nt_average
- binned_dates = target_dataset.times[np.arange(nt2)*nt_average]
- binned_values = ma.zeros(np.insert(target_dataset.values.shape[1:],0,nt2))
+ nt2 = nt / nt_average
+ binned_dates = target_dataset.times[np.arange(nt2) * nt_average]
+ binned_values = ma.zeros(
+ np.insert(target_dataset.values.shape[1:], 0, nt2))
for it in np.arange(nt2):
- binned_values[it,:] = ma.average(target_dataset.values[nt_average*it:nt_average*it+nt_average,:], axis=0)
+ binned_values[it, :] = ma.average(
+ target_dataset.values[nt_average * it:
+ nt_average * it + nt_average,
+ :],
+ axis=0)
new_dataset = ds.Dataset(target_dataset.lats,
target_dataset.lons,
binned_dates,
@@ -162,7 +171,9 @@ def temporal_rebin_with_time_index(target_dataset, nt_average):
origin=target_dataset.origin)
return new_dataset
-def spatial_regrid(target_dataset, new_latitudes, new_longitudes, boundary_check=True):
+
+def spatial_regrid(target_dataset, new_latitudes, new_longitudes,
+ boundary_check=True):
""" Regrid a Dataset using the new latitudes and longitudes
:param target_dataset: Dataset object that needs spatially regridded
@@ -174,25 +185,26 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes, boundary_check
:param new_longitudes: Array of longitudes
:type new_longitudes: :class:`numpy.ndarray`
- :param boundary_check: Check if the regriding domain's boundaries are outside target_dataset's domain
+    :param boundary_check: Check if the regridding domain's boundaries
+ are outside target_dataset's domain
:type boundary_check: :class:'bool'
:returns: A new spatially regridded Dataset
:rtype: :class:`dataset.Dataset`
"""
-
+
# Create grids of the given lats and lons for the underlying API
# NOTE: np.meshgrid() requires inputs (x, y) and returns data
# of shape(y|lat|rows, x|lon|columns). So we pass in lons, lats
# and get back data.shape(lats, lons)
- if target_dataset.lons.ndim ==1 and target_dataset.lats.ndim ==1:
+ if target_dataset.lons.ndim == 1 and target_dataset.lats.ndim == 1:
regular_grid = True
lons, lats = np.meshgrid(target_dataset.lons, target_dataset.lats)
else:
regular_grid = False
lons = target_dataset.lons
lats = target_dataset.lats
- if new_longitudes.ndim ==1 and new_latitudes.ndim ==1:
+ if new_longitudes.ndim == 1 and new_latitudes.ndim == 1:
new_lons, new_lats = np.meshgrid(new_longitudes, new_latitudes)
else:
new_lons = new_longitudes
@@ -202,91 +214,116 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes, boundary_check
ny_new, nx_new = new_lats.shape
# Make masked array of shape (times, new_latitudes,new_longitudes)
- new_values = ma.zeros([len(target_dataset.times),
- ny_new, nx_new])
+ new_values = ma.zeros([len(target_dataset.times),
+ ny_new, nx_new])
# Make masked array of shape (times, new_latitudes,new_longitudes)
- new_values = ma.zeros([len(target_dataset.times),
+ new_values = ma.zeros([len(target_dataset.times),
ny_new, nx_new])
# Boundary vertices of target_dataset
vertices = []
- if regular_grid:
- vertices.append([lons[0,0], lats[0,0]])
- vertices.append([lons[-1,0], lats[-1,0]])
- vertices.append([lons[-1,-1], lats[-1,-1]])
- vertices.append([lons[0,-1], lats[0,-1]])
- else:
- for iy in np.arange(ny_old): # from south to north along the west boundary
- vertices.append([lons[iy,0], lats[iy,0]])
- for ix in np.arange(nx_old): # from west to east along the north boundary
+ if regular_grid:
+ vertices.append([lons[0, 0], lats[0, 0]])
+ vertices.append([lons[-1, 0], lats[-1, 0]])
+ vertices.append([lons[-1, -1], lats[-1, -1]])
+ vertices.append([lons[0, -1], lats[0, -1]])
+ else:
+ # from south to north along the west boundary
+ for iy in np.arange(ny_old):
+ vertices.append([lons[iy, 0], lats[iy, 0]])
+ # from west to east along the north boundary
+ for ix in np.arange(nx_old):
vertices.append([lons[-1, ix], lats[-1, ix]])
- for iy in np.arange(ny_old)[::-1]: # from north to south along the east boundary
+ # from north to south along the east boundary
+ for iy in np.arange(ny_old)[::-1]:
vertices.append([lons[iy, -1], lats[iy, -1]])
- for ix in np.arange(nx_old)[::-1]: # from east to west along the south boundary
+ # from east to west along the south boundary
+ for ix in np.arange(nx_old)[::-1]:
vertices.append([lons[0, ix], lats[0, ix]])
path = Path(vertices)
# Convert new_lats and new_lons to float indices
new_lons_indices = np.zeros(new_lons.shape)
new_lats_indices = np.zeros(new_lats.shape)
-
+
for iy in np.arange(ny_new):
for ix in np.arange(nx_new):
- if path.contains_point([new_lons[iy,ix], new_lats[iy,ix]]) or not boundary_check:
+ if path.contains_point([new_lons[iy, ix],
+ new_lats[iy, ix]]) or not boundary_check:
if regular_grid:
- new_lats_indices[iy,ix] = (ny_old -1.)*(new_lats[iy,ix] - lats.min())/(lats.max() - lats.min())
- new_lons_indices[iy,ix] = (nx_old -1.)*(new_lons[iy,ix] - lons.min())/(lons.max() - lons.min())
+ mn = lats.min()
+ mx = lats.max()
+                    new_lats_indices[iy, ix] = (
+                        ny_old - 1.) * (new_lats[iy, ix] - mn) / (mx - mn)
+ mn = lons.min()
+ mx = lons.max()
+ new_lons_indices[iy, ix] = (
+ nx_old - 1.) * (new_lons[iy, ix] - mn) / (mx - mn)
else:
- distance_from_original_grids = ((lons - new_lons[iy,ix])**2.+(lats - new_lats[iy,ix])**2.)**0.5
+ distance_from_original_grids = (
+ (lons - new_lons[iy, ix])**2. +
+ (lats - new_lats[iy, ix])**2.)**0.5
if np.min(distance_from_original_grids) == 0.:
- new_lats_indices[iy,ix], new_lons_indices[iy,ix] = np.where(distance_from_original_grids ==0)
+ new_lats_indices[iy, ix], new_lons_indices[
+ iy, ix] = np.where(
+ distance_from_original_grids == 0)
else:
- distance_rank = rankdata(distance_from_original_grids.flatten(), method='ordinal').reshape(lats.shape)
- iy1, ix1 = np.where(distance_rank == 1) # the nearest grid point's indices
- iy2, ix2 = np.where(distance_rank == 4) # point [iy2, ix] is diagonally across from [iy1, ix1]
- dist1 = distance_from_original_grids[iy1,ix1]
- dist2 = distance_from_original_grids[iy2,ix2]
- new_lats_indices[iy,ix] = (dist1*iy2 + dist2*iy1)/(dist1+dist2)
- new_lons_indices[iy,ix] = (dist1*ix2 + dist2*ix1)/(dist1+dist2)
+ distance_rank = rankdata(
+ distance_from_original_grids.flatten(),
+ method='ordinal').reshape(lats.shape)
+ # the nearest grid point's indices
+ iy1, ix1 = np.where(distance_rank == 1)
+                        # point [iy2, ix2] is diagonally across from [iy1, ix1]
+ iy2, ix2 = np.where(distance_rank == 4)
+ dist1 = distance_from_original_grids[iy1, ix1]
+ dist2 = distance_from_original_grids[iy2, ix2]
+ new_lats_indices[iy, ix] = (
+ dist1 * iy2 + dist2 * iy1) / (dist1 + dist2)
+ new_lons_indices[iy, ix] = (
+ dist1 * ix2 + dist2 * ix1) / (dist1 + dist2)
else:
- new_lats_indices[iy,ix] = -999.
- new_lats_indices[iy,ix] = -999.
+ new_lats_indices[iy, ix] = -999.
+                new_lons_indices[iy, ix] = -999.
new_lats_indices = ma.masked_less(new_lats_indices, 0.)
new_lons_indices = ma.masked_less(new_lons_indices, 0.)
-
+
# Regrid the data on each time slice
for i in range(len(target_dataset.times)):
if len(target_dataset.times) == 1:
values_original = ma.array(target_dataset.values)
- else:
+ else:
values_original = ma.array(target_dataset.values[i])
for shift in (-1, 1):
for axis in (0, 1):
q_shifted = np.roll(values_original, shift=shift, axis=axis)
idx = ~q_shifted.mask * values_original.mask
values_original.data[idx] = q_shifted[idx]
- new_values[i] = map_coordinates(values_original, [new_lats_indices.flatten(), new_lons_indices.flatten()], order=1).reshape(new_lats.shape)
+ new_values[i] = map_coordinates(values_original,
+ [new_lats_indices.flatten(),
+ new_lons_indices.flatten()],
+ order=1).reshape(new_lats.shape)
new_values[i] = ma.array(new_values[i], mask=new_lats_indices.mask)
- # Make a masking map using nearest neighbour interpolation -use this to determine locations with MDI and mask these
+ # Make a masking map using nearest neighbour interpolation -use this to
+ # determine locations with MDI and mask these
qmdi = np.zeros_like(values_original)
qmdi[values_original.mask == True] = 1.
qmdi[values_original.mask == False] = 0.
- qmdi_r = map_coordinates(qmdi, [new_lats_indices.flatten(), new_lons_indices.flatten()], order=1).reshape(new_lats.shape)
+ qmdi_r = map_coordinates(qmdi, [new_lats_indices.flatten(
+ ), new_lons_indices.flatten()], order=1).reshape(new_lats.shape)
mdimask = (qmdi_r != 0.0)
# Combine missing data mask, with outside domain mask define above.
new_values[i].mask = np.logical_or(mdimask, new_values[i].mask)
-
- # TODO:
- # This will call down to the _congrid() function and the lat and lon
+ # TODO:
+ # This will call down to the _congrid() function and the lat and lon
# axis will be adjusted with the time axis being held constant
-
+
# Create a new Dataset Object to return using new data
- regridded_dataset = ds.Dataset(new_latitudes,
- new_longitudes,
- target_dataset.times,
+ regridded_dataset = ds.Dataset(new_latitudes,
+ new_longitudes,
+ target_dataset.times,
new_values,
variable=target_dataset.variable,
units=target_dataset.units,
@@ -294,38 +331,41 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes, boundary_check
origin=target_dataset.origin)
return regridded_dataset
+
def ensemble(datasets):
"""
Generate a single dataset which is the mean of the input datasets
An ensemble datasets combines input datasets assuming the all have
- similar shape, dimensions, and units.
-
+ similar shape, dimensions, and units.
+
:param datasets: Datasets to be used to compose the ensemble dataset from.
All Datasets must be the same shape.
:type datasets: :class:`list` of :class:`dataset.Dataset`
-
- :returns: New Dataset with a name of 'Dataset Ensemble'
+
+ :returns: New Dataset with a name of 'Dataset Ensemble'
:rtype: :class:`dataset.Dataset`
"""
_check_dataset_shapes(datasets)
dataset_values = [dataset.values for dataset in datasets]
ensemble_values = ma.mean(dataset_values, axis=0)
-
- # Build new dataset object from the input datasets and the ensemble values and return it
- ensemble_dataset = ds.Dataset(datasets[0].lats,
- datasets[0].lons,
+
+ # Build new dataset object from the input datasets and
+ # the ensemble values and return it
+ ensemble_dataset = ds.Dataset(datasets[0].lats,
+ datasets[0].lons,
datasets[0].times,
ensemble_values,
units=datasets[0].units,
name="Dataset Ensemble")
-
+
return ensemble_dataset
+
def subset(subregion, target_dataset, subregion_name=None):
'''Subset given dataset(s) with subregion information
- :param subregion: The Bounds with which to subset the target Dataset.
+ :param subregion: The Bounds with which to subset the target Dataset.
:type subregion: :class:`dataset.Bounds`
:param target_dataset: The Dataset object to subset.
@@ -341,34 +381,40 @@ def subset(subregion, target_dataset, subregion_name=None):
'''
if not subregion.start:
- subregion.start = target_dataset.times[0]
+ subregion.start = target_dataset.times[0]
subregion.end = target_dataset.times[-1]
-
+
# Ensure that the subregion information is well formed
_are_bounds_contained_by_dataset(subregion, target_dataset)
if not subregion_name:
subregion_name = target_dataset.name
- if target_dataset.lats.ndim ==2 and target_dataset.lons.ndim ==2:
- start_time_index = np.where(target_dataset.times == subregion.start)[0][0]
+ if target_dataset.lats.ndim == 2 and target_dataset.lons.ndim == 2:
+ start_time_index = np.where(
+ target_dataset.times == subregion.start)[0][0]
end_time_index = np.where(target_dataset.times == subregion.end)[0][0]
- target_dataset = temporal_slice(start_time_index, end_time_index, target_dataset)
+ target_dataset = temporal_slice(
+ start_time_index, end_time_index, target_dataset)
nt, ny, nx = target_dataset.values.shape
- y_index, x_index = np.where((target_dataset.lats >= subregion.lat_max) | (target_dataset.lats <= subregion.lat_min) |
- (target_dataset.lons >= subregion.lon_max) | (target_dataset.lons <= subregion.lon_min))
+ y_index, x_index = np.where(
+ (target_dataset.lats >= subregion.lat_max) | (
+ target_dataset.lats <= subregion.lat_min) |
+ (target_dataset.lons >= subregion.lon_max) | (
+ target_dataset.lons <= subregion.lon_min))
for it in np.arange(nt):
- target_dataset.values[it,y_index, x_index] = 1.e+20
+ target_dataset.values[it, y_index, x_index] = 1.e+20
target_dataset.values = ma.masked_equal(target_dataset.values, 1.e+20)
return target_dataset
- elif target_dataset.lats.ndim ==1 and target_dataset.lons.ndim ==1:
+ elif target_dataset.lats.ndim == 1 and target_dataset.lons.ndim == 1:
# Get subregion indices into subregion data
- dataset_slices = _get_subregion_slice_indices(subregion, target_dataset)
- # Slice the values array with our calculated slice indices
+ dataset_slices = _get_subregion_slice_indices(subregion,
+ target_dataset)
+ # Slice the values array with our calculated slice indices
if target_dataset.values.ndim == 2:
subset_values = ma.zeros([len(target_dataset.values[
- dataset_slices["lat_start"]:dataset_slices["lat_end"]]),
+ dataset_slices["lat_start"]:dataset_slices["lat_end"]]),
len(target_dataset.values[
dataset_slices["lon_start"]:dataset_slices["lon_end"]])])
@@ -380,26 +426,26 @@ def subset(subregion, target_dataset, subregion_name=None):
subset_values = ma.zeros([len(target_dataset.values[
dataset_slices["time_start"]:dataset_slices["time_end"]]),
len(target_dataset.values[
- dataset_slices["lat_start"]:dataset_slices["lat_end"]]),
+ dataset_slices["lat_start"]:dataset_slices["lat_end"]]),
len(target_dataset.values[
- dataset_slices["lon_start"]:dataset_slices["lon_end"]])])
-
+ dataset_slices["lon_start"]:dataset_slices["lon_end"]])])
+
subset_values = target_dataset.values[
dataset_slices["time_start"]:dataset_slices["time_end"] + 1,
dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1]
-
+
# Build new dataset with subset information
return ds.Dataset(
# Slice the lats array with our calculated slice indices
- target_dataset.lats[dataset_slices["lat_start"]:
+ target_dataset.lats[dataset_slices["lat_start"]:
dataset_slices["lat_end"] + 1],
# Slice the lons array with our calculated slice indices
- target_dataset.lons[dataset_slices["lon_start"]:
+ target_dataset.lons[dataset_slices["lon_start"]:
dataset_slices["lon_end"] + 1],
# Slice the times array with our calculated slice indices
- target_dataset.times[dataset_slices["time_start"]:
- dataset_slices["time_end"]+ 1],
+ target_dataset.times[dataset_slices["time_start"]:
+ dataset_slices["time_end"] + 1],
# Slice the values array with our calculated slice indices
subset_values,
variable=target_dataset.variable,
@@ -408,6 +454,7 @@ def subset(subregion, target_dataset, subregion_name=None):
origin=target_dataset.origin
)
+
def temporal_slice(start_time_index, end_time_index, target_dataset):
'''Temporally slice given dataset(s) with subregion information. This does not
spatially subset the target_Dataset
@@ -430,17 +477,18 @@ def temporal_slice(start_time_index, end_time_index, target_dataset):
end_date = target_dataset.times[end_time_index]
timeStart = min(np.nonzero(target_dataset.times >= start_date)[0])
timeEnd = max(np.nonzero(target_dataset.times <= end_date)[0])
- target_dataset.times = target_dataset.times[timeStart:timeEnd+1]
- target_dataset.values = target_dataset.values[timeStart:timeEnd+1,:]
+ target_dataset.times = target_dataset.times[timeStart:timeEnd + 1]
+ target_dataset.values = target_dataset.values[timeStart:timeEnd + 1, :]
return target_dataset
+
def safe_subset(subregion, target_dataset, subregion_name=None):
'''Safely subset given dataset with subregion information
- A standard subset requires that the provided subregion be entirely contained
- within the datasets bounds. `safe_subset` returns the overlap of the
- subregion and dataset without returning an error.
+ A standard subset requires that the provided subregion be entirely
+ contained within the datasets bounds. `safe_subset` returns the
+ overlap of the subregion and dataset without returning an error.
:param subregion: The Bounds with which to subset the target Dataset.
:type subregion: :class:`dataset.Bounds`
@@ -470,7 +518,7 @@ def safe_subset(subregion, target_dataset, subregion_name=None):
if subregion.lon_max > lon_max:
subregion.lon_max = lon_max
- if subregion.start:
+ if subregion.start:
if subregion.start < start:
subregion.start = start
@@ -480,6 +528,7 @@ def safe_subset(subregion, target_dataset, subregion_name=None):
return subset(subregion, target_dataset, subregion_name)
+
def normalize_dataset_datetimes(dataset, timestep):
''' Normalize Dataset datetime values.
@@ -508,6 +557,7 @@ def normalize_dataset_datetimes(dataset, timestep):
origin=dataset.origin
)
+
def write_netcdf(dataset, path, compress=True):
''' Write a dataset to a NetCDF file.
@@ -525,9 +575,9 @@ def write_netcdf(dataset, path, compress=True):
time_len = len(dataset.times)
# Create attribute dimensions
- lat_dim = out_file.createDimension('lat', lat_len)
- lon_dim = out_file.createDimension('lon', lon_len)
- time_dim = out_file.createDimension('time', time_len)
+ out_file.createDimension('lat', lat_len)
+ out_file.createDimension('lon', lon_len)
+ out_file.createDimension('time', time_len)
# Create variables
lats = out_file.createVariable('lat', 'f8', ('lat',), zlib=compress)
@@ -536,9 +586,9 @@ def write_netcdf(dataset, path, compress=True):
var_name = dataset.variable if dataset.variable else 'var'
values = out_file.createVariable(var_name,
- 'f8',
- ('time', 'lat', 'lon'),
- zlib=compress)
+ 'f8',
+ ('time', 'lat', 'lon'),
+ zlib=compress)
# Set the time variable units
# We don't deal with hourly/minutely/anything-less-than-a-day data so
@@ -556,15 +606,21 @@ def write_netcdf(dataset, path, compress=True):
out_file.close()
-def write_netcdf_multiple_datasets_with_subregions(ref_dataset, ref_name,
- model_dataset_array, model_names,
- path,
- subregions = None, subregion_array = None,
- ref_subregion_mean = None, ref_subregion_std = None,
- model_subregion_mean = None, model_subregion_std = None):
- #Write multiple reference and model datasets and their subregional means and standard deivations in a NetCDF file.
- #:To be updated
+def write_netcdf_multiple_datasets_with_subregions(ref_dataset, ref_name,
+ model_dataset_array,
+ model_names,
+ path,
+ subregions=None,
+ subregion_array=None,
+ ref_subregion_mean=None,
+ ref_subregion_std=None,
+ model_subregion_mean=None,
+ model_subregion_std=None):
+ # Write multiple reference and model datasets and their subregional means
+    # and standard deviations in a NetCDF file.
+
+ # :To be updated
#
out_file = netCDF4.Dataset(path, 'w', format='NETCDF4')
@@ -577,22 +633,22 @@ def write_netcdf_multiple_datasets_with_subregions(ref_dataset, ref_name,
lon_ndim = dataset.lons.ndim
time_len = len(dataset.times)
- if not subregions == None:
+ if subregions is not None:
nsubregion = len(subregions)
-
+
# Create attribute dimensions
- lat_dim = out_file.createDimension('y', lat_len)
- lon_dim = out_file.createDimension('x', lon_len)
- time_dim = out_file.createDimension('time', time_len)
+ out_file.createDimension('y', lat_len)
+ out_file.createDimension('x', lon_len)
+ out_file.createDimension('time', time_len)
# Create variables and store the values
- if lat_ndim ==2:
- lats = out_file.createVariable('lat', 'f8', ('y','x'))
+ if lat_ndim == 2:
+ lats = out_file.createVariable('lat', 'f8', ('y', 'x'))
else:
lats = out_file.createVariable('lat', 'f8', ('y'))
lats[:] = dataset.lats
- if lon_ndim ==2:
- lons = out_file.createVariable('lon', 'f8', ('y','x'))
+ if lon_ndim == 2:
+ lons = out_file.createVariable('lon', 'f8', ('y', 'x'))
else:
lons = out_file.createVariable('lon', 'f8', ('x'))
lons[:] = dataset.lons
@@ -600,37 +656,49 @@ def write_netcdf_multiple_datasets_with_subregions(ref_dataset, ref_name,
times.units = "days since %s" % dataset.times[0]
times[:] = netCDF4.date2num(dataset.times, times.units)
- #mask_array = np.zeros([time_len, lat_len, lon_len])
- #for iobs in np.arange(nobs):
+ # mask_array = np.zeros([time_len, lat_len, lon_len])
+ # for iobs in np.arange(nobs):
# index = np.where(ref_dataset_array[iobs].values.mask[:] == True)
# mask_array[index] = 1
- out_file.createVariable(ref_name, 'f8', ('time','y','x'))
+ out_file.createVariable(ref_name, 'f8', ('time', 'y', 'x'))
out_file.variables[ref_name][:] = ref_dataset.values
out_file.variables[ref_name].units = ref_dataset.units
for imodel in np.arange(nmodel):
- out_file.createVariable(model_names[imodel], 'f8', ('time','y','x'))
- #out_file.variables[model_names[imodel]][:] = ma.array(model_dataset_array[imodel].values, mask = mask_array)
- out_file.variables[model_names[imodel]][:] = model_dataset_array[imodel].values
- out_file.variables[model_names[imodel]].units = model_dataset_array[imodel].units
-
- if not subregions == None:
- out_file.createVariable('subregion_array', 'i4', ('y','x'))
+ out_file.createVariable(model_names[imodel], 'f8', ('time', 'y', 'x'))
+ # out_file.variables[model_names[imodel]][:] = ma.array(
+ # model_dataset_array[imodel].values, mask = mask_array)
+ out_file.variables[model_names[imodel]][:] = model_dataset_array[
+ imodel].values
+ out_file.variables[model_names[imodel]].units = model_dataset_array[
+ imodel].units
+
+ if subregions is not None:
+ out_file.createVariable('subregion_array', 'i4', ('y', 'x'))
out_file.variables['subregion_array'][:] = subregion_array[:]
nsubregion = len(subregions)
out_file.createDimension('nsubregion', nsubregion)
out_file.createDimension('nobs', nobs)
out_file.createDimension('nmodel', nmodel)
- out_file.createVariable('obs_subregion_mean', 'f8', ('nobs','time','nsubregion'))
+ out_file.createVariable('obs_subregion_mean', 'f8', ('nobs',
+ 'time',
+ 'nsubregion'))
out_file.variables['obs_subregion_mean'][:] = ref_subregion_mean[:]
- out_file.createVariable('obs_subregion_std', 'f8', ('nobs','time','nsubregion'))
+ out_file.createVariable('obs_subregion_std', 'f8', ('nobs',
+ 'time',
+ 'nsubregion'))
out_file.variables['obs_subregion_std'][:] = ref_subregion_std[:]
- out_file.createVariable('model_subregion_mean', 'f8', ('nmodel','time','nsubregion'))
+ out_file.createVariable('model_subregion_mean', 'f8', ('nmodel',
+ 'time',
+ 'nsubregion'))
out_file.variables['model_subregion_mean'][:] = model_subregion_mean[:]
- out_file.createVariable('model_subregion_std', 'f8', ('nmodel','time','nsubregion'))
+ out_file.createVariable('model_subregion_std', 'f8', ('nmodel',
+ 'time',
+ 'nsubregion'))
out_file.variables['model_subregion_std'][:] = model_subregion_std[:]
out_file.close()
+
def water_flux_unit_conversion(dataset):
''' Convert water flux variables units as necessary
@@ -642,7 +710,7 @@ def water_flux_unit_conversion(dataset):
:returns: A Dataset with values converted to new units.
:rtype: :class:`dataset.Dataset`
'''
- water_flux_variables = ['pr', 'prec','evspsbl', 'mrro', 'swe']
+ water_flux_variables = ['pr', 'prec', 'evspsbl', 'mrro', 'swe']
variable = dataset.variable.lower()
if any(sub_string in variable for sub_string in water_flux_variables):
@@ -652,13 +720,14 @@ def water_flux_unit_conversion(dataset):
dataset.values = 1.e3 * dataset.values
dataset.units = 'km'
else:
- if any(unit in dataset_units
- for unit in ['kg m-2 s-1', 'mm s-1', 'mm/sec']):
+ if any(unit in dataset_units
+ for unit in ['kg m-2 s-1', 'mm s-1', 'mm/sec']):
dataset.values = 86400. * dataset.values
dataset.units = 'mm/day'
return dataset
+
def temperature_unit_conversion(dataset):
''' Convert temperature units as necessary
@@ -670,7 +739,7 @@ def temperature_unit_conversion(dataset):
:returns: The dataset with (potentially) updated units.
:rtype: :class:`dataset.Dataset`
'''
- temperature_variables = ['temp','tas','tasmax','taxmin','T','tg']
+ temperature_variables = ['temp', 'tas', 'tasmax', 'taxmin', 'T', 'tg']
variable = dataset.variable.lower()
if any(sub_string in variable for sub_string in temperature_variables):
@@ -681,10 +750,12 @@ def temperature_unit_conversion(dataset):
return dataset
+
def variable_unit_conversion(dataset):
''' Convert water flux or temperature variables units as necessary
-
- For water flux variables, convert full SI units water flux units to more common units.
+
+ For water flux variables, convert full SI units water flux units
+ to more common units.
For temperature, convert Celcius to Kelvin.
:param dataset: The dataset to convert.
@@ -696,9 +767,10 @@ def variable_unit_conversion(dataset):
dataset = water_flux_unit_conversion(dataset)
dataset = temperature_unit_conversion(dataset)
-
+
return dataset
+
def _rcmes_normalize_datetimes(datetimes, timestep):
""" Normalize Dataset datetime values.
@@ -718,22 +790,25 @@ def _rcmes_normalize_datetimes(datetimes, timestep):
# Clean the inputDatetime
inputDatetimeString = inputDatetime.strftime('%Y%m%d')
normalInputDatetimeString = inputDatetimeString[:6] + '01'
- inputDatetime = datetime.datetime.strptime(normalInputDatetimeString, '%Y%m%d')
+ inputDatetime = datetime.datetime.strptime(
+ normalInputDatetimeString, '%Y%m%d')
normalDatetimes.append(inputDatetime)
elif timestep.lower() == 'daily':
for inputDatetime in datetimes:
- if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0:
+ if (inputDatetime.hour != 0 or inputDatetime.minute != 0 or
+ inputDatetime.second != 0):
datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S')
normalDatetimeString = datetimeString[:8] + '000000'
- inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
+ inputDatetime = datetime.datetime.strptime(
+ normalDatetimeString, '%Y%m%d%H%M%S')
normalDatetimes.append(inputDatetime)
-
return normalDatetimes
+
def mask_missing_data(dataset_array):
''' Check missing values in observation and model datasets.
If any of dataset in dataset_array has missing values at a grid point,
@@ -743,7 +818,7 @@ def mask_missing_data(dataset_array):
mask_array = np.zeros(dataset_array[0].values.shape)
for dataset in dataset_array:
index = np.where(dataset.values.mask == True)
- if index[0].size >0:
+ if index[0].size > 0:
mask_array[index] = 1
masked_array = []
for dataset in dataset_array:
@@ -755,7 +830,7 @@ def mask_missing_data(dataset_array):
def _rcmes_spatial_regrid(spatial_values, lat, lon, lat2, lon2, order=1):
'''
Spatial regrid from one set of lat,lon values onto a new set (lat2,lon2)
-
+
:param spatial_values: Values in a spatial grid that need to be regridded
:type spatial_values: 2d masked numpy array. shape (latitude, longitude)
:param lat: Grid of latitude values which map to the spatial values
@@ -770,57 +845,61 @@ def _rcmes_spatial_regrid(spatial_values, lat, lon, lat2, lon2, order=1):
:type order: [optional] Integer
:returns: 2d masked numpy array with shape(len(lat2), len(lon2))
- :rtype: (float, float)
+ :rtype: (float, float)
'''
nlat = spatial_values.shape[0]
nlon = spatial_values.shape[1]
- #print nlat, nlon, "lats, lons - incoming dataset"
+ # print nlat, nlon, "lats, lons - incoming dataset"
nlat2 = lat2.shape[0]
nlon2 = lon2.shape[1]
- #print nlat2, nlon2, "NEW lats, lons - for the new grid output"
+ # print nlat2, nlon2, "NEW lats, lons - for the new grid output"
- # To make our lives easier down the road, let's
+ # To make our lives easier down the road, let's
# turn these into arrays of x & y coords
loni = lon2.ravel()
lati = lat2.ravel()
- loni = loni.copy() # NB. it won't run unless you do this...
+ loni = loni.copy() # NB. it won't run unless you do this...
lati = lati.copy()
# Now, we'll set points outside the boundaries to lie along an edge
loni[loni > lon.max()] = lon.max()
loni[loni < lon.min()] = lon.min()
-
+
# To deal with the "hard" break, we'll have to treat y differently,
# so we're just setting the min here...
lati[lati > lat.max()] = lat.max()
lati[lati < lat.min()] = lat.min()
-
-
+
# We need to convert these to (float) indicies
# (xi should range from 0 to (nx - 1), etc)
loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min())
-
+
# Deal with the "hard" break in the y-direction
lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min())
-
+
"""
- TODO: Review this docstring and see if it still holds true.
+ TODO: Review this docstring and see if it still holds true.
NOTE: This function doesn't use MDI currently. These are legacy comments
-
+
Notes on dealing with MDI when regridding data.
Method adopted here:
- Use bilinear interpolation of data by default (but user can specify other order using order=... in call)
+ Use bilinear interpolation of data by default
+ (but user can specify other order using order=... in call)
Perform bilinear interpolation of data, and of mask.
- To be conservative, new grid point which contained some missing data on the old grid is set to missing data.
- -this is achieved by looking for any non-zero interpolated mask values.
- To avoid issues with bilinear interpolation producing strong gradients leading into the MDI,
- set values at MDI points to mean data value so little gradient visible = not ideal, but acceptable for now.
-
- Set values in MDI so that similar to surroundings so don't produce large gradients when interpolating
- Preserve MDI mask, by only changing data part of masked array object.
+ To be conservative, new grid point which contained some missing data on
+ the old grid is set to missing data.
+ -this is achieved by looking for any non-zero interpolated
+ mask values.
+ To avoid issues with bilinear interpolation producing strong gradients
+ leading into the MDI, set values at MDI points to mean data value so
+ little gradient visible = not ideal, but acceptable for now.
+
+ Set values in MDI so that similar to surroundings so don't produce large
+    gradients when interpolating. Preserve MDI mask, by only changing data
+    part of masked array object.
"""
for shift in (-1, 1):
for axis in (0, 1):
@@ -829,31 +908,37 @@ def _rcmes_spatial_regrid(spatial_values, lat, lon, lat2, lon2, order=1):
spatial_values.data[idx] = q_shifted[idx]
# Now we actually interpolate
- # map_coordinates does cubic interpolation by default,
+ # map_coordinates does cubic interpolation by default,
# use "order=1" to preform bilinear interpolation instead...
-
- regridded_values = map_coordinates(spatial_values, [lati, loni], order=order)
+
+ regridded_values = map_coordinates(spatial_values,
+ [lati, loni],
+ order=order)
regridded_values = regridded_values.reshape([nlat2, nlon2])
# Set values to missing data outside of original domain
- regridded_values = ma.masked_array(regridded_values, mask=np.logical_or(np.logical_or(lat2 >= lat.max(),
- lat2 <= lat.min()),
- np.logical_or(lon2 <= lon.min(),
- lon2 >= lon.max())))
-
- # Make second map using nearest neighbour interpolation -use this to determine locations with MDI and mask these
+ regridded_values = ma.masked_array(regridded_values,
+ mask=np.logical_or(
+ np.logical_or(lat2 >= lat.max(),
+ lat2 <= lat.min()),
+ np.logical_or(lon2 <= lon.min(),
+ lon2 >= lon.max())))
+
+ # Make second map using nearest neighbour interpolation -use this to
+ # determine locations with MDI and mask these
qmdi = np.zeros_like(spatial_values)
qmdi[spatial_values.mask == True] = 1.
qmdi[spatial_values.mask == False] = 0.
qmdi_r = map_coordinates(qmdi, [lati, loni], order=order)
qmdi_r = qmdi_r.reshape([nlat2, nlon2])
mdimask = (qmdi_r != 0.0)
-
+
# Combine missing data mask, with outside domain mask define above.
regridded_values.mask = np.logical_or(mdimask, regridded_values.mask)
return regridded_values
+
def _rcmes_create_mask_using_threshold(masked_array, threshold=0.5):
'''Mask an array if percent of values missing data is above a threshold.
@@ -871,15 +956,16 @@ def _rcmes_create_mask_using_threshold(masked_array, threshold=0.5):
:returns: A Numpy array describing the mask for masked_array.
'''
- # try, except used as some model files don't have a full mask, but a single bool
- # the except catches this situation and deals with it appropriately.
+    # try/except is used because some model files don't have a full mask but
+    # a single bool; the except catches this and deals with it appropriately.
try:
nT = masked_array.mask.shape[0]
# For each pixel, count how many times are masked.
nMasked = masked_array.mask[:, :, :].sum(axis=0)
- # Define new mask as when a pixel has over a defined threshold ratio of masked data
+ # Define new mask as when a pixel has over a defined threshold ratio of
+ # masked data
# e.g. if the threshold is 75%, and there are 10 times,
# then a pixel will be masked if more than 5 times are masked.
mymask = nMasked > (nT * threshold)
@@ -889,6 +975,7 @@ def _rcmes_create_mask_using_threshold(masked_array, threshold=0.5):
return mymask
+
def _rcmes_calc_average_on_new_time_unit(data, dates, unit):
""" Rebin 3d array and list of dates using the provided unit parameter
@@ -897,22 +984,25 @@ def _rcmes_calc_average_on_new_time_unit(data, dates, unit):
:param dates: List of dates that correspond to the given data values
:type dates: Python datetime objects
:param unit: Time unit to average the data into
- :type unit: String matching one of these values : full | annual | monthly | daily
+ :type unit: String matching one of these values :
+ full | annual | monthly | daily
:returns: meanstorem, newTimesList
- :rtype: 3D numpy masked array the same shape as the input array, list of python datetime objects
+ :rtype: 3D numpy masked array the same shape as the input array,
+ list of python datetime objects
"""
# Check if the user-selected temporal grid is valid. If not, EXIT
- acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')
+ acceptable = ((unit == 'full') | (unit == 'annual') |
+ (unit == 'monthly') | (unit == 'daily'))
if not acceptable:
print 'Error: unknown unit type selected for time averaging: EXIT'
- return -1,-1,-1,-1
+        return -1, -1, -1, -1
nt, ny, nx = data.shape
if unit == 'full':
new_data = ma.mean(data, axis=0)
- new_date = [dates[dates.size/2]]
+ new_date = [dates[dates.size / 2]]
if unit == 'annual':
years = [d.year for d in dates]
years_sorted = np.unique(years)
@@ -921,26 +1011,28 @@ def _rcmes_calc_average_on_new_time_unit(data, dates, unit):
new_date = []
for year in years_sorted:
index = np.where(years == year)[0]
- new_data[it,:] = ma.mean(data[index,:], axis=0)
+ new_data[it, :] = ma.mean(data[index, :], axis=0)
new_date.append(datetime.datetime(year=year, month=7, day=2))
- it = it+1
+ it = it + 1
if unit == 'monthly':
years = [d.year for d in dates]
years_sorted = np.unique(years)
months = [d.month for d in dates]
months_sorted = np.unique(months)
-
- new_data = ma.zeros([years_sorted.size*months_sorted.size, ny, nx])
+
+ new_data = ma.zeros([years_sorted.size * months_sorted.size, ny, nx])
it = 0
new_date = []
for year in years_sorted:
for month in months_sorted:
index = np.where((years == year) & (months == month))[0]
- new_data[it,:] = ma.mean(data[index,:], axis=0)
- new_date.append(datetime.datetime(year=year, month=month, day=15))
- it = it+1
+ new_data[it, :] = ma.mean(data[index, :], axis=0)
+ new_date.append(datetime.datetime(year=year,
+ month=month,
+ day=15))
+ it = it + 1
if unit == 'daily':
- days = [d.year*10000.+d.month*100.+d.day for d in dates]
+ days = [d.year * 10000. + d.month * 100. + d.day for d in dates]
days_sorted = np.unique(days)
new_data = ma.zeros([days_sorted.size, ny, nx])
@@ -948,64 +1040,69 @@ def _rcmes_calc_average_on_new_time_unit(data, dates, unit):
new_date = []
for day in days_sorted:
index = np.where(days == day)[0]
- new_data[it,:] = ma.mean(data[index,:], axis=0)
+ new_data[it, :] = ma.mean(data[index, :], axis=0)
y = int(day / 10000)
m = int(day % 10000) / 100
d = int(day % 100)
new_date.append(datetime.datetime(year=y, month=m, day=d))
- it = it+1
-
+ it = it + 1
+
return new_data, np.array(new_date)
+
def _rcmes_calc_average_on_new_time_unit_K(data, dates, unit):
""" Rebin 3d array and list of dates using the provided unit parameter
-
- :param data: Input data that needs to be averaged
+
+ :param data: Input data that needs to be averaged
:type data: 3D masked numpy array of shape (times, lats, lons)
:param dates: List of dates that correspond to the given data values
:type dates: Python datetime objects
:param unit: Time unit to average the data into
- :type unit: String matching one of these values : full | annual | monthly | daily
-
+ :type unit: String matching one of these values :
+ full | annual | monthly | daily
+
:returns: meanstorem, newTimesList
- :rtype: 3D numpy masked array the same shape as the input array, list of python datetime objects
+ :rtype: 3D numpy masked array the same shape as the input array,
+ list of python datetime objects
"""
# Check if the user-selected temporal grid is valid. If not, EXIT
- acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')
+ acceptable = ((unit == 'full') | (unit == 'annual') |
+ (unit == 'monthly') | (unit == 'daily'))
if not acceptable:
print 'Error: unknown unit type selected for time averaging: EXIT'
- return -1,-1,-1,-1
+        return -1, -1, -1, -1
# Calculate arrays of: annual timeseries: year (2007,2007),
# monthly time series: year-month (200701,200702),
- # daily timeseries: year-month-day (20070101,20070102)
+ # daily timeseries: year-month-day (20070101,20070102)
# depending on user-selected averaging period.
# Year list
- if unit=='annual':
+ if unit == 'annual':
timeunits = np.array([int(d.strftime("%Y")) for d in dates])
unique_times = np.unique(timeunits)
-
+
# YearMonth format list
- if unit=='monthly':
+ if unit == 'monthly':
timeunits = np.array([int(d.strftime("%Y%m")) for d in dates])
unique_times = np.unique(timeunits)
# YearMonthDay format list
- if unit=='daily':
+ if unit == 'daily':
timeunits = np.array([int(d.strftime("%Y%m%d")) for d in dates])
unique_times = np.unique(timeunits)
-
# TODO: add pentad setting using Julian days?
# Full list: a special case
if unit == 'full':
- # Calculating means data over the entire time range: i.e., annual-mean climatology
+ # Calculating means data over the entire time range:
+ # i.e., annual-mean climatology
timeunits = []
for i in np.arange(len(dates)):
- timeunits.append(999) # i.e. we just want the same value for all times.
+ # i.e. we just want the same value for all times.
+ timeunits.append(999)
timeunits = np.array(timeunits, dtype=int)
unique_times = np.unique(timeunits)
@@ -1013,52 +1110,56 @@ def _rcmes_calc_average_on_new_time_unit_K(data, dates, unit):
newTimesList = []
# Decide whether or not you need to do any time averaging.
- # i.e. if data are already on required time unit then just pass data through and
- # calculate and return representative datetimes.
+ # i.e. if data are already on required time unit then just
+ # pass data through and calculate and return representative
+ # datetimes.
processing_required = True
- if len(timeunits)==(len(unique_times)):
+ if len(timeunits) == (len(unique_times)):
processing_required = False
# 1D data arrays, i.e. time series
- if data.ndim==1:
+ if data.ndim == 1:
# Create array to store the resulting data
meanstore = np.zeros(len(unique_times))
-
+
# Calculate the means across each unique time unit
- i=0
+ i = 0
for myunit in unique_times:
if processing_required:
- datam=ma.masked_array(data,timeunits!=myunit)
+ datam = ma.masked_array(data, timeunits != myunit)
meanstore[i] = ma.average(datam)
-
+
# construct new times list
yyyy, mm, dd = _create_new_year_month_day(myunit, dates)
- newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
- i = i+1
+ newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
+ i = i + 1
# 3D data arrays
- if data.ndim==3:
+ if data.ndim == 3:
# Create array to store the resulting data
- meanstore = np.zeros([len(unique_times),data.shape[1],data.shape[2]])
-
+ meanstore = np.zeros([len(unique_times), data.shape[1], data.shape[2]])
+
# Calculate the means across each unique time unit
- i=0
+ i = 0
datamask_store = []
for myunit in unique_times:
if processing_required:
mask = np.zeros_like(data)
- mask[timeunits!=myunit,:,:] = 1.0
+ mask[timeunits != myunit, :, :] = 1.0
# Calculate missing data mask within each time unit...
datamask_at_this_timeunit = np.zeros_like(data)
- datamask_at_this_timeunit[:]= _rcmes_create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
+ datamask_at_this_timeunit[:] = _rcmes_create_mask_using_threshold(
+ data[timeunits == myunit, :, :], threshold=0.75)
# Store results for masking later
datamask_store.append(datamask_at_this_timeunit[0])
- # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
- datam = ma.masked_array(data,np.logical_or(mask,datamask_at_this_timeunit))
- meanstore[i,:,:] = ma.average(datam,axis=0)
+ # Calculate means for each pixel in this time unit, ignoring
+ # missing data (using masked array).
+ datam = ma.masked_array(data, np.logical_or(mask,
+ datamask_at_this_timeunit))
+ meanstore[i, :, :] = ma.average(datam, axis=0)
# construct new times list
yyyy, mm, dd = _create_new_year_month_day(myunit, dates)
- newTimesList.append(datetime.datetime(yyyy,mm,dd))
+ newTimesList.append(datetime.datetime(yyyy, mm, dd))
i += 1
if not processing_required:
@@ -1071,40 +1172,42 @@ def _rcmes_calc_average_on_new_time_unit_K(data, dates, unit):
return meanstorem, newTimesList
+
def _create_new_year_month_day(time_unit, dates):
smyunit = str(time_unit)
- if len(smyunit)==4: # YYYY
+ if len(smyunit) == 4: # YYYY
yyyy = int(smyunit[0:4])
mm = 1
dd = 1
- elif len(smyunit)==6: # YYYYMM
+ elif len(smyunit) == 6: # YYYYMM
yyyy = int(smyunit[0:4])
mm = int(smyunit[4:6])
dd = 1
- elif len(smyunit)==8: # YYYYMMDD
+ elif len(smyunit) == 8: # YYYYMMDD
yyyy = int(smyunit[0:4])
mm = int(smyunit[4:6])
dd = int(smyunit[6:8])
- elif len(smyunit)==3: # Full time range
- # Need to set an appropriate time representing the mid-point of the entire time span
- dt = dates[-1]-dates[0]
- halfway = dates[0]+(dt/2)
+ elif len(smyunit) == 3: # Full time range
+ # Need to set an appropriate time representing the mid-point of the
+ # entire time span
+ dt = dates[-1] - dates[0]
+ halfway = dates[0] + (dt / 2)
yyyy = int(halfway.year)
mm = int(halfway.month)
dd = int(halfway.day)
-
+
return (yyyy, mm, dd)
+
def _congrid(a, newdims, method='linear', centre=False, minusone=False):
'''
This function is from http://wiki.scipy.org/Cookbook/Rebinning - Example 3
It has been refactored and changed a bit, but the original functionality
has been preserved.
-
Arbitrary resampling of source array to new dimension sizes.
Currently only supports maintaining the same number of dimensions.
To use 1-D arrays, first promote them to shape (x,1).
-
+
Uses the same parameters and creates the same co-ordinate lookup points
as IDL''s congrid routine, which apparently originally came from a VAX/VMS
routine of the same name.
@@ -1126,60 +1229,60 @@ def _congrid(a, newdims, method='linear', centre=False, minusone=False):
True - inarray is resampled by(i-1)/(x-1) * (j-1)/(y-1)
This prevents extrapolation one element beyond bounds of input array.
'''
- if not a.dtype in [np.float64, np.float32]:
+ if a.dtype not in [np.float64, np.float32]:
a = np.cast[float](a)
- # this will merely take the True/False input and convert it to an array(1) or array(0)
+ # this will merely take the True/False input and convert it to an
+ # array(1) or array(0)
m1 = np.cast[int](minusone)
- # this also casts the True False input into a floating point number of 0.5 or 0.0
+ # this also casts the True False input into a floating point number
+ # of 0.5 or 0.0
ofs = np.cast[int](centre) * 0.5
- old = np.array( a.shape )
- ndims = len( a.shape )
- if len( newdims ) != ndims:
+ old = np.array(a.shape)
+ ndims = len(a.shape)
+ if len(newdims) != ndims:
print "[congrid] dimensions error. " \
"This routine currently only supports " \
"rebinning to the same number of dimensions."
return None
- newdims = np.asarray( newdims, dtype=float )
+ newdims = np.asarray(newdims, dtype=float)
dimlist = []
if method == 'neighbour':
newa = _congrid_neighbor(a, newdims, m1, ofs)
- elif method in ['nearest','linear']:
+ elif method in ['nearest', 'linear']:
# calculate new dims
- for i in range( ndims ):
- base = np.arange( newdims[i] )
- dimlist.append( (old[i] - m1) / (newdims[i] - m1) \
- * (base + ofs) - ofs )
+ for i in range(ndims):
+ base = np.arange(newdims[i])
+ dimlist.append((old[i] - m1) / (newdims[i] - m1) *
+ (base + ofs) - ofs)
# specify old dims
- olddims = [np.arange(i, dtype = np.float) for i in list( a.shape )]
+ olddims = [np.arange(i, dtype=np.float) for i in list(a.shape)]
# first interpolation - for ndims = any
- mint = scipy.interpolate.interp1d( olddims[-1], a, kind=method )
- newa = mint( dimlist[-1] )
+ mint = scipy.interpolate.interp1d(olddims[-1], a, kind=method)
+ newa = mint(dimlist[-1])
- trorder = [ndims - 1] + range( ndims - 1 )
- for i in range( ndims - 2, -1, -1 ):
- newa = newa.transpose( trorder )
+ trorder = [ndims - 1] + range(ndims - 1)
+ for i in range(ndims - 2, -1, -1):
+ newa = newa.transpose(trorder)
- mint = scipy.interpolate.interp1d( olddims[i], newa, kind=method )
- newa = mint( dimlist[i] )
+ mint = scipy.interpolate.interp1d(olddims[i], newa, kind=method)
+ newa = mint(dimlist[i])
if ndims > 1:
# need one more transpose to return to original dimensions
- newa = newa.transpose( trorder )
+ newa = newa.transpose(trorder)
return newa
elif method in ['spline']:
- oslices = [ slice(0, j) for j in old ]
- oldcoords = np.ogrid[oslices]
- nslices = [ slice(0, j) for j in list(newdims) ]
+ nslices = [slice(0, j) for j in list(newdims)]
newcoords = np.mgrid[nslices]
newcoords_dims = range(np.rank(newcoords))
- #make first index last
+ # make first index last
newcoords_dims.append(newcoords_dims.pop(0))
newcoords_tr = newcoords.transpose(newcoords_dims)
# makes a view that affects newcoords
@@ -1199,56 +1302,59 @@ def _congrid(a, newdims, method='linear', centre=False, minusone=False):
"and \'spline\' are supported."
return None
+
def _check_dataset_shapes(datasets):
""" If the datasets are not the same shape throw a ValueError Exception
-
+
:param datasets: OCW Datasets to check for a consistent shape
:type datasets: List of OCW Dataset Objects
-
+
:raises: ValueError
"""
dataset_shape = None
for dataset in datasets:
- if dataset_shape == None:
+ if dataset_shape is None:
dataset_shape = dataset.values.shape
else:
if dataset.values.shape != dataset_shape:
msg = "%s != %s" % (dataset.values.shape, dataset_shape)
- raise ValueError("Input datasets must be the same shape for an ensemble :: ", msg)
+ raise ValueError("Input datasets must be the same shape "
+ "for an ensemble :: ", msg)
else:
pass
def _congrid_neighbor(values, new_dims, minus_one, offset):
""" Use the nearest neighbor to create a new array
-
+
:param values: Array of values that need to be interpolated
:type values: Numpy ndarray
:param new_dims: Longitude resolution in degrees
:type new_dims: float
:param lat_resolution: Latitude resolution in degrees
:type lat_resolution: float
-
+
:returns: A new spatially regridded Dataset
:rtype: Open Climate Workbench Dataset Object
"""
- ndims = len( values.shape )
+ ndims = len(values.shape)
dimlist = []
- old_dims = np.array( values.shape )
- for i in range( ndims ):
+ old_dims = np.array(values.shape)
+ for i in range(ndims):
base = np.indices(new_dims)[i]
- dimlist.append( (old_dims[i] - minus_one) / (new_dims[i] - minus_one) \
- * (base + offset) - offset )
- cd = np.array( dimlist ).round().astype(int)
- new_values = values[list( cd )]
- return new_values
+ dimlist.append((old_dims[i] - minus_one) / (new_dims[i] - minus_one) *
+ (base + offset) - offset)
+ cd = np.array(dimlist).round().astype(int)
+ new_values = values[list(cd)]
+ return new_values
+
def _are_bounds_contained_by_dataset(bounds, dataset):
'''Check if a Dataset fully contains a bounds.
:param bounds: The Bounds object to check.
:type bounds: Bounds
- :param dataset: The Dataset that should be fully contain the
+ :param dataset: The Dataset that should be fully contain the
Bounds
:type dataset: Dataset
@@ -1259,29 +1365,40 @@ def _are_bounds_contained_by_dataset(bounds, dataset):
start, end = dataset.time_range()
errors = []
- # TODO: THIS IS TERRIBLY inefficent and we need to use a geometry lib instead in the future
- if not np.round(lat_min,3) <= np.round(bounds.lat_min,3) <= np.round(lat_max,3):
- error = "bounds.lat_min: %s is not between lat_min: %s and lat_max: %s" % (bounds.lat_min, lat_min, lat_max)
+ # TODO: THIS IS TERRIBLY inefficent and we need to use a geometry
+ # lib instead in the future
+ if not (np.round(lat_min, 3) <= np.round(bounds.lat_min, 3) <=
+ np.round(lat_max, 3)):
+ error = ("bounds.lat_min: %s is not between lat_min: %s and"
+ " lat_max: %s" % (bounds.lat_min, lat_min, lat_max))
errors.append(error)
- if not np.round(lat_min,3) <= np.round(bounds.lat_max,3) <= np.round(lat_max,3):
- error = "bounds.lat_max: %s is not between lat_min: %s and lat_max: %s" % (bounds.lat_max, lat_min, lat_max)
+ if not (np.round(lat_min, 3) <= np.round(bounds.lat_max, 3) <=
+ np.round(lat_max, 3)):
+ error = ("bounds.lat_max: %s is not between lat_min: %s and"
+                 " lat_max: %s" % (bounds.lat_max, lat_min, lat_max))
errors.append(error)
- if not np.round(lon_min,3) <= np.round(bounds.lon_min,3) <= np.round(lon_max,3):
- error = "bounds.lon_min: %s is not between lon_min: %s and lon_max: %s" % (bounds.lon_min, lon_min, lon_max)
+ if not (np.round(lon_min, 3) <= np.round(bounds.lon_min, 3) <=
+ np.round(lon_max, 3)):
+ error = ("bounds.lon_min: %s is not between lon_min: %s and"
+                 " lon_max: %s" % (bounds.lon_min, lon_min, lon_max))
errors.append(error)
- if not np.round(lon_min,3) <= np.round(bounds.lon_max,3) <= np.round(lon_max,3):
- error = "bounds.lon_max: %s is not between lon_min: %s and lon_max: %s" % (bounds.lon_max, lon_min, lon_max)
+ if not (np.round(lon_min, 3) <= np.round(bounds.lon_max, 3) <=
+ np.round(lon_max, 3)):
+ error = ("bounds.lon_max: %s is not between lon_min: %s and"
+                 " lon_max: %s" % (bounds.lon_max, lon_min, lon_max))
errors.append(error)
if not start <= bounds.start <= end:
- error = "bounds.start: %s is not between start: %s and end: %s" % (bounds.start, start, end)
+ error = ("bounds.start: %s is not between start: %s and end: %s" %
+ (bounds.start, start, end))
errors.append(error)
if not start <= bounds.end <= end:
- error = "bounds.end: %s is not between start: %s and end: %s" % (bounds.end, start, end)
+ error = ("bounds.end: %s is not between start: %s and end: %s" %
+ (bounds.end, start, end))
errors.append(error)
if len(errors) == 0:
@@ -1290,10 +1407,11 @@ def _are_bounds_contained_by_dataset(bounds, dataset):
error_message = '\n'.join(errors)
raise ValueError(error_message)
+
def _get_subregion_slice_indices(subregion, target_dataset):
'''Get the indices for slicing Dataset values to generate the subregion.
- :param subregion: The Bounds that specify the subset of the Dataset
+ :param subregion: The Bounds that specify the subset of the Dataset
that should be extracted.
:type subregion: Bounds
:param target_dataset: The Dataset to subset.
@@ -1307,16 +1425,14 @@ def _get_subregion_slice_indices(subregion, target_dataset):
lonStart = min(np.nonzero(target_dataset.lons >= subregion.lon_min)[0])
lonEnd = max(np.nonzero(target_dataset.lons <= subregion.lon_max)[0])
-
timeStart = min(np.nonzero(target_dataset.times >= subregion.start)[0])
timeEnd = max(np.nonzero(target_dataset.times <= subregion.end)[0])
return {
- "lat_start" : latStart,
- "lat_end" : latEnd,
- "lon_start" : lonStart,
- "lon_end" : lonEnd,
- "time_start" : timeStart,
- "time_end" : timeEnd
+ "lat_start": latStart,
+ "lat_end": latEnd,
+ "lon_start": lonStart,
+ "lon_end": lonEnd,
+ "time_start": timeStart,
+ "time_end": timeEnd
}
-
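The reformatting above is intended to be behaviour-preserving: public signatures such as temporal_subset(), temporal_rebin() and spatial_regrid() are unchanged, so existing callers keep working. The toy example below is illustrative only and not part of the patch; the synthetic grid, the variable name 'tas' and the import paths (ocw.dataset, ocw.dataset_processor) are assumptions based on how ds.Dataset(...) is called throughout the diff and should be checked against ocw/dataset.py.

    # Illustrative sketch only -- synthetic data, not taken from the patch.
    import datetime
    import numpy as np
    import numpy.ma as ma

    import ocw.dataset as ds
    import ocw.dataset_processor as dsp

    # A 12-month, 21 x 21 toy dataset (all zeros, nothing masked).
    lats = np.arange(-10., 11., 1.)
    lons = np.arange(20., 41., 1.)
    times = np.array([datetime.datetime(2000, m, 1) for m in range(1, 13)])
    values = ma.zeros((len(times), len(lats), len(lons)))

    monthly = ds.Dataset(lats, lons, times, values,
                         variable='tas', units='K', name='toy')

    # Collapse the 12 monthly fields into a single annual mean field.
    annual = dsp.temporal_rebin(monthly, 'annual')

    # Keep June-August only, averaged into one field per year.
    jja = dsp.temporal_subset(6, 8, monthly, average_each_year=True)

    # Regrid onto a coarser 2-degree grid strictly inside the original domain.
    coarse = dsp.spatial_regrid(monthly,
                                np.arange(-8., 9., 2.),
                                np.arange(22., 39., 2.))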
[2/2] climate git commit: PEP8 violations in dataset_processor.py have been fixed.
Posted by hu...@apache.org.
PEP8 violations in dataset_processor.py have been fixed.
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/3ce4e209
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/3ce4e209
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/3ce4e209
Branch: refs/heads/master
Commit: 3ce4e20941076c0b0db490e3c8c96cde2807ee34
Parents: 54f2110 9fa0e4c
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Sun Jun 26 16:59:45 2016 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Sun Jun 26 16:59:45 2016 -0700
----------------------------------------------------------------------
ocw/dataset_processor.py | 762 ++++++++++++++++++++++++------------------
1 file changed, 439 insertions(+), 323 deletions(-)
----------------------------------------------------------------------