Posted to commits@climate.apache.org by go...@apache.org on 2017/09/28 02:04:22 UTC

climate git commit: CLIMATE-928 - temporal_subset should trim edges of dataset times to ensure months divide evenly into years

Repository: climate
Updated Branches:
  refs/heads/master 3539aa2be -> cb9428413


CLIMATE-928 - temporal_subset should trim edges of dataset times to ensure months divide evenly into years
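
For illustration only, here is a minimal standalone sketch of the edge-trimming idea, not the project's exact code: given monthly time steps as objects with a .month attribute, drop leading months until the first January and trailing months after the last December, so the remaining length divides evenly by 12. The helper name trim_to_whole_years, the explicit guards for already-aligned edges, and the assumption of a full January-December window (nmonth=12) are all choices made for this sketch; the actual temporal_subset also supports partial-year month windows.

    import datetime as dt
    import numpy as np

    def trim_to_whole_years(times, nmonth=12):
        """Drop leading/trailing months so times divides evenly into years."""
        times = np.asarray(times)
        if times.size % nmonth == 0:
            return times  # already whole years; nothing to trim
        s_mon = times[0].month
        e_mon = times[-1].month
        # Skip ahead to the first January (13 - s_mon months), as in the
        # diff below, but only when the series does not already start there.
        start = 0 if s_mon == 1 else 13 - s_mon
        # Cut off everything after the last December.
        stop = times.size if e_mon == 12 else times.size - e_mon
        return times[start:stop]

    # March 2000 through May 2002 (27 months) trims to Jan-Dec 2001 (12 months).
    times = [dt.datetime(2000 + m // 12, m % 12 + 1, 1) for m in range(2, 29)]
    trimmed = trim_to_whole_years(times)
    print(trimmed[0], trimmed[-1])  # 2001-01-01 00:00:00  2001-12-01 00:00:00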


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/cb942841
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/cb942841
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/cb942841

Branch: refs/heads/master
Commit: cb9428413baccf8128df7855467838ce1600049c
Parents: 3539aa2
Author: Alex Goodman <ag...@users.noreply.github.com>
Authored: Wed Sep 27 18:35:18 2017 -0700
Committer: Alex Goodman <ag...@users.noreply.github.com>
Committed: Wed Sep 27 18:35:18 2017 -0700

----------------------------------------------------------------------
 ocw/dataset_processor.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/cb942841/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 2097cc4..2227892 100755
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -75,13 +75,18 @@ def temporal_subset(target_dataset, month_start, month_end,
                              name=target_dataset.name)
 
     if average_each_year:
+        new_times = new_dataset.times
         nmonth = len(month_index)
-        ntime = new_dataset.times.size
+        ntime = new_times.size
         nyear = ntime // nmonth
         if ntime % nmonth != 0:
-            raise ValueError("Number of times in dataset ({}) does not "
-                             "divide evenly into {} year(s)."
-                             .format(ntime, nyear))
+            logger.warning("Number of times in dataset ({}) does not "
+                           "divide evenly into {} year(s). Trimming data..."
+                           .format(ntime, nyear))
+            s_mon = new_times[0].month
+            e_mon = new_times[-1].month
+            new_times = new_times[13-s_mon:-e_mon]
+            nyear = new_times.size // nmonth
 
         averaged_time = []
         ny, nx = target_dataset.values.shape[1:]
@@ -92,7 +97,7 @@ def temporal_subset(target_dataset, month_start, month_end,
             center_index = int(nmonth / 2 + iyear * nmonth)
             if nmonth == 1:
                 center_index = iyear
-            averaged_time.append(new_dataset.times[center_index])
+            averaged_time.append(new_times[center_index])
             averaged_values[iyear, :] = ma.average(new_dataset.values[
                 nmonth * iyear: nmonth * iyear + nmonth, :], axis=0)
         new_dataset = ds.Dataset(target_dataset.lats,
@@ -253,7 +258,7 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes,
             if path.contains_point([new_lons[iy, ix],
                                     new_lats[iy, ix]]) or not boundary_check:
                new_xy_mask[iy, ix] = 0.
-            
+
     new_index = np.where(new_xy_mask == 0.)
     # Regrid the data on each time slice
     for i in range(len(target_dataset.times)):
@@ -286,7 +291,7 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes,
         values_false_indices = np.where(values_original.mask == False)
         qmdi[values_true_indices] = 1.
         qmdi[values_false_indices] = 0.
-        qmdi_r = griddata((lons.flatten(), lats.flatten()), qmdi.flatten(), 
+        qmdi_r = griddata((lons.flatten(), lats.flatten()), qmdi.flatten(),
                              (new_lons[new_index],
                               new_lats[new_index]),
                               method='nearest')
@@ -1441,7 +1446,7 @@ def _are_bounds_contained_by_dataset(dataset, bounds):
     '''
     lat_min, lat_max, lon_min, lon_max = dataset.spatial_boundaries()
     start, end = dataset.temporal_boundaries()
-    
+
     errors = []
 
     # TODO:  THIS IS TERRIBLY inefficent and we need to use a geometry