You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by go...@apache.org on 2013/08/07 20:41:42 UTC

svn commit: r1511431 - in /incubator/climate/branches/RefactorInput/ocw: dataset_processor.py tests/test_dataset_processor.py

Author: goodale
Date: Wed Aug  7 18:41:41 2013
New Revision: 1511431

URL: http://svn.apache.org/r1511431
Log:
Progress toward CLIMATE-235:
Added test to check the regridded times are correct.

Modified:
    incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
    incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py

Modified: incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/dataset_processor.py?rev=1511431&r1=1511430&r2=1511431&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/dataset_processor.py Wed Aug  7 18:41:41 2013
@@ -16,6 +16,7 @@
 #
 
 from ocw import dataset as ds
+from toolkit import process
 
 import datetime
 import numpy as np
@@ -35,11 +36,29 @@ def temporal_rebin(target_dataset, tempo
     :returns: A new temporally rebinned Dataset
     :rtype: Open Climate Workbench Dataset Object
     """
-    # This will call down into the _congrid() function within this module
-    # after using the temporal_resolution to determine the new number of time
-    # bins to use in the resulting output Dataset.  Only the time axis of the 
-    # Dataset will be changed 
-    pass
+    # Decode the temporal resolution into a string format that 
+    # _rcmes_calc_average_on_new_time_unit_K() can understand
+    day_count = temporal_resolution.days
+    time_unit = None
+    if day_count == 1:
+        time_unit = 'daily'
+    elif day_count > 1 and day_count <= 31:
+        time_unit = 'monthly'
+    elif day_count > 31 and day_count <= 366:
+        time_unit = 'annual'
+    else:
+        time_unit = 'full'
+    
+    #  This is how RCMES calls this underlying function
+    masked_values = target_dataset.values.view(ma.MaskedArray)
+    binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit_K(masked_values, target_dataset.times, time_unit)
+    
+    new_dataset = ds.Dataset(target_dataset.lats, 
+                             target_dataset.lons, 
+                             binned_dates, 
+                             binned_values)
+    
+    return new_dataset
 
 def spatial_regrid(target_dataset, new_latitudes, new_longitudes):
     """ Regrid a Dataset using the new latitudes and longitudes
@@ -197,25 +216,22 @@ def _rcmes_spatial_regrid(spatial_values
     return regridded_values
 
 
-def _rcmes_calc_average_on_new_time_unit_K(data, dateList, unit):
-    '''
-    Routine to calculate averages on longer time units than the data exists on.
-    e.g. if the data is 6-hourly, calculate daily, or monthly means.
-     
-    Input:
-        data     - data values
-        dateList - list of python datetime structures corresponding to data values
-        unit     - string describing time unit to average onto 
-                      e.g. 'monthly', 'daily', 'pentad','annual','decadal'
-    Output:
-        meanstorem - numpy masked array of data values meaned over required time period
-        newTimesList - a list of python datetime objects representing the data in the new averagin period
-                           NB. currently set to beginning of averaging period, 
-                           i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
-    '''
+def _rcmes_calc_average_on_new_time_unit_K(data, dates, unit):
+    """ Rebin 3d array and list of dates using the provided unit parameter
+    
+    :param data: Input data that needs to be averaged 
+    :type data: 3D masked numpy array of shape (times, lats, lons)
+    :param dates: List of dates that correspond to the given data values
+    :type dates: Python datetime objects
+    :param unit: Time unit to average the data into
+    :type unit: String matching one of these values : full | annual | monthly | daily
+    
+    :returns: meanstorem, newTimesList
+    :rtype: 3D numpy masked array the same shape as the input array, list of python datetime objects
+    """
 
     # Check if the user-selected temporal grid is valid. If not, EXIT
-    acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
+    acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')
     if not acceptable:
         print 'Error: unknown unit type selected for time averaging: EXIT'
         return -1,-1,-1,-1
@@ -228,31 +244,31 @@ def _rcmes_calc_average_on_new_time_unit
     # Year list
     if unit=='annual':
         timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year))
+        for i in np.arange(len(dates)):
+            timeunits.append(str(dates[i].year))
         timeunits = np.array(timeunits, dtype=int)
          
     # YearMonth format list
     if unit=='monthly':
         timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+        for i in np.arange(len(dates)):
+            timeunits.append(str(dates[i].year) + str("%02d" % dates[i].month))
         timeunits = np.array(timeunits,dtype=int)
 
     # YearMonthDay format list
     if unit=='daily':
         timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
+        for i in np.arange(len(dates)):
+            timeunits.append(str(dates[i].year) + str("%02d" % dates[i].month) + str("%02d" % dates[i].day))
         timeunits = np.array(timeunits,dtype=int)
 
     # TODO: add pentad setting using Julian days?
 
     # Full list: a special case
     if unit == 'full':
-        comment='Calculating means data over the entire time range: i.e., annual-mean climatology'
+        #  Calculating means data over the entire time range: i.e., annual-mean climatology
         timeunits = []
-        for i in np.arange(len(dateList)):
+        for i in np.arange(len(dates)):
             timeunits.append(999)  # i.e. we just want the same value for all times.
         timeunits = np.array(timeunits, dtype=int)
 
@@ -294,8 +310,8 @@ def _rcmes_calc_average_on_new_time_unit
                 dd = int(smyunit[6:8])
             if len(smyunit)==3:  # Full time range
                 # Need to set an appropriate time representing the mid-point of the entire time span
-                dt = dateList[-1]-dateList[0]
-                halfway = dateList[0]+(dt/2)
+                dt = dates[-1]-dates[0]
+                halfway = dates[0]+(dt/2)
                 yyyy = int(halfway.year)
                 mm = int(halfway.month)
                 dd = int(halfway.day)
@@ -305,7 +321,6 @@ def _rcmes_calc_average_on_new_time_unit
 
     # 3D data arrays
     if data.ndim==3:
-        # datamask = create_mask_using_threshold(data,threshold=0.75)
         # Create array to store the resulting data
         meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
   
@@ -318,7 +333,7 @@ def _rcmes_calc_average_on_new_time_unit
                 mask[timeunits!=myunit,:,:] = 1.0
                 # Calculate missing data mask within each time unit...
                 datamask_at_this_timeunit = np.zeros_like(data)
-                datamask_at_this_timeunit[:]= create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
+                datamask_at_this_timeunit[:]= process.create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
                 # Store results for masking later
                 datamask_store.append(datamask_at_this_timeunit[0])
                 # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
@@ -340,8 +355,8 @@ def _rcmes_calc_average_on_new_time_unit
                 dd = int(smyunit[6:8])
             if len(smyunit)==3:  # Full time range
                 # Need to set an appropriate time representing the mid-point of the entire time span
-                dt = dateList[-1]-dateList[0]
-                halfway = dateList[0]+(dt/2)
+                dt = dates[-1]-dates[0]
+                halfway = dates[0]+(dt/2)
                 yyyy = int(halfway.year)
                 mm = int(halfway.month)
                 dd = int(halfway.day)

Modified: incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py?rev=1511431&r1=1511430&r2=1511431&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py Wed Aug  7 18:41:41 2013
@@ -22,12 +22,21 @@ from ocw import dataset as ds
 import numpy as np
 import numpy.ma as ma
 
+class CustomAssertions:
+    # Custom Assertions to handle Numpy Arrays
+    def assert1DArraysEqual(self, array1, array2):
+        self.assertSequenceEqual(tuple(array1), tuple(array2))
 
-class TestTemporalRebin(unittest.TestCase):
+class TestTemporalRebin(unittest.TestCase, CustomAssertions):
     
     def setUp(self):
-        self.input_dataset = ten_year_monthly_dataset()
-        pass
+        self.ten_year_monthly_dataset = ten_year_monthly_dataset()
+        self.ten_year_annual_times = np.array([datetime.datetime(year, 1, 1) for year in range(2000, 2010)])
+    
+    def test_monthly_to_annual_rebin(self):
+        annual_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=365))
+        self.assert1DArraysEqual(annual_dataset.times, self.ten_year_annual_times)
+        
         
     
     def test__congrid_neighbor(self):
@@ -61,16 +70,14 @@ class TestRcmesSpatialRegrid(unittest.Te
         self.assertEqual(regridded_values.shape, lats2.shape)
         self.assertEqual(regridded_values.shape, lons2.shape)
 
-class TestSpatialRegrid(unittest.TestCase):
+class TestSpatialRegrid(unittest.TestCase, CustomAssertions):
     
     def setUp(self):
         self.input_dataset = ten_year_monthly_dataset()
         self.new_lats = np.array(range(-89, 90, 4))
         self.new_lons = np.array(range(-179, 180, 4))
         self.regridded_dataset = dp.spatial_regrid(self.input_dataset, self.new_lats, self.new_lons)
-    # Custom Assertions to handle Numpy Arrays
-    def assert1DArraysEqual(self, array1, array2):
-        self.assertSequenceEqual(tuple(array1), tuple(array2))
+
 
     def test_returned_lats(self):
         self.assert1DArraysEqual(self.regridded_dataset.lats, self.new_lats)