You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by go...@apache.org on 2013/08/07 20:41:42 UTC
svn commit: r1511431 - in /incubator/climate/branches/RefactorInput/ocw:
dataset_processor.py tests/test_dataset_processor.py
Author: goodale
Date: Wed Aug 7 18:41:41 2013
New Revision: 1511431
URL: http://svn.apache.org/r1511431
Log:
Progress toward CLIMATE-235:
Added test to check that the regridded times are correct.
Modified:
incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py
Modified: incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/dataset_processor.py?rev=1511431&r1=1511430&r2=1511431&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/dataset_processor.py Wed Aug 7 18:41:41 2013
@@ -16,6 +16,7 @@
#
from ocw import dataset as ds
+from toolkit import process
import datetime
import numpy as np
@@ -35,11 +36,29 @@ def temporal_rebin(target_dataset, tempo
:returns: A new temporally rebinned Dataset
:rtype: Open Climate Workbench Dataset Object
"""
- # This will call down into the _congrid() function within this module
- # after using the temporal_resolution to determine the new number of time
- # bins to use in the resulting output Dataset. Only the time axis of the
- # Dataset will be changed
- pass
+ # Decode the temporal resolution into a string format that
+ # _rcmes_calc_average_on_new_time_unit_K() can understand
+ day_count = temporal_resolution.days
+ time_unit = None
+ if day_count == 1:
+ time_unit = 'daily'
+ elif day_count > 1 and day_count <= 31:
+ time_unit = 'monthly'
+ elif day_count > 31 and day_count <= 366:
+ time_unit = 'annual'
+ else:
+ time_unit = 'full'
+
+ # This is how RCMES calls this underlying function
+ masked_values = target_dataset.values.view(ma.MaskedArray)
+ binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit_K(masked_values, target_dataset.times, time_unit)
+
+ new_dataset = ds.Dataset(target_dataset.lats,
+ target_dataset.lons,
+ binned_dates,
+ binned_values)
+
+ return new_dataset
def spatial_regrid(target_dataset, new_latitudes, new_longitudes):
""" Regrid a Dataset using the new latitudes and longitudes
@@ -197,25 +216,22 @@ def _rcmes_spatial_regrid(spatial_values
return regridded_values
-def _rcmes_calc_average_on_new_time_unit_K(data, dateList, unit):
- '''
- Routine to calculate averages on longer time units than the data exists on.
- e.g. if the data is 6-hourly, calculate daily, or monthly means.
-
- Input:
- data - data values
- dateList - list of python datetime structures corresponding to data values
- unit - string describing time unit to average onto
- e.g. 'monthly', 'daily', 'pentad','annual','decadal'
- Output:
- meanstorem - numpy masked array of data values meaned over required time period
- newTimesList - a list of python datetime objects representing the data in the new averaging period
- NB. currently set to beginning of averaging period,
- i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
- '''
+def _rcmes_calc_average_on_new_time_unit_K(data, dates, unit):
+ """ Rebin 3d array and list of dates using the provided unit parameter
+
+ :param data: Input data that needs to be averaged
+ :type data: 3D masked numpy array of shape (times, lats, lons)
+ :param dates: List of dates that correspond to the given data values
+ :type dates: Python datetime objects
+ :param unit: Time unit to average the data into
+ :type unit: String matching one of these values: full | annual | monthly | daily
+
+ :returns: meanstorem, newTimesList
+ :rtype: 3D numpy masked array the same shape as the input array, list of python datetime objects
+ """
# Check if the user-selected temporal grid is valid. If not, EXIT
- acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
+ acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')
if not acceptable:
print 'Error: unknown unit type selected for time averaging: EXIT'
return -1,-1,-1,-1
@@ -228,31 +244,31 @@ def _rcmes_calc_average_on_new_time_unit
# Year list
if unit=='annual':
timeunits = []
- for i in np.arange(len(dateList)):
- timeunits.append(str(dateList[i].year))
+ for i in np.arange(len(dates)):
+ timeunits.append(str(dates[i].year))
timeunits = np.array(timeunits, dtype=int)
# YearMonth format list
if unit=='monthly':
timeunits = []
- for i in np.arange(len(dateList)):
- timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+ for i in np.arange(len(dates)):
+ timeunits.append(str(dates[i].year) + str("%02d" % dates[i].month))
timeunits = np.array(timeunits,dtype=int)
# YearMonthDay format list
if unit=='daily':
timeunits = []
- for i in np.arange(len(dateList)):
- timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
+ for i in np.arange(len(dates)):
+ timeunits.append(str(dates[i].year) + str("%02d" % dates[i].month) + str("%02d" % dates[i].day))
timeunits = np.array(timeunits,dtype=int)
# TODO: add pentad setting using Julian days?
# Full list: a special case
if unit == 'full':
- comment='Calculating means data over the entire time range: i.e., annual-mean climatology'
+ # Calculating means data over the entire time range: i.e., annual-mean climatology
timeunits = []
- for i in np.arange(len(dateList)):
+ for i in np.arange(len(dates)):
timeunits.append(999) # i.e. we just want the same value for all times.
timeunits = np.array(timeunits, dtype=int)
@@ -294,8 +310,8 @@ def _rcmes_calc_average_on_new_time_unit
dd = int(smyunit[6:8])
if len(smyunit)==3: # Full time range
# Need to set an appropriate time representing the mid-point of the entire time span
- dt = dateList[-1]-dateList[0]
- halfway = dateList[0]+(dt/2)
+ dt = dates[-1]-dates[0]
+ halfway = dates[0]+(dt/2)
yyyy = int(halfway.year)
mm = int(halfway.month)
dd = int(halfway.day)
@@ -305,7 +321,6 @@ def _rcmes_calc_average_on_new_time_unit
# 3D data arrays
if data.ndim==3:
- # datamask = create_mask_using_threshold(data,threshold=0.75)
# Create array to store the resulting data
meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
@@ -318,7 +333,7 @@ def _rcmes_calc_average_on_new_time_unit
mask[timeunits!=myunit,:,:] = 1.0
# Calculate missing data mask within each time unit...
datamask_at_this_timeunit = np.zeros_like(data)
- datamask_at_this_timeunit[:]= create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
+ datamask_at_this_timeunit[:]= process.create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
# Store results for masking later
datamask_store.append(datamask_at_this_timeunit[0])
# Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
@@ -340,8 +355,8 @@ def _rcmes_calc_average_on_new_time_unit
dd = int(smyunit[6:8])
if len(smyunit)==3: # Full time range
# Need to set an appropriate time representing the mid-point of the entire time span
- dt = dateList[-1]-dateList[0]
- halfway = dateList[0]+(dt/2)
+ dt = dates[-1]-dates[0]
+ halfway = dates[0]+(dt/2)
yyyy = int(halfway.year)
mm = int(halfway.month)
dd = int(halfway.day)
Modified: incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py?rev=1511431&r1=1511430&r2=1511431&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py Wed Aug 7 18:41:41 2013
@@ -22,12 +22,21 @@ from ocw import dataset as ds
import numpy as np
import numpy.ma as ma
+class CustomAssertions:
+ # Custom Assertions to handle Numpy Arrays
+ def assert1DArraysEqual(self, array1, array2):
+ self.assertSequenceEqual(tuple(array1), tuple(array2))
-class TestTemporalRebin(unittest.TestCase):
+class TestTemporalRebin(unittest.TestCase, CustomAssertions):
def setUp(self):
- self.input_dataset = ten_year_monthly_dataset()
- pass
+ self.ten_year_monthly_dataset = ten_year_monthly_dataset()
+ self.ten_year_annual_times = np.array([datetime.datetime(year, 1, 1) for year in range(2000, 2010)])
+
+ def test_monthly_to_annual_rebin(self):
+ annual_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=365))
+ self.assert1DArraysEqual(annual_dataset.times, self.ten_year_annual_times)
+
def test__congrid_neighbor(self):
@@ -61,16 +70,14 @@ class TestRcmesSpatialRegrid(unittest.Te
self.assertEqual(regridded_values.shape, lats2.shape)
self.assertEqual(regridded_values.shape, lons2.shape)
-class TestSpatialRegrid(unittest.TestCase):
+class TestSpatialRegrid(unittest.TestCase, CustomAssertions):
def setUp(self):
self.input_dataset = ten_year_monthly_dataset()
self.new_lats = np.array(range(-89, 90, 4))
self.new_lons = np.array(range(-179, 180, 4))
self.regridded_dataset = dp.spatial_regrid(self.input_dataset, self.new_lats, self.new_lons)
- # Custom Assertions to handle Numpy Arrays
- def assert1DArraysEqual(self, array1, array2):
- self.assertSequenceEqual(tuple(array1), tuple(array2))
+
def test_returned_lats(self):
self.assert1DArraysEqual(self.regridded_dataset.lats, self.new_lats)