You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by go...@apache.org on 2013/08/08 15:36:37 UTC
svn commit: r1511774 - in /incubator/climate/branches/RefactorInput/ocw:
dataset_processor.py tests/test_dataset_processor.py
Author: goodale
Date: Thu Aug 8 13:36:37 2013
New Revision: 1511774
URL: http://svn.apache.org/r1511774
Log:
CLIMATE-235: Temporal Rebinning is now available within the dataset_processor module
Modified:
incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py
Modified: incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/dataset_processor.py?rev=1511774&r1=1511773&r2=1511774&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/dataset_processor.py Thu Aug 8 13:36:37 2013
@@ -48,11 +48,10 @@ def temporal_rebin(target_dataset, tempo
time_unit = 'annual'
else:
time_unit = 'full'
-
- # This is how RCMES calls this underlying function
+
masked_values = target_dataset.values.view(ma.MaskedArray)
binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit_K(masked_values, target_dataset.times, time_unit)
-
+ binned_dates = np.array(binned_dates)
new_dataset = ds.Dataset(target_dataset.lats,
target_dataset.lons,
binned_dates,
@@ -243,24 +242,16 @@ def _rcmes_calc_average_on_new_time_unit
# Year list
if unit=='annual':
- timeunits = []
- for i in np.arange(len(dates)):
- timeunits.append(str(dates[i].year))
- timeunits = np.array(timeunits, dtype=int)
+ timeunits = np.array([int(d.strftime("%Y")) for d in dates])
# YearMonth format list
if unit=='monthly':
- timeunits = []
- for i in np.arange(len(dates)):
- timeunits.append(str(dates[i].year) + str("%02d" % dates[i].month))
- timeunits = np.array(timeunits,dtype=int)
+ timeunits = np.array([int(d.strftime("%Y%m")) for d in dates])
# YearMonthDay format list
if unit=='daily':
- timeunits = []
- for i in np.arange(len(dates)):
- timeunits.append(str(dates[i].year) + str("%02d" % dates[i].month) + str("%02d" % dates[i].day))
- timeunits = np.array(timeunits,dtype=int)
+ timeunits = np.array([int(d.strftime("%Y%m%d")) for d in dates])
+
# TODO: add pentad setting using Julian days?
@@ -345,22 +336,22 @@ def _rcmes_calc_average_on_new_time_unit
yyyy = int(smyunit[0:4])
mm = 1
dd = 1
- if len(smyunit)==6: # YYYYMM
+ elif len(smyunit)==6: # YYYYMM
yyyy = int(smyunit[0:4])
mm = int(smyunit[4:6])
dd = 1
- if len(smyunit)==8: # YYYYMMDD
+ elif len(smyunit)==8: # YYYYMMDD
yyyy = int(smyunit[0:4])
mm = int(smyunit[4:6])
dd = int(smyunit[6:8])
- if len(smyunit)==3: # Full time range
+ elif len(smyunit)==3: # Full time range
# Need to set an appropriate time representing the mid-point of the entire time span
dt = dates[-1]-dates[0]
halfway = dates[0]+(dt/2)
yyyy = int(halfway.year)
mm = int(halfway.month)
dd = int(halfway.day)
- newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+ newTimesList.append(datetime.datetime(yyyy,mm,dd))
i += 1
if not processing_required:
Modified: incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py?rev=1511774&r1=1511773&r2=1511774&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py Thu Aug 8 13:36:37 2013
@@ -32,16 +32,40 @@ class TestTemporalRebin(unittest.TestCas
def setUp(self):
self.ten_year_monthly_dataset = ten_year_monthly_dataset()
self.ten_year_annual_times = np.array([datetime.datetime(year, 1, 1) for year in range(2000, 2010)])
+ self.two_years_daily_dataset = two_year_daily_dataset()
def test_monthly_to_annual_rebin(self):
annual_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=365))
self.assert1DArraysEqual(annual_dataset.times, self.ten_year_annual_times)
-
-
- def test__congrid_neighbor(self):
- pass
+ def test_monthly_to_full_rebin(self):
+ full_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=3650))
+ full_times = [datetime.datetime(2004, 12, 16)]
+ self.assertEqual(full_dataset.times, full_times)
+
+ def test_daily_to_monthly_rebin(self):
+ """This test takes a really long time to run. TODO: Figure out where the performance drag is"""
+ monthly_dataset = dp.temporal_rebin(self.two_years_daily_dataset, datetime.timedelta(days=31))
+ bins = list(set([datetime.datetime(time_reading.year, time_reading.month, 1) for time_reading in self.two_years_daily_dataset.times]))
+ bins = np.array(bins)
+ bins.sort()
+ self.assert1DArraysEqual(monthly_dataset.times, bins)
+ def test_daily_to_annual_rebin(self):
+ annual_dataset = dp.temporal_rebin(self.two_years_daily_dataset, datetime.timedelta(days=366))
+ bins = list(set([datetime.datetime(time_reading.year, 1, 1) for time_reading in self.two_years_daily_dataset.times]))
+ bins = np.array(bins)
+ bins.sort()
+ self.assert1DArraysEqual(annual_dataset.times, bins)
+
+
+ def test_non_rebin(self):
+ """This will take a monthly dataset and ask for a monthly rebin of 28 days. The resulting
+ dataset should have the same time values"""
+ monthly_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=28))
+ good_times = self.ten_year_monthly_dataset.times
+ self.assert1DArraysEqual(monthly_dataset.times, good_times)
+
class TestRcmesSpatialRegrid(unittest.TestCase):
@@ -91,13 +115,21 @@ class TestSpatialRegrid(unittest.TestCas
self.assertSequenceEqual(regridded_data_shape, expected_data_shape)
def ten_year_monthly_dataset():
- lats = np.array(range(-89, 90, 2))
- lons = np.array(range(-179, 180, 2))
- # Ten Years of monthly data
- times = np.array([datetime.datetime(year, month, 1) for year in range(2000, 2010) for month in range(1, 13)])
- values = np.ones([len(times), len(lats), len(lons)])
- input_dataset = ds.Dataset(lats, lons, times, values, variable="test variable name")
- return input_dataset
+ lats = np.array(range(-89, 90, 2))
+ lons = np.array(range(-179, 180, 2))
+ # Ten Years of monthly data
+ times = np.array([datetime.datetime(year, month, 1) for year in range(2000, 2010) for month in range(1, 13)])
+ values = np.ones([len(times), len(lats), len(lons)])
+ input_dataset = ds.Dataset(lats, lons, times, values, variable="test variable name")
+ return input_dataset
+
+def two_year_daily_dataset():
+ lats = np.array(range(-89, 90, 2))
+ lons = np.array(range(-179, 180, 2))
+ times = np.array([datetime.datetime(2001, 1, 1) + datetime.timedelta(days=d) for d in range(730)])
+ values = np.ones([len(times), len(lats), len(lons)])
+ dataset = ds.Dataset(lats, lons, times, values, variable='random data')
+ return dataset