You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by go...@apache.org on 2013/08/08 15:36:37 UTC

svn commit: r1511774 - in /incubator/climate/branches/RefactorInput/ocw: dataset_processor.py tests/test_dataset_processor.py

Author: goodale
Date: Thu Aug  8 13:36:37 2013
New Revision: 1511774

URL: http://svn.apache.org/r1511774
Log:
CLIMATE-235: Temporal Rebinning is now available within the dataset_processor module

Modified:
    incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
    incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py

Modified: incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/dataset_processor.py?rev=1511774&r1=1511773&r2=1511774&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/dataset_processor.py Thu Aug  8 13:36:37 2013
@@ -48,11 +48,10 @@ def temporal_rebin(target_dataset, tempo
         time_unit = 'annual'
     else:
         time_unit = 'full'
-    
-    #  This is how RCMES calls this underlying function
+
     masked_values = target_dataset.values.view(ma.MaskedArray)
     binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit_K(masked_values, target_dataset.times, time_unit)
-    
+    binned_dates = np.array(binned_dates)
     new_dataset = ds.Dataset(target_dataset.lats, 
                              target_dataset.lons, 
                              binned_dates, 
@@ -243,24 +242,16 @@ def _rcmes_calc_average_on_new_time_unit
 
     # Year list
     if unit=='annual':
-        timeunits = []
-        for i in np.arange(len(dates)):
-            timeunits.append(str(dates[i].year))
-        timeunits = np.array(timeunits, dtype=int)
+        timeunits = np.array([int(d.strftime("%Y")) for d in dates])
          
     # YearMonth format list
     if unit=='monthly':
-        timeunits = []
-        for i in np.arange(len(dates)):
-            timeunits.append(str(dates[i].year) + str("%02d" % dates[i].month))
-        timeunits = np.array(timeunits,dtype=int)
+        timeunits = np.array([int(d.strftime("%Y%m")) for d in dates])
 
     # YearMonthDay format list
     if unit=='daily':
-        timeunits = []
-        for i in np.arange(len(dates)):
-            timeunits.append(str(dates[i].year) + str("%02d" % dates[i].month) + str("%02d" % dates[i].day))
-        timeunits = np.array(timeunits,dtype=int)
+        timeunits = np.array([int(d.strftime("%Y%m%d")) for d in dates])
+
 
     # TODO: add pentad setting using Julian days?
 
@@ -345,22 +336,22 @@ def _rcmes_calc_average_on_new_time_unit
                 yyyy = int(smyunit[0:4])
                 mm = 1
                 dd = 1
-            if len(smyunit)==6:  # YYYYMM
+            elif len(smyunit)==6:  # YYYYMM
                 yyyy = int(smyunit[0:4])
                 mm = int(smyunit[4:6])
                 dd = 1
-            if len(smyunit)==8:  # YYYYMMDD
+            elif len(smyunit)==8:  # YYYYMMDD
                 yyyy = int(smyunit[0:4])
                 mm = int(smyunit[4:6])
                 dd = int(smyunit[6:8])
-            if len(smyunit)==3:  # Full time range
+            elif len(smyunit)==3:  # Full time range
                 # Need to set an appropriate time representing the mid-point of the entire time span
                 dt = dates[-1]-dates[0]
                 halfway = dates[0]+(dt/2)
                 yyyy = int(halfway.year)
                 mm = int(halfway.month)
                 dd = int(halfway.day)
-            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+            newTimesList.append(datetime.datetime(yyyy,mm,dd))
             i += 1
 
         if not processing_required:

Modified: incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py?rev=1511774&r1=1511773&r2=1511774&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py Thu Aug  8 13:36:37 2013
@@ -32,16 +32,40 @@ class TestTemporalRebin(unittest.TestCas
     def setUp(self):
         self.ten_year_monthly_dataset = ten_year_monthly_dataset()
         self.ten_year_annual_times = np.array([datetime.datetime(year, 1, 1) for year in range(2000, 2010)])
+        self.two_years_daily_dataset = two_year_daily_dataset()
     
     def test_monthly_to_annual_rebin(self):
         annual_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=365))
         self.assert1DArraysEqual(annual_dataset.times, self.ten_year_annual_times)
-        
-        
     
-    def test__congrid_neighbor(self):
-        pass
+    def test_monthly_to_full_rebin(self):
+        full_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=3650))
+        full_times = [datetime.datetime(2004, 12, 16)]
+        self.assertEqual(full_dataset.times, full_times)
+    
+    def test_daily_to_monthly_rebin(self):
+        """This test takes a really long time to run.  TODO: Figure out where the performance drag is"""
+        monthly_dataset = dp.temporal_rebin(self.two_years_daily_dataset, datetime.timedelta(days=31))
+        bins = list(set([datetime.datetime(time_reading.year, time_reading.month, 1) for time_reading in self.two_years_daily_dataset.times]))
+        bins = np.array(bins)
+        bins.sort()
+        self.assert1DArraysEqual(monthly_dataset.times, bins)
     
+    def test_daily_to_annual_rebin(self):
+        annual_dataset = dp.temporal_rebin(self.two_years_daily_dataset, datetime.timedelta(days=366))
+        bins = list(set([datetime.datetime(time_reading.year, 1, 1) for time_reading in self.two_years_daily_dataset.times]))
+        bins = np.array(bins)
+        bins.sort()
+        self.assert1DArraysEqual(annual_dataset.times, bins)
+        
+    
+    def test_non_rebin(self):
+        """This will take a monthly dataset and ask for a monthly rebin of 28 days.  The resulting
+        dataset should have the same time values"""
+        monthly_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=28))
+        good_times = self.ten_year_monthly_dataset.times
+        self.assert1DArraysEqual(monthly_dataset.times, good_times)
+
 
 
 class TestRcmesSpatialRegrid(unittest.TestCase):
@@ -91,13 +115,21 @@ class TestSpatialRegrid(unittest.TestCas
         self.assertSequenceEqual(regridded_data_shape, expected_data_shape)
 
 def ten_year_monthly_dataset():
-        lats = np.array(range(-89, 90, 2))
-        lons = np.array(range(-179, 180, 2))
-        # Ten Years of monthly data
-        times = np.array([datetime.datetime(year, month, 1) for year in range(2000, 2010) for month in range(1, 13)])
-        values = np.ones([len(times), len(lats), len(lons)])
-        input_dataset = ds.Dataset(lats, lons, times, values, variable="test variable name")
-        return input_dataset
+    lats = np.array(range(-89, 90, 2))
+    lons = np.array(range(-179, 180, 2))
+    # Ten Years of monthly data
+    times = np.array([datetime.datetime(year, month, 1) for year in range(2000, 2010) for month in range(1, 13)])
+    values = np.ones([len(times), len(lats), len(lons)])
+    input_dataset = ds.Dataset(lats, lons, times, values, variable="test variable name")
+    return input_dataset
+
+def two_year_daily_dataset():
+    lats = np.array(range(-89, 90, 2))
+    lons = np.array(range(-179, 180, 2))
+    times = np.array([datetime.datetime(2001, 1, 1) + datetime.timedelta(days=d) for d in range(730)])
+    values = np.ones([len(times), len(lats), len(lons)])
+    dataset = ds.Dataset(lats, lons, times, values, variable='random data')
+    return dataset