Posted to commits@climate.apache.org by jo...@apache.org on 2013/08/20 15:56:02 UTC

svn commit: r1515826 - in /incubator/climate/trunk: ./ ocw/ ocw/data_source/ ocw/tests/ rcmet/src/main/python/rcmes/toolkit/ rcmet/src/main/python/rcmes/utils/

Author: joyce
Date: Tue Aug 20 13:56:02 2013
New Revision: 1515826

URL: http://svn.apache.org/r1515826
Log:
CLIMATE-137 - Reintegrating refactoring branch into trunk.

Added:
    incubator/climate/trunk/ocw/__init__.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/__init__.py
    incubator/climate/trunk/ocw/data_source/
      - copied from r1515824, incubator/climate/branches/RefactorInput/ocw/data_source/
    incubator/climate/trunk/ocw/data_source/local.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/data_source/local.py
    incubator/climate/trunk/ocw/data_source/rcmed.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py
    incubator/climate/trunk/ocw/dataset.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/dataset.py
    incubator/climate/trunk/ocw/dataset_processor.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
    incubator/climate/trunk/ocw/evaluation.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/evaluation.py
    incubator/climate/trunk/ocw/metrics.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/metrics.py
    incubator/climate/trunk/ocw/plotter.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/plotter.py
    incubator/climate/trunk/ocw/tests/parameter_dataset_text.txt
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/parameter_dataset_text.txt
    incubator/climate/trunk/ocw/tests/parameters_metadata_output.p
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/parameters_metadata_output.p
    incubator/climate/trunk/ocw/tests/parameters_metadata_text.txt
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/parameters_metadata_text.txt
    incubator/climate/trunk/ocw/tests/parameters_values.p
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/parameters_values.p
    incubator/climate/trunk/ocw/tests/test_dataset.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_dataset.py
    incubator/climate/trunk/ocw/tests/test_dataset_processor.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py
    incubator/climate/trunk/ocw/tests/test_evaluation.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_evaluation.py
    incubator/climate/trunk/ocw/tests/test_local.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_local.py
    incubator/climate/trunk/ocw/tests/test_plotter.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_plotter.py
    incubator/climate/trunk/ocw/tests/test_rcmed.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_rcmed.py
    incubator/climate/trunk/rcmet/src/main/python/rcmes/utils/taylor.py
      - copied unchanged from r1515824, incubator/climate/branches/RefactorInput/rcmet/src/main/python/rcmes/utils/taylor.py
Modified:
    incubator/climate/trunk/   (props changed)
    incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py
    incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py
    incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py

Propchange: incubator/climate/trunk/
------------------------------------------------------------------------------
    svn:mergeinfo = /incubator/climate/branches/RefactorInput:1505725-1515824

Modified: incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py?rev=1515826&r1=1515825&r2=1515826&view=diff
==============================================================================
--- incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py (original)
+++ incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py Tue Aug 20 13:56:02 2013
@@ -76,8 +76,8 @@ def prep_data(settings, obsDatasetList, 
      Processes single or multiple files of obs and mdl data, or combinations of them, in a general way.
            i)    retrieve observations from the database
            ii)   load in model data
-           iii)  temporal regridding
-           iv)   spatial regridding
+           iii)  spatial regridding
+           iv)   temporal regridding
            v)    area-averaging
            Input:
                    cachedir 	- string describing directory path
@@ -138,8 +138,7 @@ def prep_data(settings, obsDatasetList, 
         print 'No input observation data file. EXIT'
         sys.exit()
 
-    ## Part 1: retrieve observation data from the database and regrid them
-    ##       NB. automatically uses local cache if already retrieved.
+    ## Part 0: Setup the regrid variables based on the regridOption given
 
     # preparation for spatial re-gridding: define the size of horizontal array of the target interpolation grid system (ngrdX and ngrdY)
     print 'regridOption in prep_data= ', regridOption
@@ -180,6 +179,11 @@ def prep_data(settings, obsDatasetList, 
 
     regObsData = []
     
+    
+    
+    ## Part 1: retrieve observation data from the database and regrid them
+    ##       NB. automatically uses local cache if already retrieved.
+    
     for n in np.arange(numOBSs):
         # spatial regridding
         oLats, oLons, _, oTimes, oData = db.extractData(obsDatasetId[n],
@@ -201,6 +205,7 @@ def prep_data(settings, obsDatasetList, 
         
         print 'tmpOBS shape = ', tmpOBS.shape
         
+        # OBS SPATIAL REGRIDDING
         for t in np.arange(nstOBSs):
             tmpOBS[t, :, :] = process.do_regrid(oData[t, :, :], oLats, oLons, lats, lons)
             
@@ -210,7 +215,7 @@ def prep_data(settings, obsDatasetList, 
         oLats = 0.0
         oLons = 0.0       # release the memory occupied by the temporary variables oLats and oLons.
         
-        # temporally regrid the spatially regridded obs data
+        # OBS TEMPORAL REGRIDDING
         oData, newObsTimes = process.calc_average_on_new_time_unit_K(tmpOBS, oTimes, unit=timeRegridOption)
 
         tmpOBS = 0.0
@@ -239,29 +244,14 @@ def prep_data(settings, obsDatasetList, 
 
     obsData = ma.array(regObsData)
     obsTimes = newObsTimes
-    regObsData = 0
-    oldObsTimes = 0
-    nT = len(obsTimes)
 
-    # TODO:  Refactor this into a function within the toolkit module
     # compute the simple multi-obs ensemble if multiple obs are used
     if numOBSs > 1:
-        print 'numOBSs = ', numOBSs
-        oData = obsData
-        print 'oData shape = ', oData.shape
-        obsData = ma.zeros((numOBSs + 1, nT, ngrdY, ngrdX))
-        print 'obsData shape = ', obsData.shape
-        avg = ma.zeros((nT, ngrdY, ngrdX))
-        
-        for i in np.arange(numOBSs):
-            obsData[i, :, :, :] = oData[i, :, :, :]
-            avg[:, :, :] = avg[:, :, :] + oData[i, :, :, :]
-
-        avg = avg / float(numOBSs)
-        obsData[numOBSs, :, :, :] = avg[:, :, :]     # store the model-ensemble data
-        numOBSs = numOBSs + 1                     # update the number of obs data to include the model ensemble
+        ensemble = np.mean(regObsData, axis=0)
+        regObsData.append(ensemble)
+        numOBSs = len(regObsData)
         obsList.append('ENS-OBS')
-    print 'OBS regridded: ', obsData.shape
+        obsData = ma.array(regObsData) # Cast to a masked array
 
 
     ## Part 2: load in and regrid model data from file(s)
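For reference, a minimal mask-aware sketch of the ensemble pattern introduced above (synthetic arrays stand in for the regridded obs; the model branch below follows the same pattern):

import numpy as np
import numpy.ma as ma

# Synthetic stand-ins for two regridded obs datasets, shape (time, y, x)
obs_a = ma.masked_invalid(np.random.rand(4, 3, 3))
obs_b = ma.masked_invalid(np.random.rand(4, 3, 3))

# Stack along a new dataset axis with the masks preserved, then average;
# masked cells are ignored by the masked-array mean
stacked = ma.concatenate([obs_a[np.newaxis], obs_b[np.newaxis]], axis=0)
ensemble = stacked.mean(axis=0)
obsData = ma.concatenate([stacked, ensemble[np.newaxis]], axis=0)  # (numOBSs + 1, t, y, x)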
@@ -306,11 +296,7 @@ def prep_data(settings, obsDatasetList, 
    
         # determine the dimension size from the model time and latitude data.
         nT = len(modelTimes)
-        
-        # UNUSED VARIABLES - WILL DELETE AFTER TESTING
-        # nmdlY=modelLats.shape[0]
-        # nmdlX=modelLats.shape[1]
-        #print 'nT, ngrdY, ngrdX = ',nT,ngrdY, ngrdX,min(modelTimes),max(modelTimes)
+
         print '  The shape of model data to be processed= ', mData.shape, ' for the period ', min(modelTimes), max(modelTimes)
         # spatial regridding of the model data
         tmpMDL = ma.zeros((nT, ngrdY, ngrdX))
@@ -340,10 +326,7 @@ def prep_data(settings, obsDatasetList, 
         regridMdlData.append(mData)
 
     modelData = ma.array(regridMdlData)
-    modelTimes = newMdlTimes
-    regridMdlData = 0
-    oldMdlTimes = 0
-    newMdlTimes = 0
+
     if (precipFlag == True) & (mvUnit == 'KG M-2 S-1'):
         print 'convert model variable unit from mm/s to mm/day'
         modelData = 86400.*modelData
@@ -365,24 +348,14 @@ def prep_data(settings, obsDatasetList, 
 
     print 'Reading and regridding model data are completed'
     print 'numMDLs, modelData.shape= ', numMDLs, modelData.shape
-    
-    # TODO: Do we need to make this a user supplied flag, or do we just create an ensemble ALWAYS
-    # TODO: Add in Kyo's code here as well
+
     # TODO:  Commented out until I can talk with Jinwon about this
     # compute the simple multi-model ensemble if multiple models are evaluated
     if numMDLs > 1:
-        mdlData=modelData
-        modelData=ma.zeros((numMDLs+1,nT,ngrdY,ngrdX))
-        avg=ma.zeros((nT,ngrdY,ngrdX))
-        for i in np.arange(numMDLs):
-            modelData[i,:,:,:]=mdlData[i,:,:,:]
-            avg[:,:,:]=avg[:,:,:]+mdlData[i,:,:,:]
-        avg=avg/float(numMDLs)
-        modelData[numMDLs,:,:,:]=avg[:,:,:]     # store the model-ensemble data
-        # THESE ARE NOT USED.  WILL DELETE AFTER TESTING
-        # i0mdl=0
-        # i1mdl=numMDLs
-        numMDLs=numMDLs+1
+        model_ensemble = np.mean(regridMdlData, axis=0)
+        regridMdlData.append(model_ensemble)
+        numMDLs = len(regridMdlData)
+        modelData = ma.array(regridMdlData)
         mdlName.append('ENS-MODEL')
         print 'Eval mdl-mean timeseries for the obs periods: modelData.shape= ',modelData.shape
     # GOODALE:  This ensemble code should be refactored into the process.py module since its purpose is data processing

Modified: incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py?rev=1515826&r1=1515825&r2=1515826&view=diff
==============================================================================
--- incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py (original)
+++ incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py Tue Aug 20 13:56:02 2013
@@ -47,6 +47,24 @@ def calcAnnualCycleMeans(dataset1):
     means = data.mean(axis = 0)
     return data, means
 
+def calcAnnualCycleMeansSubRegion(dataset1):
+    '''
+    Purpose:: 
+        Calculate annual cycle in terms of monthly means at every sub-region.
+
+    Input::
+        dataset1 - 2d numpy array of data in (region, t)
+        
+    Output:: 
+        means - (region, 12)
+
+    '''
+    nregion, nT = dataset1.shape
+    data = dataset1.reshape([nregion, nT/12, 12])
+    means = data.mean(axis = 1)
+    return data, means
+
+
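A quick synthetic check of the reshape pattern used here (the Stdev variant further down relies on the same (region, year, month) reshape):

import numpy as np

nregion, nT = 2, 36                            # 2 sub-regions, 3 years of monthly data
series = np.arange(nregion * nT, dtype=float).reshape(nregion, nT)

data = series.reshape([nregion, nT / 12, 12])  # (region, year, month); Python 2 integer division
means = data.mean(axis=1)                      # (region, 12): monthly climatology per region
print means.shape                              # (2, 12)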
 def calcClimYear(dataset1):
     '''
     Purpose:: 
@@ -99,6 +117,39 @@ def calcClimSeason(monthBegin, monthEnd,
     means = tSeries.mean(axis = 0)        
     return tSeries, means
 
+def calcClimSeasonSubRegion(monthBegin, monthEnd, dataset1):
+    '''
+    Purpose:: 
+       Calculate the seasonal mean time series and climatology for sub-region averaged data.
+       For example, to calculate the DJF mean time series, use monthBegin = 12, monthEnd = 2.
+       This also handles monthBegin == monthEnd, i.e. the climatology of a single month.
+
+    Input::
+        monthBegin - an integer for the beginning month (Jan =1)
+        monthEnd - an integer for the ending month (Jan = 1)
+        dataset1 - 2-D numpy array of data in (region, t)
+
+    Output:: 
+        tSeries - (region, number of years), or (region, number of years - 1) if monthBegin > monthEnd,
+        means - (region)
+    '''
+    nregion, nT = dataset1.shape
+    nYR = nT/12
+    if monthBegin > monthEnd:
+        # Offset the original array so that the first month
+        # becomes monthBegin; note that this cuts off the first year of data
+        offset = slice(monthBegin - 1, monthBegin - 13)
+        data = dataset1[:,offset].reshape([nregion,nYR-1, 12])
+        monthIndex = slice(0, 13 - monthBegin + monthEnd)
+    else:
+        # Since monthBegin <= monthEnd, just take a slice containing those months
+        data = dataset1.reshape([nregion,nYR,12])
+        monthIndex = slice(monthBegin - 1, monthEnd)
+        
+    tSeries = data[:, :, monthIndex].mean(axis = 2)
+    means = tSeries.mean(axis = 1)        
+    return tSeries, means
+
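A synthetic check of the wrap-around season logic (values are tagged with their month number, so a correct DJF slice should read 12, 1, 2):

import numpy as np

dataset1 = np.tile(np.arange(1., 13.), 3).reshape(1, 36)  # 1 region, 3 years
nregion, nT = dataset1.shape
nYR = nT / 12

monthBegin, monthEnd = 12, 2                       # DJF
offset = slice(monthBegin - 1, monthBegin - 13)    # drop the leading Jan-Nov and trailing Dec
data = dataset1[:, offset].reshape([nregion, nYR - 1, 12])
monthIndex = slice(0, 13 - monthBegin + monthEnd)  # first 3 months of the shifted year
print data[:, :, monthIndex]                       # [[[ 12.  1.  2.] [ 12.  1.  2.]]]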
 def calcAnnualCycleStdev(dataset1):
     '''
      Purpose:: 
@@ -114,7 +165,21 @@ def calcAnnualCycleStdev(dataset1):
     stds = data.std(axis = 0, ddof = 1)
     return stds
 
+def calcAnnualCycleStdevSubRegion(dataset1):
+    '''
+     Purpose:: 
+        Calculate monthly standard deviations for every sub-region
+     
+     Input::
+        dataset1 - 2d numpy array of data in (nregion, 12* number of years) 
 
+     Output:: 
+        stds - (nregion, 12)
+    '''
+    nregion, nT = dataset1.shape
+    data = dataset1.reshape([nregion, nT/12, 12])
+    stds = data.std(axis = 1, ddof = 1)
+    return stds
 
 def calcAnnualCycleDomainMeans(dataset1):
     '''
@@ -136,6 +201,22 @@ def calcAnnualCycleDomainMeans(dataset1)
         
     return means
 
+def calcSpatialStdevRatio(evaluationData, referenceData):
+    '''
+    Purpose ::
+        Calculate the ratio of spatial standard deviations: (model standard deviation) / (observation standard deviation)
+
+    Input ::
+        evaluationData - model data array (lat, lon)
+        referenceData - observation data array (lat, lon)
+
+    Output::
+        stdevRatio - the ratio of spatial standard deviations (a scalar)
+    
+    '''
+    stdevRatio = evaluationData[(evaluationData.mask==False) & (referenceData.mask==False)].std()/ \
+                 referenceData[(evaluationData.mask==False) & (referenceData.mask==False)].std()  
+    return stdevRatio
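A small worked example of the shared-mask logic above (masked cells in either field are excluded from both standard deviations):

import numpy as np
import numpy.ma as ma

evaluationData = ma.masked_values([[1., 2.], [3., -999.]], -999.)
referenceData = ma.masked_values([[1.5, -999.], [2.5, 4.]], -999.)

valid = (evaluationData.mask == False) & (referenceData.mask == False)
stdevRatio = evaluationData[valid].std() / referenceData[valid].std()
print stdevRatio   # 2.0: std([1., 3.]) / std([1.5, 2.5])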
 
 def calcTemporalStdev(dataset1):
     '''
@@ -321,7 +402,7 @@ def calcTemporalCorrelation(evaluationDa
         referenceData- observation data array of any shape
             
     Output::
-        temporalCorelation - A 2-D array of temporal correlation coefficients at each grid point.
+        temporalCorrelation - A 1-D array of temporal correlation coefficients, one for each subregion
         sigLev - A 2-D array of confidence levels related to temporalCorelation 
     
     REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
@@ -330,6 +411,40 @@ def calcTemporalCorrelation(evaluationDa
     evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75)
     referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75)
     
+    nregion = evaluationData.shape[0]
+    temporalCorrelation = ma.zeros([nregion])-100.
+    sigLev = ma.zeros([nregion])-100.
+    for iregion in np.arange(nregion):
+        temporalCorrelation[iregion], sigLev[iregion] = stats.pearsonr(evaluationData[iregion,:], referenceData[iregion,:])
+        sigLev[iregion] = 1 - sigLev[iregion]
+                    
+    temporalCorrelation = ma.masked_equal(temporalCorrelation.data, -100.)
+    sigLev = ma.masked_equal(sigLev.data, -100.)
+    
+    return temporalCorrelation, sigLev
+
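The per-region loop above in isolation, runnable against synthetic (region, t) series (names mirror the code; the data are fabricated):

import numpy as np
import numpy.ma as ma
from scipy import stats

np.random.seed(0)
referenceData = np.random.rand(3, 24)                          # 3 regions, 24 months
evaluationData = referenceData + 0.1 * np.random.rand(3, 24)   # correlated by construction

nregion = evaluationData.shape[0]
temporalCorrelation = ma.zeros([nregion]) - 100.               # -100. flags missing results
sigLev = ma.zeros([nregion]) - 100.
for iregion in np.arange(nregion):
    temporalCorrelation[iregion], sigLev[iregion] = \
        stats.pearsonr(evaluationData[iregion, :], referenceData[iregion, :])
    sigLev[iregion] = 1 - sigLev[iregion]                      # p-value -> confidence level
temporalCorrelation = ma.masked_equal(temporalCorrelation.data, -100.)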
+def calcTemporalCorrelationSubRegion(evaluationData, referenceData):
+    '''
+    Purpose ::
+        Calculate the temporal correlation.
+    
+    Assumption(s) ::
+        both evaluation and reference data are subregion averaged time series
+    
+    Input ::
+        evaluationData - model data array [region,t]
+        referenceData- observation data [region, t]
+            
+    Output::
+        temporalCorrelation - A 1-D array of temporal correlation coefficients, one for each subregion
+        sigLev - A 1-D array of confidence levels related to temporalCorrelation
+    
+    REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
+    sigLev: the correlation between model and observation is significant at sigLev * 100 %
+    '''
+    evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75)
+    referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75)
+    
     ngrdY = evaluationData.shape[1] 
     ngrdX = evaluationData.shape[2] 
     temporalCorrelation = ma.zeros([ngrdY,ngrdX])-100.

Modified: incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py?rev=1515826&r1=1515825&r2=1515826&view=diff
==============================================================================
--- incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py (original)
+++ incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py Tue Aug 20 13:56:02 2013
@@ -29,7 +29,7 @@ import time
 import netCDF4
 import numpy as np
 import numpy.ma as ma
-from scipy.ndimage import map_coordinates
+
 
 
 def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, lonmin, lonmax):
@@ -157,92 +157,12 @@ def calc_area_in_grid_box(latitude, dlat
 
     return A
 
-def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi= -999999999):
-    '''
-     Perform regridding from one set of lat,lon values onto a new set (lat2,lon2)
-
-     Input::
-         q          - the variable to be regridded
-         lat,lon    - original co-ordinates corresponding to q values
-         lat2,lon2  - new set of latitudes and longitudes that you want to regrid q onto
-         order      - (optional) interpolation order 1=bi-linear, 3=cubic spline
-         mdi        - (optional) fill value for missing data (used in creation of masked array)
-
-     Output::
-         q2  - q regridded onto the new set of lat2,lon2
-
-    '''
-
-    nlat = q.shape[0]
-    nlon = q.shape[1]
-
-    nlat2 = lat2.shape[0]
-    nlon2 = lon2.shape[1]
-
-    # To make our lives easier down the road, let's
-    # turn these into arrays of x & y coords
-    loni = lon2.ravel()
-    lati = lat2.ravel()
-
-    loni = loni.copy() # NB. it won't run unless you do this...
-    lati = lati.copy()
-
-    # Now, we'll set points outside the boundaries to lie along an edge
-    loni[loni > lon.max()] = lon.max()
-    loni[loni < lon.min()] = lon.min()
-
-    # To deal with the "hard" break, we'll have to treat y differently,
-    # so we're just setting the min here...
-    lati[lati > lat.max()] = lat.max()
-    lati[lati < lat.min()] = lat.min()
-
-
-    # We need to convert these to (float) indicies
-    #   (xi should range from 0 to (nx - 1), etc)
-    loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min())
-
-    # Deal with the "hard" break in the y-direction
-    lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min())
-
-    # Notes on dealing with MDI when regridding data.
-    #  Method adopted here:
-    #    Use bilinear interpolation of data by default (but user can specify other order using order=... in call)
-    #    Perform bilinear interpolation of data, and of mask.
-    #    To be conservative, new grid point which contained some missing data on the old grid is set to missing data.
-    #            -this is achieved by looking for any non-zero interpolated mask values.
-    #    To avoid issues with bilinear interpolation producing strong gradients leading into the MDI,
-    #     set values at MDI points to mean data value so little gradient visible = not ideal, but acceptable for now.
-
-    # Set values in MDI so that similar to surroundings so don't produce large gradients when interpolating
-    # Preserve MDI mask, by only changing data part of masked array object.
-    for shift in (-1, 1):
-        for axis in (0, 1):
-            q_shifted = np.roll(q, shift=shift, axis=axis)
-            idx = ~q_shifted.mask * q.mask
-            q.data[idx] = q_shifted[idx]
-
-    # Now we actually interpolate
-    # map_coordinates does cubic interpolation by default,
-    # use "order=1" to preform bilinear interpolation instead...
-    q2 = map_coordinates(q, [lati, loni], order=order)
-    q2 = q2.reshape([nlat2, nlon2])
-
-    # Set values to missing data outside of original domain
-    q2 = ma.masked_array(q2, mask=np.logical_or(np.logical_or(lat2 >= lat.max(),
-                                                              lat2 <= lat.min()),
-                                                np.logical_or(lon2 <= lon.min(),
-                                                              lon2 >= lon.max())))
-
-    # Make second map using nearest neighbour interpolation -use this to determine locations with MDI and mask these
-    qmdi = np.zeros_like(q)
-    qmdi[q.mask == True] = 1.
-    qmdi[q.mask == False] = 0.
-    qmdi_r = map_coordinates(qmdi, [lati, loni], order=order)
-    qmdi_r = qmdi_r.reshape([nlat2, nlon2])
-    mdimask = (qmdi_r != 0.0)
-
-    # Combine missing data mask, with outside domain mask define above.
-    q2.mask = np.logical_or(mdimask, q2.mask)
+def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi=-999999999):
+    """ 
+    This function has been moved to the ocw/dataset_processor module
+    """
+    from ocw import dataset_processor
+    q2 = dataset_processor._rcmes_spatial_regrid(q, lat, lon, lat2, lon2, order=order)
 
     return q2
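Callers keep the old interface; a sketch of a typical call (the import path is assumed, and the target grid is given as 2-D meshgrids as before):

import numpy as np
import numpy.ma as ma
from toolkit import process   # assumed import path inside rcmes

lat = np.linspace(-30., 30., 61)                  # source grid
lon = np.linspace(0., 60., 121)
q = ma.masked_invalid(np.random.rand(61, 121))    # synthetic field

lon2, lat2 = np.meshgrid(np.linspace(0., 60., 61),
                         np.linspace(-30., 30., 31))
q2 = process.do_regrid(q, lat, lon, lat2, lon2)   # bilinear by default (order=1)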
 
@@ -918,165 +838,13 @@ def extract_sub_time_selection(allTimes,
     return subdata
 
 def calc_average_on_new_time_unit_K(data, dateList, unit):
-    '''
-    Routine to calculate averages on longer time units than the data exists on.
-    e.g. if the data is 6-hourly, calculate daily, or monthly means.
-
-    Input:
-        data     - data values
-        dateList - list of python datetime structures corresponding to data values
-        unit     - string describing time unit to average onto
-                      e.g. 'monthly', 'daily', 'pentad','annual','decadal'
-    Output:
-        meanstorem - numpy masked array of data values meaned over required time period
-        newTimesList - a list of python datetime objects representing the data in the new averagin period
-                           NB. currently set to beginning of averaging period,
-                           i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
-    '''
-
-    # Check if the user-selected temporal grid is valid. If not, EXIT
-    acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
-    if not acceptable:
-        print 'Error: unknown unit type selected for time averaging: EXIT'
-        return -1,-1,-1,-1
-
-    # Calculate arrays of: annual timeseries: year (2007,2007),
-    #                      monthly time series: year-month (200701,200702),
-    #                      daily timeseries:  year-month-day (20070101,20070102)
-    #  depending on user-selected averaging period.
-
-    # Year list
-    if unit=='annual':
-        timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year))
-        timeunits = np.array(timeunits, dtype=int)
-
-    # YearMonth format list
-    if unit=='monthly':
-        timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
-        timeunits = np.array(timeunits,dtype=int)
-
-    # YearMonthDay format list
-    if unit=='daily':
-        timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
-        timeunits = np.array(timeunits,dtype=int)
-
-    # TODO: add pentad setting using Julian days?
-
-    # Full list: a special case
-    if unit == 'full':
-        comment='Calculating means data over the entire time range: i.e., annual-mean climatology'
-        timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(999)  # i.e. we just want the same value for all times.
-        timeunits = np.array(timeunits, dtype=int)
-
-    # empty list to store new times
-    newTimesList = []
-
-    # Decide whether or not you need to do any time averaging.
-    #   i.e. if data are already on required time unit then just pass data through and
-    #        calculate and return representative datetimes.
-    processing_required = True
-    if len(timeunits)==(len(np.unique(timeunits))):
-        processing_required = False
-
-    # 1D data arrays, i.e. time series
-    if data.ndim==1:
-        # Create array to store the resulting data
-        meanstore = np.zeros(len(np.unique(timeunits)))
-
-        # Calculate the means across each unique time unit
-        i=0
-        for myunit in np.unique(timeunits):
-            if processing_required:
-                datam=ma.masked_array(data,timeunits!=myunit)
-                meanstore[i] = ma.average(datam)
-
-            # construct new times list
-            smyunit = str(myunit)
-            if len(smyunit)==4:  # YYYY
-                yyyy = int(myunit[0:4])
-                mm = 1
-                dd = 1
-            if len(smyunit)==6:  # YYYYMM
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = 1
-            if len(smyunit)==8:  # YYYYMMDD
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = int(smyunit[6:8])
-            if len(smyunit)==3:  # Full time range
-                # Need to set an appropriate time representing the mid-point of the entire time span
-                dt = dateList[-1]-dateList[0]
-                halfway = dateList[0]+(dt/2)
-                yyyy = int(halfway.year)
-                mm = int(halfway.month)
-                dd = int(halfway.day)
-
-            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
-            i = i+1
-
-    # 3D data arrays
-    if data.ndim==3:
-        # datamask = create_mask_using_threshold(data,threshold=0.75)
-        # Create array to store the resulting data
-        meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
-
-        # Calculate the means across each unique time unit
-        i=0
-        datamask_store = []
-        for myunit in np.unique(timeunits):
-            if processing_required:
-                mask = np.zeros_like(data)
-                mask[timeunits!=myunit,:,:] = 1.0
-                # Calculate missing data mask within each time unit...
-                datamask_at_this_timeunit = np.zeros_like(data)
-                datamask_at_this_timeunit[:]= create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
-                # Store results for masking later
-                datamask_store.append(datamask_at_this_timeunit[0])
-                # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
-                datam = ma.masked_array(data,np.logical_or(mask,datamask_at_this_timeunit))
-                meanstore[i,:,:] = ma.average(datam,axis=0)
-            # construct new times list
-            smyunit = str(myunit)
-            if len(smyunit)==4:  # YYYY
-                yyyy = int(smyunit[0:4])
-                mm = 1
-                dd = 1
-            if len(smyunit)==6:  # YYYYMM
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = 1
-            if len(smyunit)==8:  # YYYYMMDD
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = int(smyunit[6:8])
-            if len(smyunit)==3:  # Full time range
-                # Need to set an appropriate time representing the mid-point of the entire time span
-                dt = dateList[-1]-dateList[0]
-                halfway = dateList[0]+(dt/2)
-                yyyy = int(halfway.year)
-                mm = int(halfway.month)
-                dd = int(halfway.day)
-            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
-            i += 1
-
-        if not processing_required:
-            meanstorem = data
-
-        if processing_required:
-            # Create masked array (using missing data mask defined above)
-            datamask_store = np.array(datamask_store)
-            meanstorem = ma.masked_array(meanstore, datamask_store)
+    """ 
+    This function has been moved to the ocw/dataset_processor module
+    """
+    from ocw import dataset_processor
+    temporally_regridded_data = dataset_processor._rcmes_calc_average_on_new_time_unit_K(data, dateList, unit)
+    return temporally_regridded_data
 
-    return meanstorem,newTimesList
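As with do_regrid, existing callers are unaffected; a sketch of a typical call (import path assumed, and assuming the delegate keeps the original (means, newTimesList) return pair that the callers in do_data_prep.py expect):

import datetime
import numpy as np
import numpy.ma as ma
from toolkit import process   # assumed import path inside rcmes

dateList = [datetime.datetime(2000, 1, 1) + datetime.timedelta(days=i)
            for i in range(60)]                      # two months of daily times
data = ma.masked_invalid(np.random.rand(60, 2, 2))   # (t, y, x) synthetic field

monthly, newTimes = process.calc_average_on_new_time_unit_K(data, dateList, 'monthly')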
 
 def decode_model_timesK(ifile,timeVarName,file_type):
     #################################################################################################