You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by jo...@apache.org on 2013/08/20 15:56:02 UTC
svn commit: r1515826 - in /incubator/climate/trunk: ./ ocw/ ocw/data_source/
ocw/tests/ rcmet/src/main/python/rcmes/toolkit/
rcmet/src/main/python/rcmes/utils/
Author: joyce
Date: Tue Aug 20 13:56:02 2013
New Revision: 1515826
URL: http://svn.apache.org/r1515826
Log:
CLIMATE-137 - Reintegrating refactoring branch into trunk.
Added:
incubator/climate/trunk/ocw/__init__.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/__init__.py
incubator/climate/trunk/ocw/data_source/
- copied from r1515824, incubator/climate/branches/RefactorInput/ocw/data_source/
incubator/climate/trunk/ocw/data_source/local.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/data_source/local.py
incubator/climate/trunk/ocw/data_source/rcmed.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py
incubator/climate/trunk/ocw/dataset.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/dataset.py
incubator/climate/trunk/ocw/dataset_processor.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
incubator/climate/trunk/ocw/evaluation.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/evaluation.py
incubator/climate/trunk/ocw/metrics.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/metrics.py
incubator/climate/trunk/ocw/plotter.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/plotter.py
incubator/climate/trunk/ocw/tests/parameter_dataset_text.txt
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/parameter_dataset_text.txt
incubator/climate/trunk/ocw/tests/parameters_metadata_output.p
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/parameters_metadata_output.p
incubator/climate/trunk/ocw/tests/parameters_metadata_text.txt
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/parameters_metadata_text.txt
incubator/climate/trunk/ocw/tests/parameters_values.p
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/parameters_values.p
incubator/climate/trunk/ocw/tests/test_dataset.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_dataset.py
incubator/climate/trunk/ocw/tests/test_dataset_processor.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py
incubator/climate/trunk/ocw/tests/test_evaluation.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_evaluation.py
incubator/climate/trunk/ocw/tests/test_local.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_local.py
incubator/climate/trunk/ocw/tests/test_plotter.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_plotter.py
incubator/climate/trunk/ocw/tests/test_rcmed.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/ocw/tests/test_rcmed.py
incubator/climate/trunk/rcmet/src/main/python/rcmes/utils/taylor.py
- copied unchanged from r1515824, incubator/climate/branches/RefactorInput/rcmet/src/main/python/rcmes/utils/taylor.py
Modified:
incubator/climate/trunk/ (props changed)
incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py
incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py
incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py
Propchange: incubator/climate/trunk/
------------------------------------------------------------------------------
svn:mergeinfo = /incubator/climate/branches/RefactorInput:1505725-1515824
Modified: incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py?rev=1515826&r1=1515825&r2=1515826&view=diff
==============================================================================
--- incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py (original)
+++ incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/do_data_prep.py Tue Aug 20 13:56:02 2013
@@ -76,8 +76,8 @@ def prep_data(settings, obsDatasetList,
Processes both single and multiple files of obs and mdl or combinations in a general way.
i) retrieve observations from the database
ii) load in model data
- iii) temporal regridding
- iv) spatial regridding
+ iii) spatial regridding
+ iv) temporal regridding
v) area-averaging
Input:
cachedir - string describing directory path
@@ -138,8 +138,7 @@ def prep_data(settings, obsDatasetList,
print 'No input observation data file. EXIT'
sys.exit()
- ## Part 1: retrieve observation data from the database and regrid them
- ## NB. automatically uses local cache if already retrieved.
+ ## Part 0: Setup the regrid variables based on the regridOption given
# preparation for spatial re-gridding: define the size of horizontal array of the target interpolation grid system (ngrdX and ngrdY)
print 'regridOption in prep_data= ', regridOption
@@ -180,6 +179,11 @@ def prep_data(settings, obsDatasetList,
regObsData = []
+
+
+ ## Part 1: retrieve observation data from the database and regrid them
+ ## NB. automatically uses local cache if already retrieved.
+
for n in np.arange(numOBSs):
# spatial regridding
oLats, oLons, _, oTimes, oData = db.extractData(obsDatasetId[n],
@@ -201,6 +205,7 @@ def prep_data(settings, obsDatasetList,
print 'tmpOBS shape = ', tmpOBS.shape
+ # OBS SPATIAL REGRIDING
for t in np.arange(nstOBSs):
tmpOBS[t, :, :] = process.do_regrid(oData[t, :, :], oLats, oLons, lats, lons)
@@ -210,7 +215,7 @@ def prep_data(settings, obsDatasetList,
oLats = 0.0
oLons = 0.0 # release the memory occupied by the temporary variables oLats and oLons.
- # temporally regrid the spatially regridded obs data
+ # OBS TEMPORAL REGRIDING
oData, newObsTimes = process.calc_average_on_new_time_unit_K(tmpOBS, oTimes, unit=timeRegridOption)
tmpOBS = 0.0
@@ -239,29 +244,14 @@ def prep_data(settings, obsDatasetList,
obsData = ma.array(regObsData)
obsTimes = newObsTimes
- regObsData = 0
- oldObsTimes = 0
- nT = len(obsTimes)
- # TODO: Refactor this into a function within the toolkit module
# compute the simple multi-obs ensemble if multiple obs are used
if numOBSs > 1:
- print 'numOBSs = ', numOBSs
- oData = obsData
- print 'oData shape = ', oData.shape
- obsData = ma.zeros((numOBSs + 1, nT, ngrdY, ngrdX))
- print 'obsData shape = ', obsData.shape
- avg = ma.zeros((nT, ngrdY, ngrdX))
-
- for i in np.arange(numOBSs):
- obsData[i, :, :, :] = oData[i, :, :, :]
- avg[:, :, :] = avg[:, :, :] + oData[i, :, :, :]
-
- avg = avg / float(numOBSs)
- obsData[numOBSs, :, :, :] = avg[:, :, :] # store the model-ensemble data
- numOBSs = numOBSs + 1 # update the number of obs data to include the model ensemble
+ ensemble = np.mean(regObsData, axis=0)
+ regObsData.append(ensemble)
+ numOBSs = len(regObsData)
obsList.append('ENS-OBS')
- print 'OBS regridded: ', obsData.shape
+ obsData = ma.array(regObsData) # Cast to a masked array
## Part 2: load in and regrid model data from file(s)
@@ -306,11 +296,7 @@ def prep_data(settings, obsDatasetList,
# determine the dimension size from the model time and latitude data.
nT = len(modelTimes)
-
- # UNUSED VARIABLES - WILL DELETE AFTER TESTING
- # nmdlY=modelLats.shape[0]
- # nmdlX=modelLats.shape[1]
- #print 'nT, ngrdY, ngrdX = ',nT,ngrdY, ngrdX,min(modelTimes),max(modelTimes)
+
print ' The shape of model data to be processed= ', mData.shape, ' for the period ', min(modelTimes), max(modelTimes)
# spatial regridding of the modl data
tmpMDL = ma.zeros((nT, ngrdY, ngrdX))
@@ -340,10 +326,7 @@ def prep_data(settings, obsDatasetList,
regridMdlData.append(mData)
modelData = ma.array(regridMdlData)
- modelTimes = newMdlTimes
- regridMdlData = 0
- oldMdlTimes = 0
- newMdlTimes = 0
+
if (precipFlag == True) & (mvUnit == 'KG M-2 S-1'):
print 'convert model variable unit from mm/s to mm/day'
modelData = 86400.*modelData
@@ -365,24 +348,14 @@ def prep_data(settings, obsDatasetList,
print 'Reading and regridding model data are completed'
print 'numMDLs, modelData.shape= ', numMDLs, modelData.shape
-
- # TODO: Do we need to make this a user supplied flag, or do we just create an ensemble ALWAYS
- # TODO: Add in Kyo's code here as well
+
# TODO: Commented out until I can talk with Jinwon about this
# compute the simple multi-model ensemble if multiple models are evaluated
if numMDLs > 1:
- mdlData=modelData
- modelData=ma.zeros((numMDLs+1,nT,ngrdY,ngrdX))
- avg=ma.zeros((nT,ngrdY,ngrdX))
- for i in np.arange(numMDLs):
- modelData[i,:,:,:]=mdlData[i,:,:,:]
- avg[:,:,:]=avg[:,:,:]+mdlData[i,:,:,:]
- avg=avg/float(numMDLs)
- modelData[numMDLs,:,:,:]=avg[:,:,:] # store the model-ensemble data
- # THESE ARE NOT USED. WILL DELETE AFTER TESTING
- # i0mdl=0
- # i1mdl=numMDLs
- numMDLs=numMDLs+1
+ model_ensemble = np.mean(regridMdlData, axis=0)
+ regridMdlData.append(model_ensemble)
+ numMDLs = len(regridMdlData)
+ modelData = ma.array(regridMdlData)
mdlName.append('ENS-MODEL')
print 'Eval mdl-mean timeseries for the obs periods: modelData.shape= ',modelData.shape
# GOODALE: This ensemble code should be refactored into process.py module since it's purpose is data processing
Modified: incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py?rev=1515826&r1=1515825&r2=1515826&view=diff
==============================================================================
--- incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py (original)
+++ incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/metrics.py Tue Aug 20 13:56:02 2013
@@ -47,6 +47,24 @@ def calcAnnualCycleMeans(dataset1):
means = data.mean(axis = 0)
return data, means
+def calcAnnualCycleMeansSubRegion(dataset1):
+ '''
+ Purpose::
+ Calculate annual cycle in terms of monthly means at every sub-region.
+
+ Input::
+ dataset1 - 2d numpy array of data in (region, t)
+
+ Output::
+ means - (region, # of months)
+
+ '''
+ nregion, nT = dataset1.shape
+ data = dataset1.reshape[nregion, nT/12, 12]
+ means = data.mean(axis = 1)
+ return data, means
+
+
def calcClimYear(dataset1):
'''
Purpose::
@@ -99,6 +117,39 @@ def calcClimSeason(monthBegin, monthEnd,
means = tSeries.mean(axis = 0)
return tSeries, means
+def calcClimSeasonSubRegion(monthBegin, monthEnd, dataset1):
+ '''
+ Purpose ::
+ Calculate seasonal mean montheries and climatology for both 3-D and point time series.
+ For example, to calculate DJF mean time series, monthBegin = 12, monthEnd =2
+ This can handle monthBegin=monthEnd i.e. for climatology of a specific month
+
+ Input::
+ monthBegin - an integer for the beginning month (Jan =1)
+ monthEnd - an integer for the ending month (Jan = 1)
+ dataset1 - 3d numpy array of data in (region, t) or 1d numpy array montheries
+
+ Output::
+ tSeries - (region, number of years/number of years -1 if monthBegin > monthEnd,lat,lon),
+ means - (region)
+ '''
+ nregion, nT = dataset1.shape
+ nYR = nT/12
+ if monthBegin > monthEnd:
+ # Offset the original array so that the the first month
+ # becomes monthBegin, note that this cuts off the first year of data
+ offset = slice(monthBegin - 1, monthBegin - 13)
+ data = dataset1[:,offset].reshape([nregion,nYR-1, 12])
+ monthIndex = slice(0, 13 - monthBegin + monthEnd)
+ else:
+ # Since monthBegin <= monthEnd, just take a slice containing those months
+ data = dataset1.reshape([nregion,nYR,12])
+ monthIndex = slice(monthBegin - 1, monthEnd)
+
+ tSeries = data[:, :, monthIndex].mean(axis = 2)
+ means = tSeries.mean(axis = 1)
+ return tSeries, means
+
def calcAnnualCycleStdev(dataset1):
'''
Purpose::
@@ -114,7 +165,21 @@ def calcAnnualCycleStdev(dataset1):
stds = data.std(axis = 0, ddof = 1)
return stds
+def calcAnnualCycleStdevSubRegion(dataset1):
+ '''
+ Purpose::
+ Calculate monthly standard deviations for every sub-region
+
+ Input::
+ dataset1 - 2d numpy array of data in (nregion, 12* number of years)
+ Output::
+ stds - (nregion, 12)
+ '''
+ nregion, nT = data1.shape
+ data = dataset1.reshape([nregion, nT/12, 12])
+ stds = data.std(axis = 1, ddof = 1)
+ return stds
def calcAnnualCycleDomainMeans(dataset1):
'''
@@ -136,6 +201,22 @@ def calcAnnualCycleDomainMeans(dataset1)
return means
+def calcSpatialStdevRatio(evaluationData, referenceData):
+ '''
+ Purpose ::
+ Calculate the ratio of spatial standard deviation (model standard deviation)/(observed standard deviation)
+
+ Input ::
+ evaluationData - model data array (lat, lon)
+ referenceData- observation data array (lat,lon)
+
+ Output::
+ ratio of standard deviation (a scholar)
+
+ '''
+ stdevRatio = evaluationData[(evaluationData.mask==False) & (referenceData.mask==False)].std()/ \
+ referenceData[(evaluationData.mask==False) & (referenceData.mask==False)].std()
+ return stdevRatio
def calcTemporalStdev(dataset1):
'''
@@ -321,7 +402,7 @@ def calcTemporalCorrelation(evaluationDa
referenceData- observation data array of any shape
Output::
- temporalCorelation - A 2-D array of temporal correlation coefficients at each grid point.
+ temporalCorelation - A 2-D array of temporal correlation coefficients at each subregion
sigLev - A 2-D array of confidence levels related to temporalCorelation
REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
@@ -330,6 +411,40 @@ def calcTemporalCorrelation(evaluationDa
evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75)
referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75)
+ nregion = evaluationData.shape[0]
+ temporalCorrelation = ma.zeros([nregion])-100.
+ sigLev = ma.zeros([nregion])-100.
+ for iregion in np.arange(nregion):
+ temporalCorrelation[iregion], sigLev[iregion] = stats.pearsonr(evaluationData[iregion,:], referenceData[iregion,:])
+ sigLev[iregion] = 1 - sigLev[iregion]
+
+ temporalCorrelation=ma.masked_equal(temporalCorrelation.data, -100.)
+ sigLev=ma.masked_equal(sigLev.data, -100.)
+
+ return temporalCorrelation, sigLev
+
+def calcTemporalCorrelationSubRegion(evaluationData, referenceData):
+ '''
+ Purpose ::
+ Calculate the temporal correlation.
+
+ Assumption(s) ::
+ both evaluation and reference data are subregion averaged time series
+
+ Input ::
+ evaluationData - model data array [region,t]
+ referenceData- observation data [region, t]
+
+ Output::
+ temporalCorelation - A 1-D array of temporal correlation coefficients at each grid point.
+ sigLev - A 1-D array of confidence levels related to temporalCorelation
+
+ REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
+ sigLev: the correlation between model and observation is significant at sigLev * 100 %
+ '''
+ evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75)
+ referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75)
+
ngrdY = evaluationData.shape[1]
ngrdX = evaluationData.shape[2]
temporalCorrelation = ma.zeros([ngrdY,ngrdX])-100.
Modified: incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py?rev=1515826&r1=1515825&r2=1515826&view=diff
==============================================================================
--- incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py (original)
+++ incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py Tue Aug 20 13:56:02 2013
@@ -29,7 +29,7 @@ import time
import netCDF4
import numpy as np
import numpy.ma as ma
-from scipy.ndimage import map_coordinates
+
def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, lonmin, lonmax):
@@ -157,92 +157,12 @@ def calc_area_in_grid_box(latitude, dlat
return A
-def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi= -999999999):
- '''
- Perform regridding from one set of lat,lon values onto a new set (lat2,lon2)
-
- Input::
- q - the variable to be regridded
- lat,lon - original co-ordinates corresponding to q values
- lat2,lon2 - new set of latitudes and longitudes that you want to regrid q onto
- order - (optional) interpolation order 1=bi-linear, 3=cubic spline
- mdi - (optional) fill value for missing data (used in creation of masked array)
-
- Output::
- q2 - q regridded onto the new set of lat2,lon2
-
- '''
-
- nlat = q.shape[0]
- nlon = q.shape[1]
-
- nlat2 = lat2.shape[0]
- nlon2 = lon2.shape[1]
-
- # To make our lives easier down the road, let's
- # turn these into arrays of x & y coords
- loni = lon2.ravel()
- lati = lat2.ravel()
-
- loni = loni.copy() # NB. it won't run unless you do this...
- lati = lati.copy()
-
- # Now, we'll set points outside the boundaries to lie along an edge
- loni[loni > lon.max()] = lon.max()
- loni[loni < lon.min()] = lon.min()
-
- # To deal with the "hard" break, we'll have to treat y differently,
- # so we're just setting the min here...
- lati[lati > lat.max()] = lat.max()
- lati[lati < lat.min()] = lat.min()
-
-
- # We need to convert these to (float) indicies
- # (xi should range from 0 to (nx - 1), etc)
- loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min())
-
- # Deal with the "hard" break in the y-direction
- lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min())
-
- # Notes on dealing with MDI when regridding data.
- # Method adopted here:
- # Use bilinear interpolation of data by default (but user can specify other order using order=... in call)
- # Perform bilinear interpolation of data, and of mask.
- # To be conservative, new grid point which contained some missing data on the old grid is set to missing data.
- # -this is achieved by looking for any non-zero interpolated mask values.
- # To avoid issues with bilinear interpolation producing strong gradients leading into the MDI,
- # set values at MDI points to mean data value so little gradient visible = not ideal, but acceptable for now.
-
- # Set values in MDI so that similar to surroundings so don't produce large gradients when interpolating
- # Preserve MDI mask, by only changing data part of masked array object.
- for shift in (-1, 1):
- for axis in (0, 1):
- q_shifted = np.roll(q, shift=shift, axis=axis)
- idx = ~q_shifted.mask * q.mask
- q.data[idx] = q_shifted[idx]
-
- # Now we actually interpolate
- # map_coordinates does cubic interpolation by default,
- # use "order=1" to preform bilinear interpolation instead...
- q2 = map_coordinates(q, [lati, loni], order=order)
- q2 = q2.reshape([nlat2, nlon2])
-
- # Set values to missing data outside of original domain
- q2 = ma.masked_array(q2, mask=np.logical_or(np.logical_or(lat2 >= lat.max(),
- lat2 <= lat.min()),
- np.logical_or(lon2 <= lon.min(),
- lon2 >= lon.max())))
-
- # Make second map using nearest neighbour interpolation -use this to determine locations with MDI and mask these
- qmdi = np.zeros_like(q)
- qmdi[q.mask == True] = 1.
- qmdi[q.mask == False] = 0.
- qmdi_r = map_coordinates(qmdi, [lati, loni], order=order)
- qmdi_r = qmdi_r.reshape([nlat2, nlon2])
- mdimask = (qmdi_r != 0.0)
-
- # Combine missing data mask, with outside domain mask define above.
- q2.mask = np.logical_or(mdimask, q2.mask)
+def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi=-999999999):
+ """
+ This function has been moved to the ocw/dataset_processor module
+ """
+ from ocw import dataset_processor
+ q2 = dataset_processor._rcmes_spatial_regrid(q, lat, lon, lat2, lon2, order=1)
return q2
@@ -918,165 +838,13 @@ def extract_sub_time_selection(allTimes,
return subdata
def calc_average_on_new_time_unit_K(data, dateList, unit):
- '''
- Routine to calculate averages on longer time units than the data exists on.
- e.g. if the data is 6-hourly, calculate daily, or monthly means.
-
- Input:
- data - data values
- dateList - list of python datetime structures corresponding to data values
- unit - string describing time unit to average onto
- e.g. 'monthly', 'daily', 'pentad','annual','decadal'
- Output:
- meanstorem - numpy masked array of data values meaned over required time period
- newTimesList - a list of python datetime objects representing the data in the new averagin period
- NB. currently set to beginning of averaging period,
- i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
- '''
-
- # Check if the user-selected temporal grid is valid. If not, EXIT
- acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
- if not acceptable:
- print 'Error: unknown unit type selected for time averaging: EXIT'
- return -1,-1,-1,-1
-
- # Calculate arrays of: annual timeseries: year (2007,2007),
- # monthly time series: year-month (200701,200702),
- # daily timeseries: year-month-day (20070101,20070102)
- # depending on user-selected averaging period.
-
- # Year list
- if unit=='annual':
- timeunits = []
- for i in np.arange(len(dateList)):
- timeunits.append(str(dateList[i].year))
- timeunits = np.array(timeunits, dtype=int)
-
- # YearMonth format list
- if unit=='monthly':
- timeunits = []
- for i in np.arange(len(dateList)):
- timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
- timeunits = np.array(timeunits,dtype=int)
-
- # YearMonthDay format list
- if unit=='daily':
- timeunits = []
- for i in np.arange(len(dateList)):
- timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
- timeunits = np.array(timeunits,dtype=int)
-
- # TODO: add pentad setting using Julian days?
-
- # Full list: a special case
- if unit == 'full':
- comment='Calculating means data over the entire time range: i.e., annual-mean climatology'
- timeunits = []
- for i in np.arange(len(dateList)):
- timeunits.append(999) # i.e. we just want the same value for all times.
- timeunits = np.array(timeunits, dtype=int)
-
- # empty list to store new times
- newTimesList = []
-
- # Decide whether or not you need to do any time averaging.
- # i.e. if data are already on required time unit then just pass data through and
- # calculate and return representative datetimes.
- processing_required = True
- if len(timeunits)==(len(np.unique(timeunits))):
- processing_required = False
-
- # 1D data arrays, i.e. time series
- if data.ndim==1:
- # Create array to store the resulting data
- meanstore = np.zeros(len(np.unique(timeunits)))
-
- # Calculate the means across each unique time unit
- i=0
- for myunit in np.unique(timeunits):
- if processing_required:
- datam=ma.masked_array(data,timeunits!=myunit)
- meanstore[i] = ma.average(datam)
-
- # construct new times list
- smyunit = str(myunit)
- if len(smyunit)==4: # YYYY
- yyyy = int(myunit[0:4])
- mm = 1
- dd = 1
- if len(smyunit)==6: # YYYYMM
- yyyy = int(smyunit[0:4])
- mm = int(smyunit[4:6])
- dd = 1
- if len(smyunit)==8: # YYYYMMDD
- yyyy = int(smyunit[0:4])
- mm = int(smyunit[4:6])
- dd = int(smyunit[6:8])
- if len(smyunit)==3: # Full time range
- # Need to set an appropriate time representing the mid-point of the entire time span
- dt = dateList[-1]-dateList[0]
- halfway = dateList[0]+(dt/2)
- yyyy = int(halfway.year)
- mm = int(halfway.month)
- dd = int(halfway.day)
-
- newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
- i = i+1
-
- # 3D data arrays
- if data.ndim==3:
- # datamask = create_mask_using_threshold(data,threshold=0.75)
- # Create array to store the resulting data
- meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
-
- # Calculate the means across each unique time unit
- i=0
- datamask_store = []
- for myunit in np.unique(timeunits):
- if processing_required:
- mask = np.zeros_like(data)
- mask[timeunits!=myunit,:,:] = 1.0
- # Calculate missing data mask within each time unit...
- datamask_at_this_timeunit = np.zeros_like(data)
- datamask_at_this_timeunit[:]= create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
- # Store results for masking later
- datamask_store.append(datamask_at_this_timeunit[0])
- # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
- datam = ma.masked_array(data,np.logical_or(mask,datamask_at_this_timeunit))
- meanstore[i,:,:] = ma.average(datam,axis=0)
- # construct new times list
- smyunit = str(myunit)
- if len(smyunit)==4: # YYYY
- yyyy = int(smyunit[0:4])
- mm = 1
- dd = 1
- if len(smyunit)==6: # YYYYMM
- yyyy = int(smyunit[0:4])
- mm = int(smyunit[4:6])
- dd = 1
- if len(smyunit)==8: # YYYYMMDD
- yyyy = int(smyunit[0:4])
- mm = int(smyunit[4:6])
- dd = int(smyunit[6:8])
- if len(smyunit)==3: # Full time range
- # Need to set an appropriate time representing the mid-point of the entire time span
- dt = dateList[-1]-dateList[0]
- halfway = dateList[0]+(dt/2)
- yyyy = int(halfway.year)
- mm = int(halfway.month)
- dd = int(halfway.day)
- newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
- i += 1
-
- if not processing_required:
- meanstorem = data
-
- if processing_required:
- # Create masked array (using missing data mask defined above)
- datamask_store = np.array(datamask_store)
- meanstorem = ma.masked_array(meanstore, datamask_store)
+ """
+ This function has been moved to the ocw/dataset_processor module
+ """
+ from ocw import dataset_processor
+ temporally_regridded_data = dataset_processor._rcmes_calc_average_on_new_time_unit_K(data, dateList, unit)
+ return temporally_regridded_data
- return meanstorem,newTimesList
def decode_model_timesK(ifile,timeVarName,file_type):
#################################################################################################