Posted to commits@climate.apache.org by ah...@apache.org on 2013/08/20 00:27:36 UTC
svn commit: r1515646 -
/incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py
Author: ahart
Date: Mon Aug 19 22:27:36 2013
New Revision: 1515646
URL: http://svn.apache.org/r1515646
Log:
CLIMATE-259: updates to metrics.py to support generation of time series, taylor, subregion, and portrait diagrams
Modified:
incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py
Modified: incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py?rev=1515646&r1=1515645&r2=1515646&view=diff
==============================================================================
--- incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py (original)
+++ incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py Mon Aug 19 22:27:36 2013
@@ -47,6 +47,24 @@ def calcAnnualCycleMeans(dataset1):
means = data.mean(axis = 0)
return data, means
+def calcAnnualCycleMeansSubRegion(dataset1):
+ '''
+ Purpose::
+ Calculate annual cycle in terms of monthly means at every sub-region.
+
+ Input::
+ dataset1 - 2d numpy array of data in (region, t)
+
+ Output::
+ data - reshaped input data, (region, number of years, 12)
+ means - (region, 12)
+
+ '''
+ nregion, nT = dataset1.shape
+ data = dataset1.reshape([nregion, nT/12, 12])
+ means = data.mean(axis = 1)
+ return data, means
+
+
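A quick usage sketch for the new sub-region annual-cycle helper (synthetic data, assuming metrics.py is importable and the series length is a whole number of years):

    import numpy as np

    # 3 sub-regions, 5 years of monthly values
    regional_series = np.random.rand(3, 60)
    data, monthly_means = calcAnnualCycleMeansSubRegion(regional_series)
    print(data.shape)           # (3, 5, 12)
    print(monthly_means.shape)  # (3, 12)
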
def calcClimYear(dataset1):
'''
Purpose::
@@ -99,6 +117,39 @@ def calcClimSeason(monthBegin, monthEnd,
means = tSeries.mean(axis = 0)
return tSeries, means
+def calcClimSeasonSubRegion(monthBegin, monthEnd, dataset1):
+ '''
+ Purpose ::
+ Calculate the seasonal mean time series and climatology for sub-region averaged data.
+ For example, to calculate DJF mean time series, monthBegin = 12, monthEnd =2
+ This can handle monthBegin=monthEnd i.e. for climatology of a specific month
+
+ Input::
+ monthBegin - an integer for the beginning month (Jan =1)
+ monthEnd - an integer for the ending month (Jan = 1)
+ dataset1 - 2d numpy array of data in (region, t)
+
+ Output::
+ tSeries - (region, number of years), or (region, number of years - 1) if monthBegin > monthEnd
+ means - (region)
+ '''
+ nregion, nT = dataset1.shape
+ nYR = nT/12
+ if monthBegin > monthEnd:
+ # Offset the original array so that the first month
+ # becomes monthBegin, note that this cuts off the first year of data
+ offset = slice(monthBegin - 1, monthBegin - 13)
+ data = dataset1[:,offset].reshape([nregion,nYR-1, 12])
+ monthIndex = slice(0, 13 - monthBegin + monthEnd)
+ else:
+ # Since monthBegin <= monthEnd, just take a slice containing those months
+ data = dataset1.reshape([nregion,nYR,12])
+ monthIndex = slice(monthBegin - 1, monthEnd)
+
+ tSeries = data[:, :, monthIndex].mean(axis = 2)
+ means = tSeries.mean(axis = 1)
+ return tSeries, means
+
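A small sketch of the wrap-around season handling on a synthetic 4-year series; with monthBegin=12 and monthEnd=2 the first year of data is dropped:

    import numpy as np

    series = np.tile(np.arange(48.0), (2, 1))   # 2 regions, 4 years of monthly data
    djf_series, djf_clim = calcClimSeasonSubRegion(12, 2, series)
    print(djf_series.shape)  # (2, 3): one DJF mean per remaining year
    print(djf_clim.shape)    # (2,)
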
def calcAnnualCycleStdev(dataset1):
'''
Purpose::
@@ -114,7 +165,21 @@ def calcAnnualCycleStdev(dataset1):
stds = data.std(axis = 0, ddof = 1)
return stds
+def calcAnnualCycleStdevSubRegion(dataset1):
+ '''
+ Purpose::
+ Calculate monthly standard deviations for every sub-region
+
+ Input::
+ dataset1 - 2d numpy array of data in (nregion, 12* number of years)
+ Output::
+ stds - (nregion, 12)
+ '''
+ nregion, nT = dataset1.shape
+ data = dataset1.reshape([nregion, nT/12, 12])
+ stds = data.std(axis = 1, ddof = 1)
+ return stds
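And the matching standard-deviation helper, again on synthetic data:

    import numpy as np

    regional_series = np.random.rand(3, 120)    # 3 regions, 10 years of monthly data
    monthly_stds = calcAnnualCycleStdevSubRegion(regional_series)
    print(monthly_stds.shape)  # (3, 12)
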
def calcAnnualCycleDomainMeans(dataset1):
'''
@@ -136,6 +201,22 @@ def calcAnnualCycleDomainMeans(dataset1)
return means
+def calcSpatialStdevRatio(evaluationData, referenceData):
+ '''
+ Purpose ::
+ Calculate the ratio of spatial standard deviation (model standard deviation)/(observed standard deviation)
+
+ Input ::
+ evaluationData - model data array (lat, lon)
+ referenceData- observation data array (lat,lon)
+
+ Output::
+ ratio of standard deviations (a scalar)
+
+ '''
+ stdevRatio = evaluationData[(evaluationData.mask==False) & (referenceData.mask==False)].std()/ \
+ referenceData[(evaluationData.mask==False) & (referenceData.mask==False)].std()
+ return stdevRatio
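A minimal sketch for the new spatial standard-deviation ratio, assuming both inputs are numpy masked arrays (the function indexes through their .mask attributes):

    import numpy as np
    import numpy.ma as ma

    # two masked 2-D fields sharing a missing grid cell
    model = ma.array(np.random.rand(10, 10), mask=np.zeros((10, 10), dtype=bool))
    obs = ma.array(np.random.rand(10, 10), mask=np.zeros((10, 10), dtype=bool))
    model.mask[0, 0] = obs.mask[0, 0] = True
    print(calcSpatialStdevRatio(model, obs))  # near 1.0 for fields drawn from the same distribution
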
def calcTemporalStdev(dataset1):
'''
@@ -321,7 +402,7 @@ def calcTemporalCorrelation(evaluationDa
referenceData- observation data array of any shape
Output::
- temporalCorelation - A 2-D array of temporal correlation coefficients at each grid point.
+ temporalCorelation - A 1-D array of temporal correlation coefficients at each subregion
sigLev - A 2-D array of confidence levels related to temporalCorelation
REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
@@ -330,25 +411,49 @@ def calcTemporalCorrelation(evaluationDa
evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75)
referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75)
- ngrdY = evaluationData.shape[1]
- ngrdX = evaluationData.shape[2]
- temporalCorrelation = ma.zeros([ngrdY,ngrdX])-100.
- sigLev = ma.zeros([ngrdY,ngrdX])-100.
- for iy in np.arange(ngrdY):
- for ix in np.arange(ngrdX):
- if not evaluationDataMask[iy,ix] and not referenceDataMask[iy,ix]:
- t1=evaluationData[:,iy,ix]
- t2=referenceData[:,iy,ix]
- if t1.min()!=t1.max() and t2.min()!=t2.max():
- temporalCorrelation[iy,ix], sigLev[iy,ix]=stats.pearsonr(t1[(t1.mask==False) & (t2.mask==False)],
- t2[(t1.mask==False) & (t2.mask==False)])
- sigLev[iy,ix]=1-sigLev[iy,ix] # p-value => confidence level
+ nregion = evaluationData.shape[0]
+ temporalCorrelation = ma.zeros([nregion])-100.
+ sigLev = ma.zeros([nregion])-100.
+ for iregion in np.arange(nregion):
+ temporalCorrelation[iregion], sigLev[iregion] = stats.pearsonr(evaluationData[iregion,:], referenceData[iregion,:])
+ sigLev[iregion] = 1 - sigLev[iregion]
temporalCorrelation=ma.masked_equal(temporalCorrelation.data, -100.)
sigLev=ma.masked_equal(sigLev.data, -100.)
return temporalCorrelation, sigLev
+def calcTemporalCorrelationSubRegion(evaluationData, referenceData):
+ '''
+ Purpose ::
+ Calculate the temporal correlation.
+
+ Assumption(s) ::
+ both evaluation and reference data are subregion averaged time series
+
+ Input ::
+ evaluationData - sub-region averaged model time series [t]
+ referenceData - sub-region averaged observation time series [t]
+
+ Output::
+ temporalCorrelation - the temporal correlation coefficient (a scalar) for the sub-region averaged series
+ sigLev - the confidence level associated with temporalCorrelation (computed internally; only temporalCorrelation is returned)
+
+ REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
+ sigLev: the correlation between model and observation is significant at sigLev * 100 %
+ '''
+
+
+ temporalCorrelation = 0.
+ sigLev = 0.
+ t1=evaluationData[:]
+ t2=referenceData[:]
+ if t1.min()!=t1.max() and t2.min()!=t2.max():
+ temporalCorrelation, sigLev=stats.pearsonr(t1,t2)
+ sigLev=1.-sigLev # p-value => confidence level
+
+ return temporalCorrelation
+
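A small sketch of the sub-region correlation helper on two synthetic 1-D series (the masked-array handling of calcTemporalCorrelation is not needed here):

    import numpy as np

    model_series = np.arange(24.0)
    obs_series = model_series + np.random.normal(0.0, 0.5, 24)
    r = calcTemporalCorrelationSubRegion(model_series, obs_series)
    print(r)  # close to 1.0 for strongly correlated series
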
def calcPatternCorrelation(evaluationData, referenceData):
'''
Purpose ::
@@ -424,16 +529,8 @@ def calcNashSutcliff(evaluationData, ref
(((data2 - meanData2) ** 2).sum(axis = 1)))
return nashcor
-def getPdfInputValues():
- print '****PDF input values from user required **** \n'
- nbins = int (raw_input('Please enter the number of bins to use. \n'))
- minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n'))
- maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n'))
-
- return nbins, minEdge, maxEdge
-
-def calcPdf(evaluationData, referenceData, settings=None):
+def calcPdf(evaluationData, referenceData):
'''
Routine to calculate a normalized Probability Distribution Function with
bins set according to data range.
@@ -443,22 +540,16 @@ def calcPdf(evaluationData, referenceDat
called in do_rcmes_processing_sub.py
Inputs::
- evaluationData (3D numpy array): array shape (time, lat, lon)
- referenceData (3D numpy array): array shape (time, lat, lon)
- settings (tuple): [optional] format (binCount, minEdge, maxEdge)
- binCount (int): number of bins to use
- minEdge (int|float): minimum edge
- maxEdge (int|float): maximum edge
-
- NB, time here is the number of time values eg for time period
- 199001010000 - 199201010000
+ 2 arrays of data:
+ t1 is the model data and t2 is the 3D observation data - (time, lat, lon). NB, time here
+ is the number of time values, e.g. for the time period 199001010000 - 199201010000
- Assumptions::
- If annual means-opt 1, was chosen, then referenceData.shape = (YearsCount,lat,lon)
+ if annual means-opt 1, was chosen, then t2.shape = (2,lat,lon)
- If monthly means - opt 2, was choosen, then referenceData.shape = (MonthsCount,lat,lon)
+ if monthly means - opt 2, was chosen, then t2.shape = (24,lat,lon)
- Output::
+ User inputs: number of bins to use and edges (min and max)
+ Output::
one float which represents the PDF for the year
@@ -473,17 +564,20 @@ def calcPdf(evaluationData, referenceDat
PDF for the year
'''
+ # float to store the final PDF similarity score
similarityScore = 0.0
+
print 'min modelData', evaluationData[:, :, :].min()
print 'max modelData', evaluationData[:, :, :].max()
print 'min obsData', referenceData[:, :, :].min()
print 'max obsData', referenceData[:, :, :].max()
-
- if settings == None:
- nbins, minEdge, maxEdge = getPdfInputValues()
- else:
- nbins, minEdge, maxEdge = settings
-
+ # find a distribution for the entire dataset
+ # Prompt the user to enter the min, max and number of bin values.
+ # The max, min info above is to help guide the user with these choices
+ print '****PDF input values from user required **** \n'
+ nbins = int (raw_input('Please enter the number of bins to use. \n'))
+ minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n'))
+ maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n'))
mybins = np.linspace(minEdge, maxEdge, nbins)
print 'nbins is', nbins, 'mybins are', mybins
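As a rough illustration only (a hypothetical helper, not the committed calcPdf logic), fixed bin edges like the ones built above can be used to compare two normalized histograms:

    import numpy as np

    def histogram_overlap(model_values, obs_values, nbins, min_edge, max_edge):
        # Overlap of two normalized histograms on shared bins; returns a value in [0, 1]
        bins = np.linspace(min_edge, max_edge, nbins)
        model_pdf, _ = np.histogram(model_values.ravel(), bins=bins, density=True)
        obs_pdf, _ = np.histogram(obs_values.ravel(), bins=bins, density=True)
        bin_width = bins[1] - bins[0]
        return np.minimum(model_pdf, obs_pdf).sum() * bin_width
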
@@ -669,9 +763,14 @@ def metrics_plots(varName, numOBS, numMD
####################################################
# Terminate job if no metrics are to be calculated #
####################################################
-
+ if doMetricsOption == 'y':
+ calculate_metrics_and_make_plots(varName, workdir, lons, lats, obsData, mdlData, obsRgn, mdlRgn, obsList, mdlList, subRegions,\
+ subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1)
+
+ '''
neval = 0
-
+
+
while doMetricsOption == 'y':
neval += 1
print ' '
@@ -960,6 +1059,114 @@ def metrics_plots(varName, numOBS, numMD
# Repeat for another metric
doMetricsOption = raw_input('Do you want to perform another evaluation? [y/n]\n> ').lower()
-
+ '''
# Processing complete if a user enters 'n' for 'doMetricsOption'
print 'RCMES processing completed.'
+
+def calculate_metrics_and_make_plots(varName, workdir, lons, lats, obsData, mdlData, obsRgn, mdlRgn, obsList, mdlList, subRegions, \
+ subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1):
+ '''
+ Purpose::
+ Calculate all of the metrics used in the Kim et al. [2013] paper and plot them
+
+ Input::
+ varName - name of the variable being evaluated
+ workdir - directory where output figures are written
+ lons - array of grid longitudes
+ lats - array of grid latitudes
+ obsData - observation data array (nobs, t, lat, lon)
+ mdlData - model data array (nmodel, t, lat, lon)
+ obsRgn - sub-region averaged observation data (nobs, numSubRgn, t)
+ mdlRgn - sub-region averaged model data (nmodel, numSubRgn, t)
+ obsList - list of observation dataset names
+ mdlList - list of model names
+ subRegions - flag indicating whether sub-region analysis is performed
+ subRgnLon0, subRgnLat0 - southwest boundary of sub-regions [numSubRgn]
+ subRgnLon1, subRgnLat1 - northeast boundary of sub-regions [numSubRgn]
+ Output::
+ png files
+
+ '''
+
+
+ nobs, nt, ny, nx = obsData.shape
+ nmodel = mdlData.shape[0]
+ ### TODO: unit conversion (K to C)
+ if varName == 'temp':
+ obsData[0, :, :, :] = obsData[0, :, :, :] - 273.15
+ if subRegions:
+ obsRgn[0, :, :] = obsRgn[0, :, :] - 273.15
+ if varName == 'prec':
+ obsData[0, :, :, :] = obsData[0, :, :, :]*86400.
+ if subRegions:
+ obsRgn[0, :, :] = obsRgn[0, :, :]*86400.
+ ###
+ oTser, oClim = calcClimYear( obsData[0, :, :, :])
+ bias_of_overall_average = ma.zeros([nmodel, ny, nx])
+ spatial_stdev_ratio = np.zeros([nmodel])
+ spatial_corr = np.zeros([nmodel])
+ mdlList.append('ENS')
+
+ for imodel in np.arange(nmodel):
+ mTser, mClim = calcClimYear( mdlData[imodel,:,:,:])
+ bias_of_overall_average[imodel,:,:] = calcBias(mClim, oClim)
+ spatial_corr[imodel], sigLev = calcPatternCorrelation(oClim, mClim)
+ spatial_stdev_ratio[imodel] = calcSpatialStdevRatio(mClim, oClim)
+ fig_return = plots.draw_contour_map(oClim, lats, lons, workdir+'/observed_climatology_'+varName, fmt='png', gridshape=(1, 1),
+ clabel='', ptitle='', subtitles=obsList, cmap=None,
+ clevs=None, nlevs=10, parallels=None, meridians=None,
+ extend='neither')
+ fig_return = plots.draw_contour_map(bias_of_overall_average, lats, lons, workdir+'/bias_of_climatology_'+varName, fmt='png', gridshape=(4, 2),
+ clabel='', ptitle='', subtitles=mdlList, cmap=None,
+ clevs=None, nlevs=10, parallels=None, meridians=None,
+ extend='neither')
+ Taylor_data = np.array([spatial_stdev_ratio, spatial_corr]).transpose()
+
+ fig_return = plots.draw_taylor_diagram(Taylor_data, mdlList, refname='CRU', fname = workdir+'/Taylor_'+varName, fmt='png')
+
+ if subRegions:
+ nseason = 2 # (0: summer and 1: winter)
+ nregion = len(subRgnLon0)
+ season_name = ['summer','winter']
+ rowlabels = ['PNw','PNe','CAn','CAs','SWw','SWe','COL','GPn','GPc','GC','GL','NE','SE','FL']
+ collabels = ['M1','M2','M3','M4','M5','M6','ENS']
+ collabels[nmodel-1] = 'ENS'
+
+ for iseason in [0,1]:
+ portrait_subregion = np.zeros([4, nregion, nmodel])
+ portrait_titles = ['(a) Normalized Bias', '(b) Normalized STDV', '(c) Normalized RMSE', '(d) Correlation']
+ if iseason == 0:
+ monthBegin=6
+ monthEnd=8
+ if iseason == 1:
+ monthBegin=12
+ monthEnd=2
+
+ obsTser,obsClim = calcClimSeasonSubRegion(monthBegin, monthEnd, obsRgn[0,:,:])
+ for imodel in np.arange(nmodel):
+ mTser, mClim = calcClimSeasonSubRegion(monthBegin, monthEnd, mdlRgn[imodel,:,:])
+ for iregion in np.arange(nregion):
+ portrait_subregion[0,iregion,imodel] = calcBias(mClim[iregion],obsClim[iregion])/calcTemporalStdev(obsTser[iregion,:])
+ portrait_subregion[1,iregion,imodel] = calcTemporalStdev(mTser[iregion,:])/ calcTemporalStdev(obsTser[iregion,:])
+ portrait_subregion[2,iregion,imodel] = calcRootMeanSquaredDifferenceAveragedOverTime(mTser[iregion,:], obsTser[iregion,:])/calcTemporalStdev(obsTser[iregion,:])
+ portrait_subregion[3,iregion, imodel] = calcTemporalCorrelationSubRegion(mTser[iregion,:],obsTser[iregion,:])
+ portrait_return = plots.draw_portrait_diagram(portrait_subregion, rowlabels, collabels[0:nmodel], workdir+'/portrait_diagram_'+season_name[iseason]+'_'+varName, fmt='png',
+ gridshape=(2, 2), xlabel='', ylabel='', clabel='',
+ ptitle='', subtitles=portrait_titles, cmap=None, clevs=None,
+ nlevs=10, extend='neither')
+ # annual cycle
+ nmonth = 12
+ times = np.arange(nmonth)
+ data_names = [obsList[0]] + list(mdlList)
+ annual_cycle = np.zeros([nregion, nmonth, nmodel+1])
+ obsTser, annual_cycle[:, :, 0] = calcAnnualCycleMeansSubRegion(obsRgn[0,:,:])
+ obsStd = calcAnnualCycleStdevSubRegion(obsRgn[0,:,:])
+ for imodel in np.arange(nmodel):
+ mdlTser, annual_cycle[:, :, imodel+1] = calcAnnualCycleMeansSubRegion(mdlRgn[imodel, :, :])
+ # Make annual_cycle shape compatible with draw_time_series
+ annual_cycle = annual_cycle.swapaxes(1, 2)
+ tseries_return = plots.draw_time_series(annual_cycle, times, data_names, workdir+'/time_series_'+varName, gridshape=(7, 2),
+ subtitles=rowlabels, label_month=True)
+
+
+
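For reference, a sketch of what one column of the portrait diagram boils down to for a single sub-region, using plain numpy stand-ins for calcBias, calcTemporalStdev, and calcRootMeanSquaredDifferenceAveragedOverTime (the ddof convention here is an assumption):

    import numpy as np

    obs_series = np.random.rand(20) * 10.0
    mdl_series = obs_series + np.random.normal(0.0, 1.0, 20)

    obs_std = obs_series.std(ddof=1)
    normalized_bias = (mdl_series.mean() - obs_series.mean()) / obs_std
    normalized_stdv = mdl_series.std(ddof=1) / obs_std
    normalized_rmse = np.sqrt(((mdl_series - obs_series) ** 2).mean()) / obs_std
    print(normalized_bias, normalized_stdv, normalized_rmse)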