You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by ah...@apache.org on 2013/08/20 00:27:36 UTC

svn commit: r1515646 - /incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py

Author: ahart
Date: Mon Aug 19 22:27:36 2013
New Revision: 1515646

URL: http://svn.apache.org/r1515646
Log:
CLIMATE-259: updates to metrics.py to support generation of time series, taylor, subregion, and portrait diagrams

Modified:
    incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py

Modified: incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py?rev=1515646&r1=1515645&r2=1515646&view=diff
==============================================================================
--- incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py (original)
+++ incubator/climate/branches/RefactorPlots/rcmet/src/main/python/rcmes/toolkit/metrics.py Mon Aug 19 22:27:36 2013
@@ -47,6 +47,24 @@ def calcAnnualCycleMeans(dataset1):
     means = data.mean(axis = 0)
     return data, means
 
+def calcAnnualCycleMeansSubRegion(dataset1):
+    '''
+    Purpose:: 
+        Calculate annual cycle in terms of monthly means at every sub-region.
+
+    Input::
+        dataset1 - 2d numpy array of data in (region, t)
+        
+    Output:: 
+        means - (region, # of months)
+
+     '''
+    nregion, nT = dataset1.shape
+    data = dataset1.reshape([nregion, nT/12, 12])
+    means = data.mean(axis = 1)
+    return data, means
+
+
 def calcClimYear(dataset1):
     '''
     Purpose:: 
@@ -99,6 +117,39 @@ def calcClimSeason(monthBegin, monthEnd,
     means = tSeries.mean(axis = 0)        
     return tSeries, means
 
+def calcClimSeasonSubRegion(monthBegin, monthEnd, dataset1):
+    '''
+    Purpose :: 
+       Calculate seasonal mean montheries and climatology for both 3-D and point time series.
+       For example, to calculate DJF mean time series, monthBegin = 12, monthEnd =2 
+       This can handle monthBegin=monthEnd i.e. for climatology of a specific month
+
+    Input::
+        monthBegin - an integer for the beginning month (Jan =1)
+        monthEnd - an integer for the ending month (Jan = 1)
+        dataset1 - 3d numpy array of data in (region, t) or 1d numpy array montheries
+
+    Output:: 
+        tSeries - (region, number of years/number of years -1 if monthBegin > monthEnd,lat,lon),
+        means - (region)
+    '''
+    nregion, nT = dataset1.shape
+    nYR = nT/12
+    if monthBegin > monthEnd:
+        # Offset the original array so that the the first month
+        # becomes monthBegin, note that this cuts off the first year of data
+        offset = slice(monthBegin - 1, monthBegin - 13)
+        data = dataset1[:,offset].reshape([nregion,nYR-1, 12])
+        monthIndex = slice(0, 13 - monthBegin + monthEnd)
+    else:
+        # Since monthBegin <= monthEnd, just take a slice containing those months
+        data = dataset1.reshape([nregion,nYR,12])
+        monthIndex =  slice(monthBegin - 1, monthEnd)
+        
+    tSeries = data[:, :, monthIndex].mean(axis = 2)
+    means = tSeries.mean(axis = 1)        
+    return tSeries, means
+
 def calcAnnualCycleStdev(dataset1):
     '''
      Purpose:: 
@@ -114,7 +165,21 @@ def calcAnnualCycleStdev(dataset1):
     stds = data.std(axis = 0, ddof = 1)
     return stds
 
+def calcAnnualCycleStdevSubRegion(dataset1):
+    '''
+     Purpose:: 
+        Calculate monthly standard deviations for every sub-region
+     
+     Input::
+        dataset1 - 2d numpy array of data in (nregion, 12* number of years) 
 
+     Output:: 
+        stds - (nregion, 12)
+    '''
+    nregion, nT = dataset1.shape
+    data = dataset1.reshape([nregion, nT/12, 12])
+    stds = data.std(axis = 1, ddof = 1)
+    return stds
 
 def calcAnnualCycleDomainMeans(dataset1):
     '''
@@ -136,6 +201,22 @@ def calcAnnualCycleDomainMeans(dataset1)
         
     return means
 
+def calcSpatialStdevRatio(evaluationData, referenceData):
+    '''
+    Purpose ::
+        Calculate the ratio of spatial standard deviation (model standard deviation)/(observed standard deviation)
+
+    Input ::
+        evaluationData - model data array (lat, lon)
+        referenceData- observation data array (lat,lon)
+
+    Output::
+        ratio of standard deviation (a scholar) 
+    
+    '''
+    stdevRatio = evaluationData[(evaluationData.mask==False) & (referenceData.mask==False)].std()/ \
+                 referenceData[(evaluationData.mask==False) & (referenceData.mask==False)].std()  
+    return stdevRatio
 
 def calcTemporalStdev(dataset1):
     '''
@@ -321,7 +402,7 @@ def calcTemporalCorrelation(evaluationDa
         referenceData- observation data array of any shape
             
     Output::
-        temporalCorelation - A 2-D array of temporal correlation coefficients at each grid point.
+        temporalCorelation - A 2-D array of temporal correlation coefficients at each subregion
         sigLev - A 2-D array of confidence levels related to temporalCorelation 
     
     REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
@@ -330,25 +411,49 @@ def calcTemporalCorrelation(evaluationDa
     evaluationDataMask = process.create_mask_using_threshold(evaluationData, threshold = 0.75)
     referenceDataMask = process.create_mask_using_threshold(referenceData, threshold = 0.75)
     
-    ngrdY = evaluationData.shape[1] 
-    ngrdX = evaluationData.shape[2] 
-    temporalCorrelation = ma.zeros([ngrdY,ngrdX])-100.
-    sigLev = ma.zeros([ngrdY,ngrdX])-100.
-    for iy in np.arange(ngrdY):
-        for ix in np.arange(ngrdX):
-            if not evaluationDataMask[iy,ix] and not referenceDataMask[iy,ix]:
-                t1=evaluationData[:,iy,ix]
-                t2=referenceData[:,iy,ix]
-                if t1.min()!=t1.max() and t2.min()!=t2.max():
-                    temporalCorrelation[iy,ix], sigLev[iy,ix]=stats.pearsonr(t1[(t1.mask==False) & (t2.mask==False)],
-                                                  t2[(t1.mask==False) & (t2.mask==False)])
-                    sigLev[iy,ix]=1-sigLev[iy,ix]  # p-value => confidence level
+    nregion = evaluationData.shape[0]
+    temporalCorrelation = ma.zeros([nregion])-100.
+    sigLev = ma.zeros([nregion])-100.
+    for iregion in np.arange(nregion):
+        temporalCorrelation[iregion], sigLev[iregion] = stats.pearsonr(evaluationData[iregion,:], referenceData[iregion,:])
+        sigLev[iregion] = 1 - sigLev[iregion]
                     
     temporalCorrelation=ma.masked_equal(temporalCorrelation.data, -100.)        
     sigLev=ma.masked_equal(sigLev.data, -100.)    
     
     return temporalCorrelation, sigLev
 
+def calcTemporalCorrelationSubRegion(evaluationData, referenceData):
+    '''
+    Purpose ::
+        Calculate the temporal correlation.
+    
+    Assumption(s) ::
+        both evaluation and reference data are subregion averaged time series
+    
+    Input ::
+        evaluationData - model data array [region,t]
+        referenceData- observation data [region, t]
+            
+    Output::
+        temporalCorelation - A 1-D array of temporal correlation coefficients at each grid point.
+        sigLev - A 1-D array of confidence levels related to temporalCorelation 
+    
+    REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
+    sigLev: the correlation between model and observation is significant at sigLev * 100 %
+    '''
+        
+    
+    temporalCorrelation = 0.
+    sigLev = 0.
+    t1=evaluationData[:]
+    t2=referenceData[:]
+    if t1.min()!=t1.max() and t2.min()!=t2.max():
+        temporalCorrelation, sigLev=stats.pearsonr(t1,t2)
+        sigLev=1.-sigLev  # p-value => confidence level
+                    
+        return temporalCorrelation
+
 def calcPatternCorrelation(evaluationData, referenceData):
     '''
     Purpose ::
@@ -424,16 +529,8 @@ def calcNashSutcliff(evaluationData, ref
                (((data2 - meanData2) ** 2).sum(axis = 1)))
     return nashcor
 
-def getPdfInputValues():
-    print '****PDF input values from user required **** \n'
-    nbins = int (raw_input('Please enter the number of bins to use. \n'))
-    minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n'))
-    maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n'))
-    
-    return nbins, minEdge, maxEdge
 
-
-def calcPdf(evaluationData, referenceData, settings=None):
+def calcPdf(evaluationData, referenceData):
     '''
     Routine to calculate a normalized Probability Distribution Function with 
     bins set according to data range.
@@ -443,22 +540,16 @@ def calcPdf(evaluationData, referenceDat
         called in do_rcmes_processing_sub.py
          
     Inputs::
-        evaluationData (3D numpy array): array shape (time, lat, lon)
-        referenceData (3D numpy array): array shape (time, lat, lon)
-        settings (tuple): [optional] format (binCount, minEdge, maxEdge)
-            binCount (int): number of bins to use
-            minEdge (int|float): minimum edge
-            maxEdge (int|float): maximum edge
-        
-        NB, time here is the number of time values eg for time period
-         199001010000 - 199201010000 
+        2 arrays of data
+        t1 is the modelData and t2 is 3D obsdata - time,lat, lon NB, time here 
+        is the number of time values eg for time period 199001010000 - 199201010000 
         
-    Assumptions::
-        If annual means-opt 1, was chosen, then referenceData.shape = (YearsCount,lat,lon)
+        if annual means-opt 1, was chosen, then t2.shape = (2,lat,lon)
         
-        If monthly means - opt 2, was choosen, then referenceData.shape = (MonthsCount,lat,lon)
+        if monthly means - opt 2, was choosen, then t2.shape = (24,lat,lon)
         
-    Output::
+    User inputs: number of bins to use and edges (min and max)
+    Output:
 
         one float which represents the PDF for the year
 
@@ -473,17 +564,20 @@ def calcPdf(evaluationData, referenceDat
         PDF for the year
 
     '''
+    # float to store the final PDF similarity score
     similarityScore = 0.0
+
     print 'min modelData', evaluationData[:, :, :].min()
     print 'max modelData', evaluationData[:, :, :].max()
     print 'min obsData', referenceData[:, :, :].min()
     print 'max obsData', referenceData[:, :, :].max()
-
-    if settings == None:
-        nbins, minEdge, maxEdge = getPdfInputValues()
-    else:
-        nbins, minEdge, maxEdge = settings
-
+    # find a distribution for the entire dataset
+    #prompt the user to enter the min, max and number of bin values. 
+    # The max, min info above is to help guide the user with these choises
+    print '****PDF input values from user required **** \n'
+    nbins = int (raw_input('Please enter the number of bins to use. \n'))
+    minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n'))
+    maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n'))
     
     mybins = np.linspace(minEdge, maxEdge, nbins)
     print 'nbins is', nbins, 'mybins are', mybins
@@ -669,9 +763,14 @@ def metrics_plots(varName, numOBS, numMD
     ####################################################
     # Terminate job if no metrics are to be calculated #
     ####################################################
-
+    if doMetricsOption == 'y':
+        calculate_metrics_and_make_plots(varName, workdir, lons, lats, obsData, mdlData, obsRgn, mdlRgn, obsList, mdlList, subRegions,\
+                                          subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1)
+    
+    '''
     neval = 0
-
+    
+    
     while doMetricsOption == 'y':
         neval += 1
         print ' '
@@ -960,6 +1059,114 @@ def metrics_plots(varName, numOBS, numMD
 
             # Repeat for another metric
             doMetricsOption = raw_input('Do you want to perform another evaluation? [y/n]\n> ').lower()
-
+    '''
     # Processing complete if a user enters 'n' for 'doMetricsOption'
     print 'RCMES processing completed.'
+
+def calculate_metrics_and_make_plots(varName, workdir, lons, lats, obsData, mdlData, obsRgn, mdlRgn, obsList, mdlList, subRegions, \
+                                     subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1):
+    '''
+    Purpose:: 
+        Calculate all the metrics used in Kim et al. [2013] paper and plot them 
+
+    Input::
+        varName - evaluating variable
+        workdir -
+        lons -
+        lats -
+        obsData -
+        mdlData -
+        obsRgn -
+        mdlRgn -
+        obsList -
+        mdlList -
+        subRegions - 
+        subRgnLon0, subRgnLat0 - southwest boundary of sub-regions [numSubRgn]
+        subRgnLon1, subRgnLat1 - northeast boundary of sub-regions [numSubRgn]
+    Output:: 
+        png files
+        
+     '''
+   
+   
+    nobs, nt, ny, nx = obsData.shape
+    nmodel = mdlData.shape[0]
+    ### TODO: unit conversion (K to C)
+    if varName == 'temp':
+        obsData[0, :, :, :] = obsData[0, :, :, :] - 273.15
+        if subRegions:
+            obsRgn[0, :, :] = obsRgn[0, :, :] - 273.15
+    if varName == 'prec':
+        obsData[0, :, :, :] = obsData[0, :, :, :]*86400.
+        if subRegions:
+            obsRgn[0, :, :] = obsRgn[0, :, :]*86400.
+    ###    
+    oTser, oClim = calcClimYear( obsData[0, :, :, :])
+    bias_of_overall_average = ma.zeros([nmodel, ny, nx])
+    spatial_stdev_ratio = np.zeros([nmodel])
+    spatial_corr = np.zeros([nmodel])
+    mdlList.append('ENS')
+    
+    for imodel in np.arange(nmodel):
+        mTser, mClim = calcClimYear( mdlData[imodel,:,:,:])
+        bias_of_overall_average[imodel,:,:] = calcBias(mClim, oClim)
+        spatial_corr[imodel], sigLev = calcPatternCorrelation(oClim, mClim)
+        spatial_stdev_ratio[imodel] = calcSpatialStdevRatio(mClim, oClim)   
+    fig_return = plots.draw_contour_map(oClim, lats, lons, workdir+'/observed_climatology_'+varName, fmt='png', gridshape=(1, 1),
+                   clabel='', ptitle='', subtitles=obsList, cmap=None, 
+                   clevs=None, nlevs=10, parallels=None, meridians=None,
+                   extend='neither')    
+    fig_return = plots.draw_contour_map(bias_of_overall_average, lats, lons, workdir+'/bias_of_climatology_'+varName, fmt='png', gridshape=(4, 2),
+                   clabel='', ptitle='', subtitles=mdlList, cmap=None, 
+                   clevs=None, nlevs=10, parallels=None, meridians=None,
+                   extend='neither')
+    Taylor_data = np.array([spatial_stdev_ratio, spatial_corr]).transpose()
+    
+    fig_return = plots.draw_taylor_diagram(Taylor_data, mdlList, refname='CRU', fname = workdir+'/Taylor_'+varName, fmt='png')
+
+    if subRegions:
+        nseason = 2      # (0: summer and 1: winter)
+        nregion = len(subRgnLon0)
+        season_name = ['summer','winter']
+        rowlabels = ['PNw','PNe','CAn','CAs','SWw','SWe','COL','GPn','GPc','GC','GL','NE','SE','FL']
+        collabels = ['M1','M2','M3','M4','M5','M6','ENS']
+        collabels[nmodel-1] = 'ENS'
+        
+        for iseason in [0,1]:
+            portrait_subregion = np.zeros([4, nregion, nmodel])
+            portrait_titles = ['(a) Normalized Bias', '(b) Normalized STDV', '(c) Normalized RMSE', '(d) Correlation']
+            if iseason == 0:
+                monthBegin=6
+                monthEnd=8
+            if iseason == 1:
+                monthBegin=12
+                monthEnd=2
+                      
+            obsTser,obsClim = calcClimSeasonSubRegion(6,8,obsRgn[0,:,:])
+            for imodel in np.arange(nmodel):
+                mTser, mClim =  calcClimSeasonSubRegion(6,8,mdlRgn[imodel,:,:])
+                for iregion in np.arange(nregion):
+                      portrait_subregion[0,iregion,imodel] = calcBias(mClim[iregion],obsClim[iregion])/calcTemporalStdev(obsTser[iregion,:])   
+                      portrait_subregion[1,iregion,imodel] = calcTemporalStdev(mTser[iregion,:])/ calcTemporalStdev(obsTser[iregion,:]) 
+                      portrait_subregion[2,iregion,imodel] = calcRootMeanSquaredDifferenceAveragedOverTime(mTser[iregion,:], obsTser[iregion,:])/calcTemporalStdev(obsTser[iregion,:])
+                      portrait_subregion[3,iregion, imodel] = calcTemporalCorrelationSubRegion(mTser[iregion,:],obsTser[iregion,:])
+            portrait_return = plots.draw_portrait_diagram(portrait_subregion, rowlabels, collabels[0:nmodel], workdir+'/portrait_diagram_'+season_name[iseason]+'_'+varName, fmt='png', 
+                             gridshape=(2, 2), xlabel='', ylabel='', clabel='', 
+                             ptitle='', subtitles=portrait_titles, cmap=None, clevs=None, 
+                             nlevs=10, extend='neither')  
+            # annual cycle
+            nmonth = 12
+            times = np.arange(nmonth)
+            data_names = [obsList[0]] + list(mdlList)
+            annual_cycle = np.zeros([nregion, nmonth, nmodel+1])
+            obsTser, annual_cycle[:, :, 0] = calcAnnualCycleMeansSubRegion(obsRgn[0,:,:])
+            obsStd = calcAnnualCycleStdevSubRegion(obsRgn[0,:,:])
+            for imodel in np.arange(nmodel):
+                mdlTser, annual_cycle[:, :, imodel+1] = calcAnnualCycleMeansSubRegion(mdlRgn[imodel, :, :])
+            # Make annual_cycle shape compatible with draw_time_series
+            annual_cycle = annual_cycle.swapaxes(1, 2)
+            tseries_return = plots.draw_time_series(annual_cycle, times, data_names, workdir+'/time_series_'+varName, gridshape=(7, 2), 
+                  subtitles=rowlabels, label_month=True)
+            
+         
+