You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by pr...@apache.org on 2013/08/27 07:35:49 UTC
svn commit: r1517753 [9/33] - in /incubator/climate/branches/rcmet-2.1.1: ./
src/ src/main/ src/main/python/ src/main/python/bin/ src/main/python/docs/
src/main/python/docs/_static/ src/main/python/docs/_templates/
src/main/python/rcmes/ src/main/pytho...
Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/metrics.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/metrics.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/metrics.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/metrics.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,2455 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+'''
+Module storing functions to calculate statistical metrics from numpy arrays
+'''
+
+import datetime
+import subprocess
+import sys
+import os
+import numpy as np
+import numpy.ma as ma
+from math import floor, log
+from toolkit import process
+from utils import misc
+from storage import files
+from pylab import *
+import scipy.stats.mstats as mstats
+import matplotlib as mpl
+import matplotlib.dates
+import matplotlib.pyplot as plt
+from mpl_toolkits.axes_grid1 import ImageGrid
+from matplotlib.font_manager import FontProperties
+from mpl_toolkits.basemap import Basemap
+from utils.Taylor import TaylorDiagram
+# 6/10/2012 JK: any Ngl dependence will be removed in later versions
+#import Ngl
+
+def calc_ann_mean(t2, time):
+ '''
+ Calculate annual cycle in terms of monthly means at every grid point.
+ '''
+ # Calculate annual cycle in terms of monthly means at every grid point including single point case (ndim=1)
+ # note: this routine is identical to 'calc_annual_cycle_means': must be converted to calculate the annual mean
+ # Extract months from time variable
+ months = np.empty(len(time))
+ for t in np.arange(len(time)):
+ months[t] = time[t].month
+ if t2.ndim == 3:
+ means = ma.empty((12, t2.shape[1], t2.shape[2])) # empty array to store means
+ # Calculate means month by month
+ for i in np.arange(12)+1:
+ means[i - 1, :, :] = t2[months == i, :, :].mean(0)
+ if t2.ndim == 1:
+ means = np.empty((12)) # empty array to store means
+ # Calculate means month by month
+ for i in np.arange(12)+1:
+ means[i - 1] = t2[months == i].mean(0)
+ return means
+
+
+def calc_clim_month(t2, time):
+ '''
+ Calculate monthly means at every grid point.
+ '''
+ # Calculate monthly means at every grid point including single point case (ndim=1)
+ # Extract months from time variable
+ months = np.empty(len(time))
+ for t in np.arange(len(time)):
+ months[t] = time[t].month
+ if t2.ndim == 3:
+ means = ma.empty((12, t2.shape[1], t2.shape[2])) # empty array to store means
+ # Calculate means month by month
+ for i in np.arange(12) + 1:
+ means[i - 1, :, :] = t2[months == i, :, :].mean(0)
+ if t2.ndim == 1:
+ means = np.empty((12)) # empty array to store means
+ # Calculate means month by month
+ for i in np.arange(12) + 1:
+ means[i - 1] = t2[months == i].mean(0)
+ return means
+
+
+def calc_clim_year(nYR, nT, ngrdY, ngrdX, t2, time):
+ '''
+ Calculate annual mean timeseries and climatology for both 2-D and point time series.
+ '''
+ # Extract months from time variable
+ yy = np.empty(nT)
+ mm = np.empty(nT)
+ for t in np.arange(nT):
+ yy[t] = time[t].year
+ mm[t] = time[t].month
+ if t2.ndim == 3:
+ tSeries = ma.zeros((nYR, ngrdY, ngrdX))
+ i = 0
+ for myunit in np.unique(yy):
+ wh = (yy == myunit)
+ data = t2[wh, :, :]
+ tSeries[i, :, :] = ma.average(data, axis = 0)
+ #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy
+ i += 1
+ means = ma.zeros((ngrdY, ngrdX))
+ means = ma.average(tSeries, axis = 0)
+ elif t2.ndim == 1:
+ tSeries = ma.zeros((nYR))
+ i = 0
+ for myunit in np.unique(yy):
+ wh = (yy == myunit)
+ data = t2[wh]
+ tSeries[i] = ma.average(data, axis = 0)
+ #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy
+ i += 1
+ means = ma.zeros((ngrdY, ngrdX))
+ means = ma.average(tSeries, axis = 0)
+ return tSeries, means
+
+
+def calc_clim_season(nYR, nT, mB, mE, ngrdY, ngrdX, t2, time):
+ '''
+ Calculate seasonal mean timeseries and climatology for both 2-D and point time series.
+ '''
+ #-------------------------------------------------------------------------------------
+ # Calculate seasonal mean timeseries and climatology for both 2-d and point time series
+ # The season to be calculated is defined by moB and moE; moE>=moB always
+ #-------------------------------------------------------------------------------------
+ # Extract months from time variable
+ yy = np.empty(nT)
+ mm = np.empty(nT)
+ for t in np.arange(nT):
+ yy[t] = time[t].year
+ mm[t] = time[t].month
+ if t2.ndim == 3:
+ tSeries = ma.zeros((nYR, ngrdY, ngrdX))
+ i = 0
+ for myunit in np.unique(yy):
+ wh = (yy == myunit) & (mm >= mB) & (mm <= mE)
+ data = t2[wh, :, :]
+ tSeries[i, :, :] = ma.average(data, axis = 0)
+ #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy
+ i += 1
+ means = ma.zeros((ngrdY, ngrdX))
+ means = ma.average(tSeries, axis = 0)
+ elif t2.ndim == 1:
+ tSeries = ma.zeros((nYR))
+ i = 0
+ for myunit in np.unique(yy):
+ wh = (yy == myunit) & (mm >= mB) & (mm <= mE)
+ data = t2[wh]
+ tSeries[i] = ma.average(data, axis = 0)
+ #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy
+ i += 1
+ means = ma.zeros((ngrdY, ngrdX))
+ means = ma.average(tSeries, axis = 0)
+ return tSeries, means
+
+
+def calc_clim_mo(nYR, nT, ngrdY, ngrdX, t2, time):
+ '''
+ Calculate monthly means at every grid points and the annual time series of single model including single point case (ndim=1).
+ JK20: This is modified from 'calc_clim_month' with additional arguments & output, the annual time series of single model output (mData)
+ 6/8/2013: bug fix: mm = months[t] --> mm = months[t] - 1, otherwise array overflow occurs
+ '''
+ # Extract months and monthly time series from the time and raw variable, respectively
+ months = np.empty(nT)
+ for t in np.arange(nT):
+ months[t] = time[t].month
+ if t == 0:
+ yy0 = time[t].year
+ # for a 2-D time series data
+ if t2.ndim == 3:
+ mData = ma.empty((nYR, 12, ngrdY, ngrdX))
+ for t in np.arange(nT):
+ yy = time[t].year
+ mm = months[t] - 1
+ yr = yy - yy0
+ mData[yr, mm, :, :] = t2[t, :, :]
+ # Calculate means month by month. means is an empty array to store means
+ means = ma.empty((12, ngrdY, ngrdX))
+ for i in np.arange(12) + 1:
+ means[i - 1, :, :] = t2[months == i, :, :].mean(0)
+ # for a point time series data
+ if t2.ndim == 1:
+ mData = ma.empty((nYR, 12))
+ for t in np.arange(nT):
+ yy = time[t].year
+ mm = months[t]
+ yr = yy - yy0
+ mData[yr, mm] = t2[t]
+ means = np.empty((12))
+ # Calculate means month by month. means is an empty array to store means
+ for i in np.arange(12) + 1:
+ means[i - 1] = t2[months == i].mean(0)
+ return mData, means
+
+
+def calc_clim_One_month(moID, nYR, nT, t2, time):
+ '''
+ Calculate the montly mean at every grid point for a specified month.
+ '''
+ #-------------------------------------------------------------------------------------
+ # Calculate monthly means at every grid point for a specified month
+ #-------------------------------------------------------------------------------------
+ # Extract months and the corresponding time series from time variable
+ months = np.empty(nT)
+ for t in np.arange(nT):
+ months[t] = time[t].month
+ if t2.ndim == 3:
+ mData = ma.empty((nYR, t2.shape[1], t2.shape[2])) # empty array to store time series
+ n = 0
+ if months[t] == moID:
+ mData[n, :, :] = t2[t, :, :]
+ n += 1
+ means = ma.empty((t2.shape[1], t2.shape[2])) # empty array to store means
+ # Calculate means for the month specified by moID
+ means[:, :] = t2[months == moID, :, :].mean(0)
+ return mData, means
+
+
+def calc_annual_cycle_means(data, time):
+ '''
+ Calculate monthly means for every grid point
+
+ Inputs::
+ data - masked 3d array of the model data (time, lon, lat)
+ time - an array of python datetime objects
+ '''
+ # Extract months from time variable
+ months = np.empty(len(time))
+
+ for t in np.arange(len(time)):
+ months[t] = time[t].month
+
+ #if there is data varying in t and space
+ if data.ndim == 3:
+ # empty array to store means
+ means = ma.empty((12, data.shape[1], data.shape[2]))
+
+ # Calculate means month by month
+ for i in np.arange(12) + 1:
+ means[i - 1, :, :] = data[months == i, :, :].mean(0)
+
+ #if the data is a timeseries over area-averaged values
+ if data.ndim == 1:
+ # TODO - Investigate using ma per KDW
+ means = np.empty((12)) # empty array to store means??WHY NOT ma?
+
+ # Calculate means month by month
+ for i in np.arange(12) + 1:
+ means[i - 1] = data[months == i].mean(0)
+
+ return means
+
+
+def calc_annual_cycle_std(data, time):
+ '''
+ Calculate monthly standard deviations for every grid point
+ '''
+ # Extract months from time variable
+ months = np.empty(len(time))
+
+ for t in np.arange(len(time)):
+ months[t] = time[t].month
+
+ # empty array to store means
+ stds = np.empty((12, data.shape[1], data.shape[2]))
+
+ # Calculate means month by month
+ for i in np.arange(12) + 1:
+ stds[i - 1, :, :] = data[months == i, :, :].std(axis = 0, ddof = 1)
+
+ return stds
+
+
+def calc_annual_cycle_domain_means(data, time):
+ '''
+ Calculate domain means for each month of the year
+ '''
+ # Extract months from time variable
+ months = np.empty(len(time))
+
+ for t in np.arange(len(time)):
+ months[t] = time[t].month
+
+ means = np.empty(12) # empty array to store means
+
+ # Calculate means month by month
+ for i in np.arange(12) + 1:
+ means[i - 1] = data[months == i, :, :].mean()
+
+ return means
+
+
+def calc_annual_cycle_domain_std(data, time):
+ '''
+ Calculate domain standard deviations for each month of the year
+ '''
+ # Extract months from time variable
+ months = np.empty(len(time))
+
+ for t in np.arange(len(time)):
+ months[t] = time[t].month
+
+ stds = np.empty(12) # empty array to store means
+
+ # Calculate means month by month
+ for i in np.arange(12) + 1:
+ stds[i - 1] = data[months == i, :, :].std(ddof = 1)
+
+ return stds
+
+
+def calc_bias_annual(t1, t2, optn): # Mean Bias
+ '''
+ Calculate the mean difference between two fields over time for each grid point.
+ '''
+ # Calculate mean difference between two fields over time for each grid point
+ # Precrocessing of both obs and model data ensures the absence of missing values
+ diff = t1-t2
+ if(open == 'abs'):
+ diff = abs(diff)
+ bias = diff.mean(axis = 0)
+ return bias
+
+
+def calc_bias(t1, t2):
+ '''
+ Calculate mean difference between two fields over time for each grid point
+
+ Classify missing data resulting from multiple times (using threshold
+ data requirement)
+
+ i.e. if the working time unit is monthly data, and we are dealing with
+ multiple months of data then when we show mean of several months, we need
+ to decide what threshold of missing data we tolerate before classifying a
+ data point as missing data.
+ '''
+ t1Mask = process.create_mask_using_threshold(t1, threshold = 0.75)
+ t2Mask = process.create_mask_using_threshold(t2, threshold = 0.75)
+
+ diff = t1 - t2
+ bias = diff.mean(axis = 0)
+
+ # Set mask for bias metric using missing data in obs or model data series
+ # i.e. if obs contains more than threshold (e.g.50%) missing data
+ # then classify time average bias as missing data for that location.
+ bias = ma.masked_array(bias.data, np.logical_or(t1Mask, t2Mask))
+ return bias
+
+
+def calc_bias_dom(t1, t2):
+ '''
+ Calculate domain mean difference between two fields over time
+ '''
+ diff = t1 - t2
+ bias = diff.mean()
+ return bias
+
+
+def calc_difference(t1, t2):
+ '''
+ Calculate mean difference between two fields over time for each grid point
+ '''
+ print 'Calculating difference'
+ diff = t1 - t2
+ return diff
+
+
+def calc_mae(t1, t2):
+ '''
+ Calculate mean difference between two fields over time for each grid point
+
+ Classify missing data resulting from multiple times (using threshold
+ data requirement)
+
+ i.e. if the working time unit is monthly data, and we are dealing with
+ multiple months of data then when we show mean of several months, we need
+ to decide what threshold of missing data we tolerate before classifying
+ a data point as missing data.
+ '''
+ t1Mask = process.create_mask_using_threshold(t1, threshold = 0.75)
+ t2Mask = process.create_mask_using_threshold(t2, threshold = 0.75)
+
+ diff = t1 - t2
+ adiff = abs(diff)
+
+ mae = adiff.mean(axis = 0)
+
+ # Set mask for mae metric using missing data in obs or model data series
+ # i.e. if obs contains more than threshold (e.g.50%) missing data
+ # then classify time average mae as missing data for that location.
+ mae = ma.masked_array(mae.data, np.logical_or(t1Mask, t2Mask))
+ return mae
+
+
+def calc_mae_dom(t1, t2):
+ '''
+ Calculate domain mean difference between two fields over time
+ '''
+ diff = t1 - t2
+ adiff = abs(diff)
+ mae = adiff.mean()
+ return mae
+
+
+def calc_rms(t1, t2):
+ '''
+ Calculate mean difference between two fields over time for each grid point
+ '''
+ diff = t1 - t2
+ sqdiff = diff ** 2
+ msd = sqdiff.mean(axis = 0)
+ rms = np.sqrt(msd)
+ return rms
+
+
+def calc_rms_dom(t1, t2):
+ '''
+ Calculate RMS differences between two fields
+ '''
+ diff = t1 - t2
+ sqdiff = diff ** 2
+ msd = sqdiff.mean()
+ rms = np.sqrt(msd)
+ return rms
+
+
+def calc_temporal_stdv(t1):
+ '''
+ Calculate the temporal standard deviation.
+
+ Input:
+ t1 - data array of any shape
+
+ Output:
+ A 2-D array of temporal standard deviation
+ '''
+ # TODO Make sure the first dimension of t1 is teh time axis.
+ stdv = t1.std(axis = 0)
+ return stdv
+
+
+def calc_temporal_anom_cor(mD, oD):
+ '''
+ Calculate the temporal anomaly correlation.
+
+ Assumption(s);
+ The first dimension of mD and oD is the time axis.
+
+ Input:
+ mD - model data array of any shape
+ oD - observation data array of any shape
+
+ Output:
+ A 2-D array of time series pattern correlation coefficients at each grid point.
+
+ REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
+ '''
+ mo = oD.mean(axis = 0)
+ nt = oD.shape[0]
+ deno1 = ((mD - mo) * (mD - mo)).sum(axis = 0)
+ deno2 = ((oD - mo) * (oD - mo)).sum(axis = 0)
+ patcor = ((mD - mo) * (oD - mo)).sum(axis = 0) / sqrt(deno1 * deno2)
+ return patcor
+
+
+def calc_spatial_anom_cor(mD, oD):
+ '''
+ Calculate anomaly correlation between two 2-D arrays.
+
+ Input:
+ mD - 2-D array of model data
+ oD - 2-D array of observation data
+
+ Output:
+ The anomaly correlation between the two input arrays.
+ '''
+ mo = oD.mean()
+ d1 = ((mD - mo)*(mD - mo)).sum()
+ d2 = ((oD - mo)*(oD - mo)).sum()
+ patcor = ((mD - mo) * (oD - mo)).sum() / sqrt(d1 * d2)
+ return patcor
+
+
+def calc_temporal_pat_cor(t1, t2):
+ '''
+ Calculate the Temporal Pattern Correlation
+
+ Input::
+ t1 - 3d array of model data
+ t2 - 3d array of obs data
+
+ Output::
+ 2d array of time series pattern correlation coefficients at each grid point.
+ **Note:** std_dev is standardized on 1 degree of freedom
+ '''
+ mt1 = t1.mean(axis = 0)
+ mt2 = t2.mean(axis = 0)
+ nt = t1.shape[0]
+ sigma_t1 = t1.std(axis = 0, ddof = 1)
+ sigma_t2 = t2.std(axis = 0, ddof = 1)
+ patcor = ((((t1 - mt1) * (t2 - mt2)).sum(axis = 0)) / (nt)) / (sigma_t1 * sigma_t2)
+
+ return patcor
+
+
+def calc_spatial_pat_cor(t1, t2):
+ '''
+ Calcualte pattern correlation between 2-D arrays.
+ 6/10/2013: JK: Enforce both t1 & t2 have the identical mask before calculating std and corr
+
+ Input:
+ t1 - 2-D array of model data
+ t2 - 2-D array of observation data
+
+ Output:
+ Pattern correlation between two input arrays.
+ '''
+ import numpy as np
+ msk1 = ma.getmaskarray(t1)
+ msk2 = ma.getmaskarray(t2)
+ t1 = ma.masked_array(t1.data, np.logical_or(msk1, msk2))
+ t2 = ma.masked_array(t2.data, np.logical_or(msk1, msk2))
+ np = ma.count(t1)
+ mt1 = t1.mean()
+ mt2 = t2.mean()
+ st1 = t1.std()
+ st2 = t2.std()
+ patcor = ((t1 - mt1) * (t2 - mt2)).sum() / (np * st1 * st2)
+ return patcor
+
+
+def calc_pat_cor2D(t1, t2, nT):
+ '''
+ Calculate the pattern correlation between 3-D input arrays.
+
+ Input:
+ t1 - 3-D array of model data
+ t2 - 3-D array of observation data
+ nT
+
+ Output:
+ 1-D array (time series) of pattern correlation coefficients.
+ '''
+ # TODO - Update docstring. What is nT?
+ nt = t1.shape[0]
+ if(nt != nT):
+ print 'input time levels do not match: Exit', nT, nt
+ return -1
+ # store results in list for convenience (then convert to numpy array at the end)
+ patcor = []
+ for t in xrange(nt):
+ mt1 = t1[t, :, :].mean()
+ mt2 = t2[t, :, :].mean()
+ sigma_t1 = t1[t, :, :].std()
+ sigma_t2 = t2[t, :, :].std()
+ # TODO: make means and standard deviations weighted by grid box area.
+ patcor.append((((((t1[t, :, :] - mt1) * (t2[t, :, :] - mt2)).sum()) /
+ (t1.shape[1] * t1.shape[2]) ) / (sigma_t1 * sigma_t2)))
+ print t, mt1.shape, mt2.shape, sigma_t1.shape, sigma_t2.shape, patcor[t]
+ # TODO: deal with missing data appropriately, i.e. mask out grid points with missing data above tolerence level
+ # convert from list into numpy array
+ patcor = numpy.array(patcor)
+ print patcor.shape
+ return patcor
+
+def calc_pat_cor(dataset_1, dataset_2):
+ '''
+ Purpose: Calculate the Pattern Correlation Timeseries
+ Assumption(s)::
+ Both dataset_1 and dataset_2 are the same shape.
+ * lat, lon must match up
+ * time steps must align (i.e. months vs. months)
+ Input::
+ dataset_1 - 3d (time, lat, lon) array of data
+ dataset_2 - 3d (time, lat, lon) array of data
+ Output:
+ patcor - a 1d array (time series) of pattern correlation coefficients.
+ **Note:** Standard deviation is using 1 degree of freedom. Debugging print
+ statements to show the difference the n-1 makes. http://docs.scipy.org/doc/numpy/reference/generated/numpy.std.html
+ 6/17/2013 JK: Add an option for a 1-d arrays
+ '''
+
+ # TODO: Add in try block to ensure the shapes match
+
+ nDim1 = dataset_1.ndim
+ nDim2 = dataset_2.ndim
+ if nDim1 != nDim2:
+ print 'dimension mismatch in calc_pat_cor: exit', nDim1,nDim2
+ sys.exit()
+
+ if nDim1 == 1:
+ mt1 = dataset_1.mean()
+ mt2 = dataset_2.mean()
+ nt = dataset_1.shape[0]
+ sigma_t1 = dataset_1.std()
+ sigma_t2 = dataset_2.std()
+ patcor=((dataset_1 - mt1) * (dataset_2 - mt2)).sum() / (nt * sigma_t1 * sigma_t2)
+
+ elif nDim1 == 2:
+ # find mean and std_dev
+ mt1 = dataset_1.mean()
+ mt2 = dataset_2.mean()
+ ny = dataset_1.shape[0]
+ nx = dataset_1.shape[1]
+ sigma_t1 = dataset_1.std()
+ sigma_t2 = dataset_2.std()
+ patcor=((dataset_1 - mt1) * (dataset_2 - mt2)).sum() / ((ny * nx) * (sigma_t1 * sigma_t2))
+
+ elif nDim1 == 3:
+ nt = dataset_1.shape[0]
+ ny = dataset_1.shape[1]
+ nx = dataset_1.shape[2]
+ # store results in list for convenience (then convert to numpy array)
+ patcor = []
+ for t in xrange(nt):
+ # find mean and std_dev
+ mt1 = dataset_1[t, :, :].mean()
+ mt2 = dataset_2[t, :, :].mean()
+ sigma_t1 = dataset_1[t, :, :].std(ddof = 1)
+ sigma_t2 = dataset_2[t, :, :].std(ddof=1)
+ # TODO: make means and standard deviations weighted by grid box area.
+ # Equation from Santer_et_al 1995
+ # patcor = (1/(N*M_std*O_std))*sum((M_i-M_bar)*(O_i-O_bar))
+ patcor.append((((((dataset_1[t, :, :] - mt1) * (dataset_2[t, :, :] - mt2)).sum()) / (ny * nx)) / (sigma_t1 * sigma_t2)))
+ print t, mt1.shape, mt2.shape, sigma_t1.shape, sigma_t2.shape, patcor[t]
+ # TODO: deal with missing data appropriately, i.e. mask out grid points
+ # with missing data above tolerance level
+ # convert from list into numpy array
+ patcor = np.array(patcor)
+
+ print patcor.shape, patcor
+ return patcor
+
+
+def calc_anom_corn(dataset_1, dataset_2, climatology = None):
+ '''
+ Calculate the anomaly correlation.
+
+ Input:
+ dataset_1 - First input dataset
+ dataset_2 - Second input dataset
+ climatology - Optional climatology input array. Assumption is that it is for
+ the same time period by default.
+
+ Output:
+ The anomaly correlation.
+ '''
+ # TODO: Update docstring with actual useful information
+
+ # store results in list for convenience (then convert to numpy array)
+ anomcor = []
+ nt = dataset_1.shape[0]
+ #prompt for the third file, i.e. climo file...
+ #include making sure the lat, lon and times are ok for comparision
+ # find the climo in here and then using for eg, if 100 yrs
+ # is given for the climo file, but only looking at 10yrs
+ # ask if want to input climo dataset for use....if no, call previous
+
+ if climatology != None:
+ climoFileOption = raw_input('Would you like to use the full observation dataset as \
+ the climatology in this calculation? [y/n] \n>')
+ if climoFileOption == 'y':
+ base_dataset = climatology
+ else:
+ base_dataset = dataset_2
+ for t in xrange(nt):
+ mean_base = base_dataset[t, :, :].mean()
+ anomcor.append((((dataset_1[t, :, :] - mean_base) * (dataset_2[t, :, :] - mean_base)).sum()) /
+ np.sqrt(((dataset_1[t, :, :] - mean_base) ** 2).sum() *
+ ((dataset_2[t, :, :] - mean_base) ** 2).sum()))
+ print t, mean_base.shape, anomcor[t]
+
+ # TODO: deal with missing data appropriately, i.e. mask out grid points
+ # with missing data above tolerence level
+
+ # convert from list into numpy array
+ anomcor = np.array(anomcor)
+ print anomcor.shape, anomcor.ndim, anomcor
+ return anomcor
+
+
+def calc_anom_cor(t1, t2):
+ '''
+ Calculate the Anomaly Correlation (Deprecated)
+ '''
+
+ nt = t1.shape[0]
+
+ # store results in list for convenience (then convert to numpy
+ # array at the end)
+ anomcor = []
+ for t in xrange(nt):
+
+ mt2 = t2[t, :, :].mean()
+
+ sigma_t1 = t1[t, :, :].std(ddof = 1)
+ sigma_t2 = t2[t, :, :].std(ddof = 1)
+
+ # TODO: make means and standard deviations weighted by grid box area.
+
+ anomcor.append(((((t1[t, :, :] - mt2) * (t2[t, :, :] - mt2)).sum()) /
+ (t1.shape[1] * t1.shape[2])) / (sigma_t1 * sigma_t2))
+
+ print t, mt2.shape, sigma_t1.shape, sigma_t2.shape, anomcor[t]
+
+ # TODO: deal with missing data appropriately, i.e. mask out grid points with
+ # missing data above tolerence level
+
+ # convert from list into numpy array
+ anomcor = np.array(anomcor)
+ print anomcor.shape, anomcor.ndim, anomcor
+ return anomcor
+
+
+def calc_nash_sutcliff(dataset_1, dataset_2):
+ '''
+ Routine to calculate the Nash-Sutcliff coefficient of efficiency (E)
+
+ Assumption(s)::
+ Both dataset_1 and dataset_2 are the same shape.
+ * lat, lon must match up
+ * time steps must align (i.e. months vs. months)
+
+ Input::
+ dataset_1 - 3d (time, lat, lon) array of data
+ dataset_2 - 3d (time, lat, lon) array of data
+
+ Output:
+ nashcor - 1d array aligned along the time dimension of the input
+ datasets. Time Series of Nash-Sutcliff Coefficient of efficiency
+
+ '''
+
+ nt = dataset_1.shape[0]
+ nashcor = []
+ for t in xrange(nt):
+ mean_dataset_2 = dataset_2[t, :, :].mean()
+
+ nashcor.append(1 - ((((dataset_2[t, :, :] - dataset_1[t, :, :]) ** 2).sum()) /
+ ((dataset_2[t, :, :] - mean_dataset_2) ** 2).sum()))
+
+ print t, mean_dataset_2.shape, nashcor[t]
+
+ nashcor = np.array(nashcor)
+ print nashcor.shape, nashcor.ndim, nashcor
+ return nashcor
+
+
+def calc_pdf(dataset_1, dataset_2):
+ '''
+ Routine to calculate a normalized Probability Distribution Function with
+ bins set according to data range.
+ Equation from Perkins et al. 2007
+
+ PS=sum(min(Z_O_i, Z_M_i)) where Z is the distribution (histogram of the data for either set)
+ called in do_rcmes_processing_sub.py
+
+ Inputs::
+ 2 arrays of data
+ t1 is the modelData and t2 is 3D obsdata - time,lat, lon NB, time here
+ is the number of time values eg for time period 199001010000 - 199201010000
+
+ if annual means-opt 1, was chosen, then t2.shape = (2,lat,lon)
+
+ if monthly means - opt 2, was choosen, then t2.shape = (24,lat,lon)
+
+ User inputs: number of bins to use and edges (min and max)
+ Output:
+
+ one float which represents the PDF for the year
+
+ TODO: Clean up this docstring so we have a single purpose statement
+
+ Routine to calculate a normalised PDF with bins set according to data range.
+
+ Input::
+ 2 data arrays, modelData and obsData
+
+ Output::
+ PDF for the year
+
+ '''
+ #list to store PDFs of modelData and obsData
+ pdf_mod = []
+ pdf_obs = []
+ # float to store the final PDF similarity score
+ similarity_score = 0.0
+ d1_max = dataset_1.amax()
+ d1_min = dataset_1.amin()
+
+ print 'min modelData', dataset_1[:, :, :].min()
+ print 'max modelData', dataset_1[:, :, :].max()
+ print 'min obsData', dataset_2[:, :, :].min()
+ print 'max obsData', dataset_2[:, :, :].max()
+ # find a distribution for the entire dataset
+ #prompt the user to enter the min, max and number of bin values.
+ # The max, min info above is to help guide the user with these choises
+ print '****PDF input values from user required **** \n'
+ nbins = int (raw_input('Please enter the number of bins to use. \n'))
+ minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n'))
+ maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n'))
+
+ mybins = np.linspace(minEdge, maxEdge, nbins)
+ print 'nbins is', nbins, 'mybins are', mybins
+
+
+ # TODO: there is no 'new' kwargs for numpy.histogram
+ # per: http://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
+ # PLAN: Replace new with density param.
+ pdf_mod, edges = np.histogram(dataset_1, bins = mybins, normed = True, new = True)
+ print 'dataset_1 distribution and edges', pdf_mod, edges
+ pdf_obs, edges = np.histogram(dataset_2, bins = mybins, normed = True, new = True)
+ print 'dataset_2 distribution and edges', pdf_obs, edges
+
+ # TODO: drop this
+ """
+ considering using pdf function from statistics package. It is not
+ installed. Have to test on Mac.
+ http://bonsai.hgc.jp/~mdehoon/software/python/Statistics/manual/index.xhtml#TOC31
+ pdf_mod, edges = stats.pdf(dataset_1, bins=mybins)
+ print 'dataset_1 distribution and edges', pdf_mod, edges
+ pdf_obs,edges=stats.pdf(dataset_2,bins=mybins)
+ print 'dataset_2 distribution and edges', pdf_obs, edges
+ """
+
+ #find minimum at each bin between lists
+ i = 0
+ for model_value in pdf_mod :
+ print 'model_value is', model_value, 'pdf_obs[', i, '] is', pdf_obs[i]
+ if model_value < pdf_obs[i]:
+ similarity_score += model_value
+ else:
+ similarity_score += pdf_obs[i]
+ i += 1
+ print 'similarity_score is', similarity_score
+ return similarity_score
+
+
+def calc_stdev(t1):
+ '''
+ Calculate the standard deviation for a given dataset.
+
+ Input:
+ t1 - Dataset to calculate the standard deviation on.
+
+ Output:
+ Array of the standard deviations for each month in the provided dataset.
+ '''
+ nt = t1.shape[0]
+ sigma_t1 = []
+ for t in xrange(nt):
+ sigma_t1.append(t1[t, :, :].std(ddof = 1))
+ sigma_t1 = np.array(sigma_t1)
+ print sigma_t1, sigma_t1.shape
+ return sigma_t1
+
+# 6/10/2013 JK: plotting routines are added below
+
+def pow_round(x):
+ '''
+ Function to round x to the nearest power of 10
+ '''
+ return 10 ** (floor(log(x, 10) - log(0.5, 10)))
+
+def calcNiceIntervals(data, nLevs):
+ '''
+ Purpose::
+ Calculates nice intervals between each color level for colorbars
+ and contour plots. The target minimum and maximum color levels are
+ calculated by taking the minimum and maximum of the distribution
+ after cutting off the tails to remove outliers.
+
+ Input::
+ data - an array of data to be plotted
+ nLevs - an int giving the target number of intervals
+
+ Output::
+ cLevs - A list of floats for the resultant colorbar levels
+ '''
+ # Find the min and max levels by cutting off the tails of the distribution
+ # This mitigates the influence of outliers
+ data = data.ravel()
+ mnLvl = mstats.scoreatpercentile(data, 5)
+ mxLvl = mstats.scoreatpercentile(data, 95)
+ locator = mpl.ticker.MaxNLocator(nLevs)
+ cLevs = locator.tick_values(mnLvl, mxLvl)
+
+ # Make sure the bounds of cLevs are reasonable since sometimes
+ # MaxNLocator gives values outside the domain of the input data
+ cLevs = cLevs[(cLevs >= mnLvl) & (cLevs <= mxLvl)]
+ return cLevs
+
+def calcBestGridShape(nPlots, oldShape):
+ '''
+ Purpose::
+ Calculate a better grid shape in case the user enters more columns
+ and rows than needed to fit a given number of subplots.
+
+ Input::
+ nPlots - an int giving the number of plots that will be made
+ oldShape - a tuple denoting the desired grid shape (nRows, nCols) for arranging
+ the subplots originally requested by the user.
+
+ Output::
+ newShape - the smallest possible subplot grid shape needed to fit nPlots
+ '''
+ nRows, nCols = oldShape
+ size = nRows * nCols
+ diff = size - nPlots
+ if diff < 0:
+ raise ValueError('gridShape=(%d, %d): Cannot fit enough subplots for data' %(nRows, nCols))
+ else:
+ # If the user enters an excessively large number of
+ # rows and columns for gridShape, automatically
+ # correct it so that it fits only as many plots
+ # as needed
+ while diff >= nCols:
+ nRows -= 1
+ size = nRows * nCols
+ diff = size - nPlots
+
+ # Don't forget to remove unnecessary columns too
+ if nRows == 1:
+ nCols = nPlots
+
+ newShape = nRows, nCols
+ return newShape
+
+def drawPortraitDiagramSingle(data, rowLabels, colLabels, cLevs, fName, fType = 'png',
+ xLabel = '', yLabel = '', cLabel = '', pTitle = '', cMap = None):
+ '''
+ Purpose::
+ Makes a portrait diagram plot.
+
+ Input::
+ data - 2d array of the field to be plotted
+ rowLabels - list of strings denoting labels for each row
+ colLabels - list of strings denoting labels for each column
+ cLevs - a list of integers or floats specifying the colorbar levels
+ xLabel - a string specifying the x-axis title
+ yLabel - a string specifying the y-axis title
+ cLabel - a string specifying the colorbar title
+ pTitle - a string specifying the plot title
+ fName - a string specifying the filename of the plot
+ fType - an optional string specifying the filetype, default is .png
+ cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap,
+ '''
+ # Set up the colormap if not specified
+ if cMap is None:
+ cMap = plt.cm.RdBu_r
+
+ # Set up figure and axes
+ fig = plt.figure()
+ ax = fig.add_subplot(111)
+
+ # Make the portrait diagram
+ norm = mpl.colors.BoundaryNorm(cLevs, cMap.N)
+ cs = ax.matshow(data, cmap = cMap, aspect = 'auto', origin = 'lower', norm = norm)
+
+ # Add colorbar
+ cbar = fig.colorbar(cs, norm = norm, boundaries = cLevs, drawedges = True,
+ pad = .05)
+ cbar.set_label(cLabel)
+ cbar.set_ticks(cLevs)
+ cbar.ax.xaxis.set_ticks_position("none")
+ cbar.ax.yaxis.set_ticks_position("none")
+
+ # Add grid lines
+ ax.xaxis.set_ticks(np.arange(data.shape[1] + 1))
+ ax.yaxis.set_ticks(np.arange(data.shape[0] + 1))
+ x = (ax.xaxis.get_majorticklocs() - .5)
+ y = (ax.yaxis.get_majorticklocs() - .5)
+ ax.vlines(x, y.min(), y.max())
+ ax.hlines(y, x.min(), x.max())
+
+ # Configure ticks
+ ax.xaxis.tick_bottom()
+ ax.xaxis.set_ticks_position('none')
+ ax.yaxis.set_ticks_position('none')
+ ax.set_xticklabels(rowLabels)
+ ax.set_yticklabels(colLabels)
+
+ # Add labels and title
+ ax.set_xlabel(xLabel)
+ ax.set_ylabel(yLabel)
+ ax.set_title(pTitle)
+
+ # Save the figure
+ fig.savefig('%s.%s' %(fName, fType))
+ plt.show()
+
+def drawPortraitDiagram(dataset, rowLabels, colLabels, fName, fType = 'png',
+ gridShape = (1, 1), xLabel = '', yLabel = '', cLabel = '',
+ pTitle = '', subTitles = None, cMap = None, cLevs = None,
+ nLevs = 10, extend = 'neither'):
+ '''
+ Purpose::
+ Makes a portrait diagram plot.
+
+ Input::
+ dataset - 3d array of the field to be plotted (nT, nRows, nCols)
+ rowLabels - a list of strings denoting labels for each row
+ colLabels - a list of strings denoting labels for each column
+ fName - a string specifying the filename of the plot
+ fType - an optional string specifying the output filetype
+ xLabel - an optional string specifying the x-axis title
+ yLabel - an optional string specifying the y-axis title
+ cLabel - an optional string specifying the colorbar title
+ pTitle - a string specifying the plot title
+ subTitles - an optional list of strings specifying the title for each subplot
+ cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
+ cLevs - an optional list of ints or floats specifying colorbar levels
+ nLevs - an optional integer specifying the target number of contour levels if
+ cLevs is None
+ extend - an optional string to toggle whether to place arrows at the colorbar
+ boundaries. Default is 'neither', but can also be 'min', 'max', or
+ 'both'. Will be automatically set to 'both' if cLevs is None.
+
+ '''
+ # Handle the single plot case.
+ if dataset.ndim == 2 or (dataset.ndim == 3 and dataset.shape[0] == 1):
+ dataset = dataset.reshape(1, *dataset.shape)
+
+ nPlots = dataset.shape[0]
+
+ # Make sure gridShape is compatible with input data
+ gridShape = calcBestGridShape(nPlots, gridShape)
+
+ # Row and Column labels must be consistent with the shape of
+ # the input data too
+ nRows, nCols = dataset.shape[1:]
+ if len(rowLabels) != nRows or len(colLabels) != nCols:
+ raise ValueError('rowLabels and colLabels must have %d and %d elements respectively' %(nRows, nCols))
+
+ # Set up the colormap if not specified
+ if cMap is None:
+ cMap = plt.cm.coolwarm
+
+ # Set up the figure
+ fig = plt.figure()
+ fig.set_size_inches((8.5, 11.))
+ fig.dpi = 300
+
+ # Make the subplot grid
+ grid = ImageGrid(fig, 111,
+ nrows_ncols = gridShape,
+ axes_pad = 0.4,
+ share_all = True,
+ aspect = False,
+ add_all = True,
+ ngrids = nPlots,
+ label_mode = "all",
+ cbar_mode = 'single',
+ cbar_location = 'bottom',
+ cbar_pad = '3%',
+ cbar_size = .15
+ )
+
+ # Calculate colorbar levels if not given
+ if cLevs is None:
+ # Cut off the tails of the distribution
+ # for more representative colorbar levels
+ cLevs = calcNiceIntervals(dataset, nLevs)
+ extend = 'both'
+
+ norm = mpl.colors.BoundaryNorm(cLevs, cMap.N)
+
+ # Do the plotting
+ for i, ax in enumerate(grid):
+ data = dataset[i]
+ cs = ax.matshow(data, cmap = cMap, aspect = 'auto', origin = 'lower', norm = norm)
+
+ # Add grid lines
+ ax.xaxis.set_ticks(np.arange(data.shape[1] + 1))
+ ax.yaxis.set_ticks(np.arange(data.shape[0] + 1))
+ x = (ax.xaxis.get_majorticklocs() - .5)
+ y = (ax.yaxis.get_majorticklocs() - .5)
+ ax.vlines(x, y.min(), y.max())
+ ax.hlines(y, x.min(), x.max())
+
+ # Configure ticks
+ ax.xaxis.tick_bottom()
+ ax.xaxis.set_ticks_position('none')
+ ax.yaxis.set_ticks_position('none')
+ ax.set_xticklabels(colLabels, fontsize = 'xx-small')
+ ax.set_yticklabels(rowLabels, fontsize = 'xx-small')
+
+ # Add axes title
+ if subTitles is not None:
+ ax.text(0.5, 1.04, subTitles[i], va = 'center', ha = 'center',
+ transform = ax.transAxes, fontsize = 'small')
+
+ # Add colorbar
+ cbar = fig.colorbar(cs, cax = ax.cax, norm = norm, boundaries = cLevs, drawedges = True,
+ extend = extend, orientation = 'horizontal')
+ cbar.set_label(cLabel)
+ cbar.set_ticks(cLevs)
+ cbar.ax.xaxis.set_ticks_position("none")
+ cbar.ax.yaxis.set_ticks_position("none")
+
+ # This is an ugly hack to make the title show up at the correct height.
+ # Basically save the figure once to achieve tight layout and calculate
+ # the adjusted heights of the axes, then draw the title slightly above
+ # that height and save the figure again
+ fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
+ ymax = 0
+ for ax in grid:
+ bbox = ax.get_position()
+ ymax = max(ymax, bbox.ymax)
+
+ # Add figure title and axes labels
+ fig.text(.51, .14, yLabel, va = 'center', ha = 'center', rotation = 'horizontal')
+ fig.text(.08, .53, xLabel, va = 'center', ha = 'center', rotation = 'vertical')
+ fig.suptitle(pTitle, y = ymax + .04, fontsize = 16)
+ fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
+ plt.show()
+ fig.clf()
+
+def taylorDiagram(pltDat, pltTit, pltFileName, refName, legendPos, radMax):
+ '''
+ Draw a Taylor diagram
+ '''
+ stdref = 1. # Standard reference value
+ rect = 111 # Subplot setting and location
+ markerSize = 6
+ fig = plt.figure()
+ fig.suptitle(pltTit) # PLot title
+ dia = TaylorDiagram (stdref, fig = fig, radMax = radMax, rect = rect, label = refName)
+ for i,(stddev,corrcoef,name) in enumerate(pltDat):
+ dia.add_sample (stddev, corrcoef, marker = '$%d$' % (i+1), ms = markerSize, label=name)
+ # Add ploylines to mark a range specified by input data - 2 be implemented
+ #circular_line = dia.add_stddev_contours(0.959, 1, 0.965)
+ #circular_line = dia.add_stddev_contours(1.1, 1, 0.973)
+ #straight_line = dia.add_contours(0.959, 1, 1.1, 1)
+ #straight_line = dia.add_contours(0.959, 0.965, 1.1, 0.973)
+ #l=fig.legend (dia.samplePoints, [p.get_label() for p in dia.samplePoints ], handlelength=0., prop={'size':10}, numpoints=1, loc=legendPos)
+ # loc: 1='upper right', 2='upper left' or specified in the calling program via "legendPos"
+ l = fig.legend (dia.samplePoints, [p.get_label() for p in dia.samplePoints ], handlelength=0., prop={'size':10}, numpoints=1, loc=legendPos)
+ l.draw_frame(False)
+ plt.savefig(pltFileName)
+ plt.show()
+ pltDat = 0.
+
+def drawTimeSeriesSingle(dataset, times, labels, fName, fType = 'png', xLabel = '', yLabel ='', pTitle ='',
+ legendPos = 'upper center', legendFrameOn = False, yearLabels = True, yscale = 'linear'):
+ '''
+ Purpose::
+ Function to draw a time series plot
+
+ Input::
+ dataset - a list of arrays for each dataset as a time series
+ times - a list of python datetime objects
+ labels - a list of strings with the names of each dataset
+ fName - a string specifying the filename of the plot
+ fType - an optional string specifying the output filetype
+ xLabel - a string specifying the x-axis title
+ yLabel - a string specifying the y-axis title
+ pTitle - a string specifying the plot title
+ legendPos - an optional string or tuple of float for determining
+ the position of the legend
+ legendFrameOn - optional bool to toggle drawing the frame around
+ the legend
+ yearLabels - optional bool to toggle drawing year labels on the x-xaxis
+ yscale - optional string for setting the y-axis scale, 'linear' for linear
+ and 'log' for log base 10.
+ '''
+ fig = plt.figure()
+ ax = fig.add_subplot(111)
+
+ if not yearLabels:
+ xfmt = mpl.dates.DateFormatter('%b')
+ ax.xaxis.set_major_formatter(xfmt)
+
+ ax.set_xlabel(xLabel)
+ ax.set_ylabel(yLabel)
+ ax.set_title(pTitle)
+
+ # Set up list of lines for legend
+ lines = []
+ ymin, ymax = 0, 0
+
+ # Plot each dataset
+ for data in dataset:
+ line = ax.plot_date(times, data, '', linewidth = 2)
+ lines.extend(line)
+ cmin, cmax = data.min(), data.max()
+ if ymin > cmin:
+ ymin = cmin
+ if ymax < cmax:
+ ymax = cmax
+
+ # Add a bit of padding so lines don't touch bottom and top of the plot
+ ymin = ymin - ((ymax - ymin) * 0.1)
+ ymax = ymax + ((ymax - ymin) * 0.1)
+ ax.set_ylim((ymin, ymax))
+
+ # Set the y-axis scale
+ ax.set_yscale(yscale)
+
+ # Create the legend
+ ax.legend((lines), labels, loc = legendPos, ncol = 10, fontsize='x-small',
+ frameon=legendFrameOn)
+ fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight')
+ plt.show()
+ fig.clf()
+
+
+def pltXY(x, y, lineLabel, lineTyp, pltTit, xLabel, yLabel, pltName, xmin, xmax, deltaX, ymin, ymax, deltaY, legendPos, xScale, yScale):
+ """
+ The default drawing order for axes is patches, lines, text.This order is determined by the zorder attribute. The following defaults are set:
+ Artist Z-order
+ Patch / PatchCollection 1
+ Line2D / LineCollection 2
+ Text 3
+ You can change the order for individual artists by setting the zorder. Any individual plot() call can set a value
+ for the zorder of that particular item.
+ In the fist subplot below, the lines are drawn above the patch collection from the scatter, which is the default.
+ In the subplot below, the order is reversed.
+ The second figure shows how to control the zorder of individual lines.
+ Arguments
+ x (nX) : np array: the number of points in the x axis
+ y (nX,nY): np array: the number of y values to be plotted
+ lineLabel: list(nY): labels for individual y data
+ pltTit : Text : plot title
+ xLabel : Text : x-axis label
+ yLabel : Text : y-axis label
+ pltName : Text : name of the plot file
+ 3/28/2013 Jinwon Kim: Modification of a routine in the matplotlib gallery
+ """
+ lineColors = ['k', 'b', 'r', 'g', 'c', 'm', 'y', '0.6', '0.7', '0.8', '0.9', '1.0']
+ nX = x.size
+ nY = len(lineLabel)
+ lineColors[nY - 1] = 'b'
+ fig = plt.figure()
+ ax = fig.add_subplot(1,1,1)
+ for n in np.arange(nY):
+ plot(x,y[n, :], linewidth=1, color=lineColors[n], linestyle=lineTyp[n], label=lineLabel[n], zorder = 10)
+ xlabel(xLabel,fontsize=10); ylabel(yLabel,fontsize=10)
+ if xmax > xmin:
+ plt.xlim(xmin,xmax)
+ ticks = frange(xmin,xmax,npts=(xmax-xmin)/deltaX+1)
+ ax.xaxis.set_ticks(ticks,minor=False)
+ if ymax > ymin:
+ plt.ylim(ymin,ymax)
+ ticks = frange(ymin,ymax,npts=(ymax-ymin)/deltaY+1)
+ ax.yaxis.set_ticks(ticks,minor=False)
+ ax.set_title(pltTit)
+ ax.xaxis.tick_bottom(); ax.yaxis.tick_left() # put tick marks only on the left (for y) and bottom (for x) axis
+ if xScale == 'log':
+ ax.set_xscale('log')
+ else:
+ ax.set_xscale('linear')
+ if yScale == 'log':
+ ax.set_yscale('log')
+ else:
+ ax.set_yscale('linear')
+ if(nY>1):
+ l = legend(prop={'size':10},loc='best')
+ l.set_zorder(20) # put the legend on top
+ plt.savefig(pltName)
+ show()
+ # release work arrays
+ x=0.; y=0.
+
+
+def pltSca1F(x, y, pltName, xLabel, yLabel, pmin, pmax, delP, pTit, pFname, xScale, yScale):
+ #*************************************#
+ # Plot a single-frame scatter diagram #
+ #*************************************#
+ fig = plt.figure(); ax = fig.add_subplot(1,1,1)
+ lTyp='o'
+ plot(x,y,lTyp,c='r')
+ if pmax > pmin:
+ ticks = frange(pmin,pmax,npts=(pmax-pmin)/delP+1)
+ plt.xlim(pmin,pmax); ax.xaxis.set_ticks(ticks,minor=False)
+ plt.ylim(pmin,pmax); ax.yaxis.set_ticks(ticks,minor=False)
+ ax.set_title(pTit)
+ if xScale == 'log':
+ ax.set_xscale('log')
+ else:
+ ax.set_xscale('linear')
+ if yScale == 'log':
+ ax.set_yscale('log')
+ else:
+ ax.set_yscale('linear')
+ xlabel(xLabel,fontsize=10); ylabel(yLabel,fontsize=10)
+ l = legend(prop={'size':8},loc='best')
+ l.set_zorder(20)
+ plt.savefig(pFname)
+ show()
+ x=0.; y=0.
+
+
+def pltSca6F(x, dName, pmin, pmax, delP, pTitle, pFname, xScale, yScale):
+ #****************************************************#
+ # Plot up to 6 frames (6 rows X 2 columns) on a page #
+ #****************************************************#
+ nPlt = x.shape[0]
+ if pmax > pmin:
+ ticks = frange(pmin,pmax,npts=(pmax-pmin)/delP+1)
+ if nPlt > 6:
+ print 'frames exceed 12: return'
+ return
+ nrows = 3; ncols = 2; npmax = nrows*ncols; lTyp='o'; xlabcol='black'; ylabcol='green'
+ fig = plt.figure()
+ plt.subplots_adjust(hspace=0.3, wspace=0.2)
+ for n in range(1,nPlt):
+ pid = n % npmax
+ if pid == 0:
+ pid = npmax
+ ax = fig.add_subplot(nrows,ncols,pid)
+ #ax.set_title(pTitle[n-1],fontsize=6)
+ if xScale == 'log':
+ ax.set_xscale('log')
+ else:
+ ax.set_xscale('linear')
+ if yScale == 'log':
+ ax.set_yscale('log')
+ else:
+ ax.set_yscale('linear')
+ plot(x[0,:],x[n,:],lTyp,label=pTitle[n-1],c='r')
+ plot(x[0,:],x[n,:],lTyp,c='r')
+ plot(frange(pmin,pmax),c='b')
+ l = legend(prop={'size':8},loc='best')
+ l.set_zorder(20)
+ xlabel(dName[0],fontsize=10); ylabel(dName[n],fontsize=10)
+ if pmax > pmin:
+ plt.xlim(pmin,pmax); ax.xaxis.set_ticks(ticks,minor=False)
+ plt.ylim(pmin,pmax); ax.yaxis.set_ticks(ticks,minor=False)
+ for label in ax.xaxis.get_ticklabels():
+ label.set_color(xlabcol)
+ label.set_rotation(0)
+ label.set_fontsize(8)
+ for label in ax.yaxis.get_ticklabels():
+ label.set_color(ylabcol)
+ label.set_rotation(0)
+ label.set_fontsize(8)
+ plt.savefig(pFname)
+ show()
+ x=0.
+
+def drawContourMapSingle(data, lats, lons, cLevs, fName, fType = 'png',
+ cLabel = '', pTitle = '', cMap = None, nParallels = 5, nMeridians = 5):
+ '''
+ Purpose::
+ Plots a filled contour map.
+ Input::
+ data - 2d array of the field to be plotted with shape (nLon, nLat)
+ lats - array of latitudes
+ lons - array of longitudes
+ cLevs - A list of ints or floats specifying contour levels
+ fName - a string specifying the filename of the plot
+ fType - an optional string specifying the filetype, default is .png
+ cLabel - an optional string specifying the colorbar title
+ pTitle - an optional string specifying plot title
+ cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
+ nParallels - an optional int for the number of parallels to draw
+ nMeridians - an optional int for the number of meridians to draw
+ '''
+ # Set up the colormap if not specified
+ if cMap is None:
+ cMap = plt.cm.RdBu_r
+
+ # Set up the figure
+ fig = plt.figure()
+ ax = fig.add_subplot(111)
+
+ # Determine the map boundaries and construct a Basemap object
+ lonMin = lons.min()
+ lonMax = lons.max()
+ latMin = lats.min()
+ latMax = lats.max()
+ m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax,
+ llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l', ax = ax)
+
+ # Draw the borders for coastlines and countries
+ m.drawcoastlines(linewidth = 1)
+ m.drawcountries(linewidth = .75)
+
+ # Draw parallels / meridians.
+ m.drawmeridians(np.linspace(lonMin, lonMax, nMeridians), labels = [0, 0, 0, 1])
+ m.drawparallels(np.linspace(latMin, latMax, nMeridians), labels = [1, 0, 0, 1])
+
+ # Convert lats and lons to projection coordinates
+ if lats.ndim == 1 and lons.ndim == 1:
+ lons, lats = np.meshgrid(lons, lats)
+ x, y = m(lons, lats)
+
+ # Plot data with filled contours
+ cs = m.contourf(x, y, data, cmap = cMap, levels = cLevs)
+
+ # Add colorbar
+ cbar = m.colorbar(cs, drawedges = True, pad = '2%', size = '3%')
+ cbar.set_label(cLabel)
+ cbar.set_ticks(cLevs)
+ cbar.ax.xaxis.set_ticks_position("none")
+ cbar.ax.yaxis.set_ticks_position("none")
+
+ # Add title and save the figure
+ ax.set_title(pTitle)
+ fig.savefig('%s.%s' %(fName, fType))
+ show()
+
+def drawCntrMap(data,lon,lat,titles,ncols,pFname):
+ '''
+ This routine is based on PyNGL, i.e., NcarGraphics
+ data - a masked numpy array of data to plot (nT,nY,nX)
+ lon - longitude (nY,nX)
+ lat - latitude (nY,nX)
+ titles - array of titles (nT)
+ nrows - number of rows of the paneled plots
+ ncols - number of columns of the paneled plots
+ nT = nrows * ncols
+ pFname - name of output postscript file
+ '''
+ wks_type = 'ps'
+ if data.ndim == 2:
+ nT = 1; nrows = 1; ncols = 1
+ elif data.ndim == 3:
+ nT=data.shape[0]
+ if nT % ncols == 0:
+ nrows = nT/ncols
+ else:
+ nrows = nT/ncols + 1
+ # set workstation type (X11, ps, png)
+ res = Ngl.Resources()
+ wks = Ngl.open_wks(wks_type,pFname,res)
+ # set plot resource paramters
+ resources = Ngl.Resources()
+ resources.mpLimitMode = "LatLon" # Limit the map view.
+ resources.mpMinLonF = lon.min()
+ resources.mpMaxLonF = lon.max()
+ resources.mpMinLatF = lat.min()
+ resources.mpMaxLatF = lat.max()
+ resources.cnFillOn = True
+ resources.cnLineLabelsOn = False # Turn off line labels.
+ resources.cnInfoLabelOn = False # Turn off info label.
+ resources.cnLinesOn = False # Turn off countour line (only filled colors)
+ resources.sfXArray = lon[:,:]
+ resources.sfYArray = lat[:,:]
+ resources.pmTickMarkDisplayMode = "Never" # Turn off map tickmarks.
+ resources.mpOutlineBoundarySets = "GeophysicalAndUSStates"
+ resources.mpGeophysicalLineColor = "red"
+ resources.mpUSStateLineColor = "red"
+ resources.mpGeophysicalLineThicknessF = 0.75
+ resources.mpUSStateLineThicknessF = 0.75
+ resources.mpPerimOn = True # Turn on/off map perimeter
+ resources.mpGridAndLimbOn = True # Turn off map grid.
+ resources.nglFrame = False # Don't advance the frame
+ resources.nglDraw = False
+ plot=[]
+ for iT in np.arange(nT):
+ resources.tiMainString = titles[iT]
+ #resources.pmLabelBarDisplayMode = "Never" # Turn off individual label bars
+ resources.pmLabelBarDisplayMode = "Always" # Turn on individual label bars
+ if data.ndim == 3:
+ plot.append(Ngl.contour_map(wks,data[iT,:,:],resources))
+ elif data.ndim == 2:
+ plot.append(Ngl.contour_map(wks,data[:,:],resources))
+ panelres = Ngl.Resources()
+ panelres.nglPanelTop=0.95
+ panelres.nglPanelBottom=0.05
+ panelres.nglPanelLabelBar = False # Turn on panel labelbar
+ panelres.nglPanelLabelBarLabelFontHeightF = 0.012 # Labelbar font height
+ panelres.nglPanelLabelBarHeightF = 0.03 # Height of labelbar
+ panelres.nglPanelLabelBarWidthF = 0.8 # Width of labelbar
+ panelres.lbLabelFont = "helvetica-bold" # Labelbar font
+ Ngl.panel(wks,plot[0:nT],[nrows,ncols],panelres)
+ Ngl.destroy(wks)
+ del plot
+ del resources
+ del wks
+ return
+
+def drawContourMap(dataset, lats, lons, fName, fType = 'png', gridShape = (1, 1),
+ cLabel = '', pTitle = '', subTitles = None, cMap = None,
+ cLevs = None, nLevs = 10, parallels = None, meridians = None,
+ extend = 'neither'):
+ '''
+ Purpose::
+ Create a multiple panel contour map plot.
+ Input::
+ dataset - 3d array of the field to be plotted with shape (nT, nLon, nLat)
+ lats - array of latitudes
+ lons - array of longitudes
+ fName - a string specifying the filename of the plot
+ fType - an optional string specifying the filetype, default is .png
+ gridShape - optional tuple denoting the desired grid shape (nRows, nCols) for arranging
+ the subplots.
+ cLabel - an optional string specifying the colorbar title
+ pTitle - an optional string specifying plot title
+ subTitles - an optional list of strings specifying the title for each subplot
+ cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
+ cLevs - an optional list of ints or floats specifying contour levels
+ nLevs - an optional integer specifying the target number of contour levels if
+ cLevs is None
+ parallels - an optional list of ints or floats for the parallels to be drawn
+ meridians - an optional list of ints or floats for the meridians to be drawn
+ extend - an optional string to toggle whether to place arrows at the colorbar
+ boundaries. Default is 'neither', but can also be 'min', 'max', or
+ 'both'. Will be automatically set to 'both' if cLevs is None.
+ '''
+ # Handle the single plot case. Meridians and Parallels are not labeled for
+ # multiple plots to save space.
+ if dataset.ndim == 2 or (dataset.ndim == 3 and dataset.shape[0] == 1):
+ if dataset.ndim == 2:
+ dataset = dataset.reshape(1, *dataset.shape)
+ nPlots = 1
+ mLabels = [0, 0, 0, 1]
+ pLabels = [1, 0, 0, 1]
+ else:
+ nPlots = dataset.shape[0]
+ mLabels = [0, 0, 0, 0]
+ pLabels = [0, 0, 0, 0]
+
+ # Make sure gridShape is compatible with input data
+ gridShape = calcBestGridShape(nPlots, gridShape)
+
+ # Set up the colormap if not specified
+ if cMap is None:
+ cMap = plt.cm.coolwarm
+
+ # Set up the figure
+ fig = plt.figure()
+ #fig.set_size_inches((8.5, 11.))
+ #fig.dpi = 600
+
+ # Make the subplot grid
+ grid = ImageGrid(fig, 111,
+ nrows_ncols = gridShape,
+ axes_pad = 0.3,
+ share_all = True,
+ add_all = True,
+ ngrids = nPlots,
+ label_mode = "L",
+ cbar_mode = 'single',
+ cbar_location = 'bottom',
+ cbar_pad = '0%'
+ )
+
+ # Determine the map boundaries and construct a Basemap object
+ lonMin = lons.min()
+ lonMax = lons.max()
+ latMin = lats.min()
+ latMax = lats.max()
+ m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax,
+ llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l')
+
+ # Convert lats and lons to projection coordinates
+ if lats.ndim == 1 and lons.ndim == 1:
+ lons, lats = np.meshgrid(lons, lats)
+
+ # Calculate contour levels if not given
+ if cLevs is None:
+ # Cut off the tails of the distribution
+ # for more representative contour levels
+ cLevs = calcNiceIntervals(dataset, nLevs)
+ extend = 'both'
+
+ # Create default meridians and parallels
+ if meridians is None:
+ meridians = np.arange(-180, 181, 15)
+ if parallels is None:
+ parallels = np.arange(-90, 91, 15)
+
+ x, y = m(lons, lats)
+ for i, ax in enumerate(grid):
+ # Load the data to be plotted
+ data = dataset[i]
+ m.ax = ax
+
+ # Draw the borders for coastlines and countries
+ m.drawcoastlines(linewidth = 1)
+ m.drawcountries(linewidth = .75)
+
+ # Draw parallels / meridians
+ m.drawmeridians(meridians, labels = mLabels, linewidth = .75)
+ m.drawparallels(parallels, labels = pLabels, linewidth = .75)
+
+ # Draw filled contours
+ cs = m.contourf(x, y, data, cmap = cMap, levels = cLevs, extend = extend)
+
+ # Add title
+ if subTitles is not None:
+ ax.set_title(subTitles[i], fontsize = 'small')
+
+
+ # Add colorbar
+ cbar = fig.colorbar(cs, cax = ax.cax, drawedges = True, orientation = 'horizontal',
+ extendfrac = 'auto')
+ cbar.set_label(cLabel)
+ cbar.set_ticks(cLevs)
+ cbar.ax.xaxis.set_ticks_position("none")
+ cbar.ax.yaxis.set_ticks_position("none")
+
+ # This is an ugly hack to make the title show up at the correct height.
+ # Basically save the figure once to achieve tight layout and calculate
+ # the adjusted heights of the axes, then draw the title slightly above
+ # that height and save the figure again
+ fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
+ ymax = 0
+ for ax in grid:
+ bbox = ax.get_position()
+ ymax = max(ymax, bbox.ymax)
+
+ # Add figure title
+ fig.suptitle(pTitle, y = ymax + .04, fontsize = 16)
+ fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
+ plt.show()
+ fig.clf()
+
+def drawSubRegions(subRegions, lats, lons, fName, fType = 'png', pTitle = '',
+ parallels = None, meridians = None, subRegionMasks = None):
+ '''
+ Purpose::
+ Function to draw subregion domain(s) on a map
+
+ Input::
+ subRegions - a list of subRegion objects
+ lats - array of latitudes
+ lons - array of longitudes
+ fName - a string specifying the filename of the plot
+ fType - an optional string specifying the filetype, default is .png
+ pTitle - an optional string specifying plot title
+ parallels - an optional list of ints or floats for the parallels to be drawn
+ meridians - an optional list of ints or floats for the meridians to be drawn
+ subRegionMasks - optional dictionary of boolean arrays for each subRegion
+ for giving finer control of the domain to be drawn, by default
+ the entire domain is drawn.
+ '''
+ # Set up the figure
+ fig = plt.figure()
+ fig.set_size_inches((8.5, 11.))
+ fig.dpi = 300
+ ax = fig.add_subplot(111)
+
+ # Determine the map boundaries and construct a Basemap object
+ lonMin = lons.min()
+ lonMax = lons.max()
+ latMin = lats.min()
+ latMax = lats.max()
+ m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax,
+ llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l', ax = ax)
+
+ # Draw the borders for coastlines and countries
+ m.drawcoastlines(linewidth = 1)
+ m.drawcountries(linewidth = .75)
+ m.drawstates()
+
+ # Create default meridians and parallels
+ if meridians is None:
+ meridians = np.arange(-180, 181, 15)
+ if parallels is None:
+ parallels = np.arange(-90, 91, 15)
+
+ # Draw parallels / meridians
+ m.drawmeridians(meridians, labels = [0, 0, 0, 1], linewidth = .75)
+ m.drawparallels(parallels, labels = [1, 0, 0, 1], linewidth = .75)
+
+ # Set up the color scaling
+ cMap = plt.cm.jet
+ norm = mpl.colors.BoundaryNorm(np.arange(1, len(subRegions) + 3), cMap.N)
+
+ # Process the subregions
+ for i, reg in enumerate(subRegions):
+ if subRegionMasks is not None and reg.name in subRegionMasks.keys():
+ domain = (i + 1) * subRegionMasks[reg.name]
+ else:
+ domain = (i + 1) * np.ones((2, 2))
+
+ nLats, nLons = domain.shape
+ domain = ma.masked_equal(domain, 0)
+ regLats = np.linspace(reg.latMin, reg.latMax, nLats)
+ regLons = np.linspace(reg.lonMin, reg.lonMax, nLons)
+ regLons, regLats = np.meshgrid(regLons, regLats)
+
+ # Convert to to projection coordinates. Not really necessary
+ # for cylindrical projections but keeping it here in case we need
+ # support for other projections.
+ x, y = m(regLons, regLats)
+
+ # Draw the subregion domain
+ m.pcolormesh(x, y, domain, cmap = cMap, norm = norm, alpha = .5)
+
+ # Label the subregion
+ xm, ym = x.mean(), y.mean()
+ m.plot(xm, ym, marker = '$%s$' %(reg.name), markersize = 12, color = 'k')
+
+ # Add the the title
+ ax.set_title(pTitle)
+
+ # Save the figure
+ fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
+ show()
+ fig.clf()
+
+def metrics_plots(varName, numOBS, numMDL, nT, ngrdY, ngrdX, Times, lons, lats, allData, dataList, workdir, subRegions, timeStep, fileOutputOption):
+ '''
+ Calculate evaluation metrics and generate plots.
+ '''
+ ##################################################################################################################
+ # Routine to compute evaluation metrics and generate plots
+ # (1) metric calculation
+ # (2) plot production
+ # Input:
+ # numOBS - the number of obs data. either 1 or >2 as obs ensemble is added for multi-obs cases
+ # numMDL - the number of mdl data. either 1 or >2 as obs ensemble is added for multi-mdl cases
+ # nT - the length of the data in the time dimension
+ # ngrdY - the length of the data in Y dimension
+ # ngrdX - the length of the data in the X dimension
+ # Times - time stamps
+ # lons,lats - longitude & latitude values of the data domain (same for model & obs)
+ # allData - the sum of the observed and model data (combines obsData & mdlData in the old code)
+ # dataList - the list of data names (combines obsList and mdlList in the old code)
+ # workdir - string describing the directory path for storing results and plots
+ # subRegions - list of SubRegion Objects or False
+ # fileOutputOption - option to write regridded data in a netCDF file or not
+ # Output: image files of plots + possibly data
+ #******************************************************
+ # JK2.0: Only the data interpolated temporally and spatially onto the analysis grid
+ # are transferred into this routine. The rest of processing (e.g., area-averaging, etc.)
+ # are to be performed in this routine. Do not overwrite obsData[numOBs,nt,ngrdY,ngrdX] &
+ # mdlData[numMDL,nt,ngrdY,ngrdX]. These are the raw, re-gridded data to be used repeatedly
+ # for multiple evaluation steps as desired by an evaluator
+ # JK2.1: The observed and model data are unified in a single variable for ease of processing
+ ##################################################################################################################
+
+ print ''
+ print 'Start metrics.py'
+ print ''
+ # JK2.1: define the variable to represent the total number of combined (obs + model) datasets
+ numDatasets = numOBS + numMDL
+
+ #####################################################################################################
+ # JK2.0: Compute evaluation metrics and plots to visualize the results
+ #####################################################################################################
+ # (mp.001) Sub-regions for local timeseries analysis
+ #--------------------------------
+ # Enter the location of the subrgns via screen input of data;
+ # modify this to add an option to read-in from data file(s)
+ #----------------------------------------------------------------------------------------------------
+ if subRegions:
+ numSubRgn = len(subRegions)
+ subRgnName = [ x.name for x in subRegions ]
+ subRgnLon0 = [ x.lonMin for x in subRegions ]
+ subRgnLon1 = [ x.lonMax for x in subRegions ]
+ subRgnLat0 = [ x.latMin for x in subRegions ]
+ subRgnLat1 = [ x.latMax for x in subRegions ]
+ else:
+ print ''
+ ans = raw_input('Calculate area-mean timeseries for subregions? y/n: [n] \n')
+ print ''
+ if ans == 'y':
+ ans = raw_input('Input subregion info interactively? y/n: \n> ')
+ if ans == 'y':
+ numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_interactively()
+ else:
+ print 'Read subregion info from a pre-fabricated text file'
+ ans = raw_input('Read from a defaule file (workdir + "/sub_regions.txt")? y/n: \n> ')
+ if ans == 'y':
+ subRgnFileName = workdir + "/sub_regions.txt"
+ else:
+ subRgnFileName = raw_input('Enter the subRgnFileName to read from \n')
+ print 'subRgnFileName ', subRgnFileName
+ numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_from_a_text_file(subRgnFileName)
+ print subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1
+ else:
+ numSubRgn = 0
+ # compute the area-mean timeseries for all subregions if subregion(s) are defined.
+ # the number of subregions is usually small and memory usage is usually not a concern
+ dataRgn = np.zeros((numDatasets, numSubRgn, nT))
+
+ if subRegions:
+ print 'Enter area-averaging: allData.shape ', allData.shape
+ print 'Using Latitude/Longitude Mask for Area Averaging'
+ for n in np.arange(numSubRgn):
+ # Define mask using regular lat/lon box specified by users ('mask=True' defines the area to be excluded)
+ maskLonMin = subRegions[n].lonMin
+ if maskLonMin > 180.:
+ maskLonMin = maskLonMin - 360.
+ maskLonMax = subRegions[n].lonMax
+ if maskLonMax > 180.:
+ maskLonMax = maskLonMax - 360.
+ maskLatMin = subRegions[n].latMin
+ maskLatMax = subRegions[n].latMax
+ mask = np.logical_or(np.logical_or(lats <= maskLatMin, lats >= maskLatMax),
+ np.logical_or(lons <= maskLonMin, lons >= maskLonMax))
+
+ # Calculate area-weighted averages within this region and store in a new list
+ for k in np.arange(numDatasets): #JK2.1: area-average all data
+ Store = []
+ for t in np.arange(nT):
+ Store.append(process.calc_area_mean(allData[k, t, :, :], lats, lons, mymask = mask))
+ dataRgn[k, n, :] = ma.array(Store[:])
+ Store = [] # release the memory allocated by temporary vars
+
+ #-------------------------------------------------------------------------
+ # (mp.002) fileOutputOption: The option to create a binary or netCDF file of processed
+ # (re-gridded and regionally-averaged) data for user-specific processing.
+ # This option is useful for advanced users who need more than
+ # the metrics and vidualization provided in the basic package.
+ #----------------------------------------------------------------------------------------------------
+ print ''
+ if not fileOutputOption:
+ while fileOutputOption not in ['no', 'nc']:
+ fileOutputOption = raw_input('Option for output files of obs/model data: Enter no/nc \
+ for no, netCDF file \n> ').lower()
+ print ''
+
+ # write a netCDF file for post-processing if desired. JK21: binary output option has been completely eliminated
+ if fileOutputOption == 'nc':
+ fileName = '%s/%s_Tseries.nc' % (workdir, varName)
+ tempName = fileName
+ if(os.path.exists(tempName) == True):
+ print "removing %s from the local filesystem, so it can be replaced..." % (tempName,)
+ cmnd = 'rm -f ' + tempName
+ subprocess.call(cmnd, shell=True)
+ files.writeNCfile1(fileName, numSubRgn, lons, lats, allData, dataRgn, dataList, subRegions)
+ print 'The regridded obs and model data are written in the netCDF file ', fileName
+
+ #####################################################################################################
+ ###################### Metrics calculation and plotting cycle starts from here ######################
+ #####################################################################################################
+ print ''
+ print 'OBS and MDL data have been prepared for the evaluation step'
+ print ''
+ doMetricsOption = raw_input('Want to calculate metrics and plot them? [y/n]\n> ').lower()
+ if doMetricsOption == 'y':
+ # Assign the variable name and unit to be used in plots
+ print 'The variable to be processed is ',timeStep,' ',varName
+ pltVarName = raw_input('Enter the variable name to appear in the plot\n> ')
+ pltVarUnit = raw_input('Enter the variable unit to appear in the plot\n> ')
+ print ''
+
+ ####################################################
+ # Terminate job if no metrics are to be calculated #
+ ####################################################
+
+ neval = 0
+
+ while doMetricsOption == 'y':
+ neval += 1
+ print ' '
+ print 'neval= ', neval
+ print ' '
+ #--------------------------------
+ # (mp.003) Preparation
+ #----------------------------------------------------------------------------------------------------
+ # Determine info on years (the years in the record [YR] and its number[numYR])
+ yy = ma.zeros(nT, 'i')
+ for n in np.arange(nT):
+ yy[n] = Times[n].strftime("%Y")
+ YR = np.unique(yy)
+ yy = 0
+ nYR = len(YR)
+ print 'nYR, YR = ', nYR, YR
+
+ # Select the eval domain: over the entire domain (spatial distrib) or regional time series
+ anlDomain = 'n'
+ anlRgn = 'n'
+ print ' '
+ analSelect = int(raw_input('Eval over domain (Enter 0) or time series of selected Sub Regions (Enter 1) \n> '))
+ print ' '
+ if analSelect == 0:
+ anlDomain = 'y'
+ elif analSelect == 1:
+ anlRgn = 'y'
+ else:
+ print 'analSelect= ', analSelect, ' is Not a valid option: CRASH'
+
+ #--------------------------------------------------------------------------------------------------------------------
+ # (mp.004) Select the model and data to be used in the evaluation step
+ # 6/7/2013: JK4 - unified handling of the ref & mdl datasets allows several diff types of evaluation (RCMES v2.1)
+ # such as ref vs. one model or ref; ref vs. all models; ref vs. all model + non-ref refs
+ # refID: the ID of the reference data against which all data are to be evaluated
+ # mdlID: the list of data ID's to be evaluated. if mdlSelect == -99, the list also includes non-ref obs data
+ #--------------------------------------------------------------------------------------------------------------------
+ refID = int(misc.select_data_combined(numDatasets, Times, dataList, 'ref'))
+ mdlSelect = int(misc.select_data_combined(numDatasets, Times, dataList, 'mdl'))
+ mdlID=[]
+ # Assign the data id to be evaluated. Note that a non-reference obs dataset is treated like a model dataset (mdlSelect == -99)
+ if mdlSelect >= 0:
+ mdlID.append(mdlSelect)
+ elif mdlSelect == -1:
+ for n in np.arange(numMDL):
+ mdlID.append(n+numOBS)
+ elif mdlSelect == -2:
+ for n in np.arange(refID):
+ mdlID.append(n)
+ for n in range(refID + 1, numDatasets):
+ mdlID.append(n)
+ elif mdlSelect == -3:
+ if numOBS == 1:
+ print 'There exist only one reference data: EXIT'
+ sys.exit()
+ for n in np.arange(refID):
+ mdlID.append(n)
+ for n in range(refID + 1, numOBS):
+ mdlID.append(n)
+ elif mdlSelect == -4:
+ id4eval = 0 # any number != -9
+ print 'Enter the data id to be evaluated: -9 to stop entering'
+ while id4eval != -9:
+ id4eval = int(raw_input('Enter the data id for evaluation. -9 to stop entry\n> '))
+ if id4eval != -9:
+ mdlID.append(id4eval)
+ refName = dataList[refID]
+ mdlName = []
+ numMdl = len(mdlID)
+ for n in np.arange(numMdl):
+ tname = dataList[mdlID[n]]
+ m = min(len(tname), 8)
+ for k in np.arange(m):
+ if tname[k] == ' ':
+ break
+ elif k == m-1:
+ k = m
+ mdlName.append(tname[0:k])
+ print 'selected reference and model data for evaluation= ', refName, mdlName
+
+ #--------------------------------
+ # (mp.005) Spatial distribution analysis/Evaluation (anlDomain='y')
+ # Obs/mdl climatology variables are 2-d/3d arrays (e.g., oClim = ma.zeros((ngrdY,ngrdX), mClim = ma.zeros((numMdl,ngrdY,ngrdX))
+ #----------------------------------------------------------------------------------------------------
+ if anlDomain == 'y':
+ # first determine the temporal properties to be evaluated
+ print ''
+ timeOption = misc.select_timOpt()
+ print ''
+ if timeOption == 1:
+ timeScale = 'annual'
+ # compute the annual-mean time series and climatology.
+ oTser, oClim = calc_clim_year(nYR, nT, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+ mTser = ma.zeros((numMdl, nYR, ngrdY, ngrdX))
+ mClim = ma.zeros((numMdl, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ id = mdlID[n]
+ mTser[n, :, :, :], mClim[n, :, :] = calc_clim_year(nYR, nT, ngrdY, ngrdX, allData[id, :, :, :], Times)
+ elif timeOption == 2:
+ timeScale = 'seasonal'
+ # select the timeseries and climatology for a season specifiec by a user
+ mTser = ma.zeros((numMdl, nYR, ngrdY, ngrdX))
+ mClim = ma.zeros((numMdl, ngrdY, ngrdX))
+ print ' '
+ moBgn = int(raw_input('Enter the beginning month for your season. 1-12: \n> '))
+ moEnd = int(raw_input('Enter the ending month for your season. 1-12: \n> '))
+ print ' '
+ if moEnd >= moBgn:
+ nMoPerSeason = moEnd - moBgn + 1
+ oTser, oClim = calc_clim_season(nYR, nT, moBgn, moEnd, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+ for n in np.arange(numMdl):
+ id = mdlID[n]
+ mTser[n, :, :, :], mClim[n, :, :] = calc_clim_season(nYR, nT, moBgn, moEnd, ngrdY, ngrdX, allData[id, :, :, :], Times)
+ elif moEnd == moBgn:
+ # Eval for a single month. mTser, oTser are the annual time series
+ # for the specified month (moEnd), and mClim, oClim are the corresponding climatology
+ oTser, oClim = calc_clim_One_month(moEnd, nYR, nT, allData[refID, :, :, :], Times)
+ for n in np.arange(numMdl):
+ id = mdlID[n]
+ mTser[n, :, :, :], mClim[n, :, :] = calc_clim_One_month(moEnd, nYR, nT, allData[id, :, :, :], Times)
+ elif moEnd < moBgn: # have to lose the ending year. redefine nYR=nYR-1, and drop the YR[nYR]
+ nMoS1 = 12 - moBgn + 1
+ nMoS2 = moEnd
+ nMoPerSeason = nMoS1 + nMoS2
+ mTser = ma.zeros((numMdl, nYR - 1, ngrdY, ngrdX))
+ # calculate the seasonal timeseries and climatology for the model data
+ for n in np.arange(numMdl):
+ id = mdlID[n]
+ mTser1, mClim1 = calc_clim_season(nYR, nT, moBgn, 12, ngrdY, ngrdX, allData[id, :, :, :], Times)
+ mTser2, mClim2 = calc_clim_season(nYR, nT, 1, moEnd, ngrdY, ngrdX, allData[id, :, :, :], Times)
+ for i in np.arange(nYR - 1):
+ mTser[n, i, :, :] = (real(nMoS1) * mTser1[i, :, :] + real(nMoS2) * mTser2[i + 1, :, :]) / nMoPerSeason
+ mClim = ma.average(mTser, axis=1)
+ # repeat for the obs data
+ mTser1, mClim1 = calc_clim_season(nYR, nT, moBgn, 12, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+ mTser2, mClim2 = calc_clim_season(nYR, nT, 1, moEnd, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+ oTser = ma.zeros((nYR - 1, ngrdY, ngrdX))
+ for i in np.arange(nYR - 1):
+ oTser[i, :, :] = (real(nMoS1) * mTser1[i, :, :] + real(nMoS2) * mTser2[i + 1, :, :]) / nMoPerSeason
+ oClim = ma.zeros((ngrdY, ngrdX))
+ oClim = ma.average(oTser, axis=0)
+ nYR = nYR - 1
+ yy = ma.empty(nYR)
+ for i in np.arange(nYR):
+ yy[i] = YR[i]
+ mTser1 = 0.
+ mTser2 = 0.
+ mClim1 = 0.
+ mClim2 = 0.
+ elif timeOption == 3:
+ timeScale = 'monthly'
+ # compute the monthly-mean time series and climatology
+ # Note that the shapes of the output vars are:
+ # mTser = ma.zeros((nYR,12,ngrdY,ngrdX)) & mClim = ma.zeros((12,ngrdY,ngrdX))
+ # Also same for oTser = ma.zeros((nYR,12,ngrdY,ngrdX)) &,oClim = ma.zeros((12,ngrdY,ngrdX))
+ oTser, oClim = calc_clim_mo(nYR, nT, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+ mTser = ma.zeros((numMdl, nYR, 12, ngrdY, ngrdX))
+ mClim = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ id = mdlID[n]
+ mTser[n, :, :, :, :], mClim[n, :, :, :] = calc_clim_mo(nYR, nT, ngrdY, ngrdX, allData[id, :, :, :], Times)
+ else:
+ # undefined process options. exit
+ print 'The desired temporal scale is not available this time. END the job'
+ sys.exit()
+
+ #--------------------------------
+ # (mp.006) Select metric to be calculated
+ # bias, mae, acct, accs, pcct, pccs, rmst, rmss, pdfSkillScore, taylor diagram
+ #----------------------------------------------------------------------------------------------------
+ print ' '
+ metricOption = misc.select_metrics()
+ print ' '
+
+ # metrics calculation: the shape of metricDat varies according to the metric type & timescale opetions
+
+ # metrics below yields a 2-d (annual or seasonal) or 3-d (monthly) array for each model
+ if metricOption == 'BIAS':
+ if timeScale == 'monthly':
+ oStdv = np.zeros(12)
+ metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ for m in np.arange(12):
+ metricDat[n, m, :, :] = calc_bias(mTser[n, :, m, :, :], oTser[m, :, :])
+ if n == 0:
+ oStdv[m] = calc_temporal_stdv(oTser[m, :, :])
+ else:
+ oStdv = calc_temporal_stdv(oTser)
+ metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ metricDat[n, :, :] = calc_bias(mTser[n, :, :, :], oTser)
+
+ elif metricOption == 'MAE':
+ if timeScale == 'monthly':
+ metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ for m in np.arange(12):
+ metricDat[n, m, :, :] = calc_mae(mTser[n, :, m, :, :], oTser[m, :, :])
+ else:
+ metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ metricDat[n, :, :] = calc_mae(mTser[n, :, :, :], oTser)
+
+ elif metricOption == 'ACCt':
+ if timeScale == 'monthly':
+ metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ for m in np.arange(12):
+ metricDat[n, m, :, :] = calc_temporal_anom_cor(mTser[n, :, m, :, :], oTser[m, :, :])
+ else:
+ metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ metricDat[n, :, :] = calc_temporal_anom_cor(mTser[n, :, :, :], oTser)
+
+ elif metricOption == 'PCCt':
+ if timeScale == 'monthly':
+ metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ for m in np.arange(12):
+ metricDat[n, m, :, :] = calc_temporal_pat_cor(mTser[n, :, m, :, :], oTser[m, :, :])
+ else:
+ metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ metricDat[n, :, :] = calc_temporal_pat_cor(mTser[n, :, :, :], oTser)
+
+ elif metricOption == 'RMSt':
+ if timeScale == 'monthly':
+ metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ for m in np.arange(12):
+ metricDat[n, m, :, :] = calc_rms(mTser[n, :, m, :, :], oTser[m, :, :])
+ else:
+ metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+ for n in np.arange(numMdl):
+ metricDat[n, :, :] = calc_rms(mTser[n, :, :, :], oTser)
+
+ # metrics below yields a scalar value for each model
+ elif metricOption == 'ACCs':
+ if timeScale == 'monthly':
+ metricDat = ma.zeros((numMdl, 12))
+ for n in np.arange(numMdl):
+ for m in np.arange(12):
+ metricDat[n, m] = calc_spatial_anom_cor(mClim[n, m, :, :], oClim[m, :, :])
+ else:
+ metricDat = ma.zeros(numMdl)
+ for n in np.arange(numMdl):
+ metricDat[n] = calc_spatial_anom_cor(mClim[n, :, :], oClim)
+
+ elif metricOption == 'PCCs':
+ if timeScale == 'monthly':
+ metricDat = ma.zeros((numMdl, 12))
+ for n in np.arange(numMdl):
+ for m in np.arange(12):
+ metricDat[n, m] = calc_spatial_pat_cor(mClim[n, m, :, :], oClim[m, :, :])
+ else:
+ metricDat = ma.zeros(numMdl)
+ for n in np.arange(numMdl):
+ metricDat[n] = calc_spatial_pat_cor(mTmp[n, :, :], oTmp)
+
+ elif metricOption == 'RMSs':
+ if timeScale == 'monthly':
+ metricDat = ma.zeros((numMdl, 12))
+ for n in np.arange(numMdl):
+ for m in np.arange(12):
+ metricDat[n, m] = rms_dom(mClim[n, m, :, :], oClim[m, :, :])
+ else:
+ metricDat = ma.zeros(numMdl)
+ for n in np.arange(numMdl):
+ metricDat[n] = rms_dom(mClim[n, :, :], oClim)
+
+ # metrics to plot taylor diagram
+ elif metricOption == 'Taylor_space':
+ if timeScale == 'monthly':
+ oStdv = ma.zeros(12)
+ mStdv = ma.zeros((numMdl, 12))
+ mCorr = ma.zeros((numMdl, 12))
+ mTemp = ma.zeros((ngrdY, ngrdX))
+ for m in np.arange(12):
+ oStdv[m] = oClim[m, :, :].std()
+ for n in np.arange(numMdl):
+ for m in np.arange(12):
+ mTemp = mClim[n, m, :, :]
+ mStdv[n, m] = mTemp.std()
+ mCorr[n, m] = calc_spatial_pat_cor(mTemp, oClim)
+ else:
+ oStdv = oClim.std()
+ mStdv = ma.zeros(numMdl)
+ mCorr = ma.zeros(numMdl)
+ for n in np.arange(numMdl):
+ mStdv[n] = mClim[n, :, :].std()
+ mCorr[n] = calc_spatial_pat_cor(mClim[n, :, :], oClim)
+ mStdv = mStdv / oStdv # standardized deviation
+
+ #--------------------------------
+ # (mp.007) Plot the metrics. First, enter plot info
+ #----------------------------------------------------------------------------------------------------
+
+ # Taylor diagram
[... 312 lines stripped ...]