Posted to commits@climate.apache.org by pr...@apache.org on 2013/08/27 07:35:49 UTC
svn commit: r1517753 [10/33] - in /incubator/climate/branches/rcmet-2.1.1:
./ src/ src/main/ src/main/python/ src/main/python/bin/
src/main/python/docs/ src/main/python/docs/_static/
src/main/python/docs/_templates/ src/main/python/rcmes/ src/main/pyth...
Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/metrics.py
------------------------------------------------------------------------------
svn:executable = *
Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/process.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/process.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/process.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/process.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,1270 @@
+"""Collection of functions that process numpy arrays of varying shapes.
+Functions can range from subsetting to regridding"""
+
+# Python Standard Library Imports
+import os, subprocess
+import math
+import datetime
+import re
+import sys
+import time
+
+# 3rd Party Imports
+import netCDF4
+import numpy as np
+import numpy.ma as ma
+from scipy.ndimage import map_coordinates
+
+
+def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, lonmin, lonmax):
+ '''
+ Extract a sub-region from a data array.
+
+    Example::
+        A user may load a global model file but only want to examine data over North America;
+        this function extracts that sub-domain from the original data.
+        The defined sub-region must be a regular lat/lon bounding box,
+        but the model data may be on a non-regular grid (e.g. rotated, or Gaussian grid layout).
+        Data are kept on the original model grid and a rectangular (in model space) region
+        is extracted which contains the rectangular (in lat/lon space) user-supplied region.
+
+ INPUT::
+ data - 3d masked data array
+ lats - 2d array of latitudes corresponding to data array
+ lons - 2d array of longitudes corresponding to data array
+ latmin, latmax, lonmin, lonmax - bounding box of required region to extract
+
+ OUTPUT::
+ data2 - subset of original data array containing only data from required subregion
+ lats2 - subset of original latitude data array
+ lons2 - subset of original longitude data array
+
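+    Example (an illustrative sketch; the grids below are hypothetical)::
+        >>> lons, lats = np.meshgrid(np.arange(0., 10.), np.arange(0., 5.))
+        >>> data = ma.zeros((3,) + lats.shape)
+        >>> sub, sublats, sublons = extract_subregion_from_data_array(
+        ...     data, lats, lons, latmin=1., latmax=3., lonmin=2., lonmax=6.)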
+ '''
+
+    # Mask model data array to find grid points inside the supplied bounding box
+    whlat = (lats > latmin) & (lats < latmax)
+    whlon = (lons > lonmin) & (lons < lonmax)
+    wh = whlat & whlon
+
+    # Find matrix indices describing the limits of the regular lat/lon bounding box
+    jmax = np.where(wh)[0].max()
+    jmin = np.where(wh)[0].min()
+    imax = np.where(wh)[1].max()
+    imin = np.where(wh)[1].min()
+
+    # Cut out the sub-region from the data array
+    # NB. +1 so the slices include the last row/column found above
+    data2 = data[:, jmin:jmax + 1, imin:imax + 1]
+
+    # Cut out sub-region from lats,lons arrays
+    lats2 = lats[jmin:jmax + 1, imin:imax + 1]
+    lons2 = lons[jmin:jmax + 1, imin:imax + 1]
+
+ return data2, lats2, lons2
+
+def calc_area_mean(data, lats, lons, mymask=None):
+ '''
+ Calculate Area Average of data in a masked array
+
+ INPUT::
+ data: a masked array of data (NB. only data from one time expected to be passed at once)
+ lats: 2d array of regularly gridded latitudes
+ lons: 2d array of regularly gridded longitudes
+ mymask: (optional) defines spatial region to do averaging over
+
+ OUTPUT::
+ area_mean: a value for the mean inside the area
+
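+    Example (an illustrative sketch; uniform data, so the area mean is 1.0)::
+        >>> lons, lats = np.meshgrid(np.arange(0., 3.), np.arange(10., 13.))
+        >>> area_mean = calc_area_mean(ma.ones(lats.shape), lats, lons)  # -> 1.0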
+ '''
+
+    # If mask not passed in, then set mask to cover whole data domain
+    if mymask is None:
+        # NB. mask value True means 'hide', so False everywhere means use everything.
+        mymask = np.zeros(data.shape, dtype=bool)
+
+ # Dimension check on lats, lons
+ # Sometimes these arrays are 3d, sometimes 2d, sometimes 1d
+ # This bit of code just converts to the required 2d array shape
+ if(len(lats.shape) == 3):
+ lats = lats[0, :, :]
+
+ if(len(lons.shape) == 3):
+ lons = lons[0, :, :]
+
+    if len(lats.shape) == 1 and len(lons.shape) == 1:
+ lons, lats = np.meshgrid(lons, lats)
+
+ # Calculate grid length (assuming regular lat/lon grid)
+ dlat = lats[1, 0] - lats[0, 0]
+ dlon = lons[0, 1] - lons[0, 0]
+
+    # Calculate weights for each grid box
+    # (argument order is latitude, dlat, dlon, matching the function signature)
+    myweights = calc_area_in_grid_box(lats, dlat, dlon)
+
+ # Create a new masked array covering just user selected area (defined in mymask)
+ # NB. this preserves missing data points in the observations data
+ subdata = ma.masked_array(data, mask=mymask)
+
+ if(myweights.shape != subdata.shape):
+ myweights.resize(subdata.shape)
+ myweights[1:, :] = myweights[0, :]
+
+ # Calculate weighted mean using ma.average (which takes weights)
+ area_mean = ma.average(subdata, weights=myweights)
+
+ return area_mean
+
+def calc_area_in_grid_box(latitude, dlat, dlon):
+ '''
+ Calculate area of regular lat-lon grid box
+
+ INPUT::
+ latitude: latitude of grid box (degrees)
+ dlat: grid length in latitude direction (degrees)
+ dlon: grid length in longitude direction (degrees)
+
+ OUTPUT::
+        A: area of the grid box (in m**2, since R is given in metres)
+
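+    Example (sanity check)::
+        At the equator a 1 x 1 degree box spans ~111.2 km per side, so
+        calc_area_in_grid_box(0.0, 1.0, 1.0) is ~1.24e10 (m**2).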
+ '''
+
+ R = 6371000 # radius of Earth in metres
+ C = 2 * math.pi * R
+
+ latitude = np.radians(latitude)
+
+ A = (dlon * (C / 360.)) * (dlat * (C / 360.) * np.cos(latitude))
+
+ return A
+
+def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi= -999999999):
+ '''
+ Perform regridding from one set of lat,lon values onto a new set (lat2,lon2)
+
+ Input::
+ q - the variable to be regridded
+ lat,lon - original co-ordinates corresponding to q values
+ lat2,lon2 - new set of latitudes and longitudes that you want to regrid q onto
+ order - (optional) interpolation order 1=bi-linear, 3=cubic spline
+        mdi - (optional) fill value for missing data (currently unused in this implementation)
+
+ Output::
+ q2 - q regridded onto the new set of lat2,lon2
+
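+    Example (an illustrative sketch; the grids and field are hypothetical)::
+        >>> lons, lats = np.meshgrid(np.linspace(0., 10., 3), np.linspace(0., 10., 3))
+        >>> lons2, lats2 = np.meshgrid(np.linspace(0., 10., 5), np.linspace(0., 10., 5))
+        >>> q = ma.masked_array(np.arange(9.).reshape(3, 3))
+        >>> q2 = do_regrid(q, lats, lons, lats2, lons2)  # q2 has shape (5, 5)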
+ '''
+
+ nlat = q.shape[0]
+ nlon = q.shape[1]
+
+ nlat2 = lat2.shape[0]
+ nlon2 = lon2.shape[1]
+
+ # To make our lives easier down the road, let's
+ # turn these into arrays of x & y coords
+ loni = lon2.ravel()
+ lati = lat2.ravel()
+
+    loni = loni.copy()  # NB. copy so the in-place clipping below can't modify the caller's lat2/lon2
+    lati = lati.copy()
+
+ # Now, we'll set points outside the boundaries to lie along an edge
+ loni[loni > lon.max()] = lon.max()
+ loni[loni < lon.min()] = lon.min()
+
+ # To deal with the "hard" break, we'll have to treat y differently,
+ # so we're just setting the min here...
+ lati[lati > lat.max()] = lat.max()
+ lati[lati < lat.min()] = lat.min()
+
+
+    # We need to convert these to (float) indices
+    # (xi should range from 0 to (nx - 1), etc)
+ loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min())
+
+ # Deal with the "hard" break in the y-direction
+ lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min())
+
+ # Notes on dealing with MDI when regridding data.
+ # Method adopted here:
+ # Use bilinear interpolation of data by default (but user can specify other order using order=... in call)
+ # Perform bilinear interpolation of data, and of mask.
+ # To be conservative, new grid point which contained some missing data on the old grid is set to missing data.
+ # -this is achieved by looking for any non-zero interpolated mask values.
+ # To avoid issues with bilinear interpolation producing strong gradients leading into the MDI,
+ # set values at MDI points to mean data value so little gradient visible = not ideal, but acceptable for now.
+
+ # Set values in MDI so that similar to surroundings so don't produce large gradients when interpolating
+ # Preserve MDI mask, by only changing data part of masked array object.
+ for shift in (-1, 1):
+ for axis in (0, 1):
+ q_shifted = np.roll(q, shift=shift, axis=axis)
+ idx = ~q_shifted.mask * q.mask
+ q.data[idx] = q_shifted[idx]
+
+    # Now we actually interpolate
+    # map_coordinates does cubic interpolation by default,
+    # use "order=1" to perform bilinear interpolation instead...
+ q2 = map_coordinates(q, [lati, loni], order=order)
+ q2 = q2.reshape([nlat2, nlon2])
+
+ # Set values to missing data outside of original domain
+ q2 = ma.masked_array(q2, mask=np.logical_or(np.logical_or(lat2 >= lat.max(),
+ lat2 <= lat.min()),
+ np.logical_or(lon2 <= lon.min(),
+ lon2 >= lon.max())))
+
+    # Make a second map by interpolating the missing-data mask (same order); any non-zero
+    # interpolated value marks a new grid point touched by MDI, so mask it
+ qmdi = np.zeros_like(q)
+ qmdi[q.mask == True] = 1.
+ qmdi[q.mask == False] = 0.
+ qmdi_r = map_coordinates(qmdi, [lati, loni], order=order)
+ qmdi_r = qmdi_r.reshape([nlat2, nlon2])
+ mdimask = (qmdi_r != 0.0)
+
+ # Combine missing data mask, with outside domain mask define above.
+ q2.mask = np.logical_or(mdimask, q2.mask)
+
+ return q2
+
+def create_mask_using_threshold(masked_array, threshold=0.5):
+ '''
+ Routine to create a mask, depending on the proportion of times with missing data.
+ For each pixel, calculate proportion of times that are missing data,
+ **if** the proportion is above a specified threshold value,
+ then mark the pixel as missing data.
+
+ Input::
+ masked_array - a numpy masked array of data (assumes time on axis 0, and space on axes 1 and 2).
+ threshold - (optional) threshold proportion above which a pixel is marked as 'missing data'.
+ NB. default threshold = 50%
+
+ Output::
+ mymask - a numpy array describing the mask. NB. not a masked array, just the mask itself.
+
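+    Example (an illustrative sketch; 4 times, one pixel, 3 of 4 values masked)::
+        >>> a = ma.masked_array(np.zeros((4, 1, 1)),
+        ...                     mask=[[[1]], [[1]], [[1]], [[0]]])
+        >>> mymask = create_mask_using_threshold(a, threshold=0.5)  # -> [[True]]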
+ '''
+
+
+ # try, except used as some model files don't have a full mask, but a single bool
+ # the except catches this situation and deals with it appropriately.
+ try:
+ nT = masked_array.mask.shape[0]
+
+ # For each pixel, count how many times are masked.
+ nMasked = masked_array.mask[:, :, :].sum(axis=0)
+
+ # Define new mask as when a pixel has over a defined threshold ratio of masked data
+ # e.g. if the threshold is 75%, and there are 10 times,
+ # then a pixel will be masked if more than 5 times are masked.
+ mymask = nMasked > (nT * threshold)
+
+    except (AttributeError, IndexError):
+        # mask is a single bool (or missing), so no per-pixel mask exists; mark nothing as missing
+        mymask = np.zeros(masked_array.data[0, :, :].shape, dtype=bool)
+
+ return mymask
+
+def calc_average_on_new_time_unit(data, dateList, unit='monthly'):
+ '''
+ Routine to calculate averages on longer time units than the data exists on.
+
+ Example::
+ If the data is 6-hourly, calculate daily, or monthly means.
+
+ Input::
+ data - data values
+ dateList - list of python datetime structures corresponding to data values
+        unit - string describing time unit to average onto.
+               *Valid values are: 'full', 'annual', 'monthly', 'daily'*
+               *('pentad' is accepted but not yet implemented)*
+
+ Output:
+ meanstorem - numpy masked array of data values meaned over required time period
+        newTimesList - a list of python datetime objects representing the data in the new averaging period
+ *NB.* currently set to beginning of averaging period,
+ **i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.**
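+
+    Example (an illustrative sketch; two days of 12-hourly values averaged to daily)::
+        >>> dates = [datetime.datetime(2007, 1, 1, 0), datetime.datetime(2007, 1, 1, 12),
+        ...          datetime.datetime(2007, 1, 2, 0), datetime.datetime(2007, 1, 2, 12)]
+        >>> means, times = calc_average_on_new_time_unit(np.array([1., 3., 5., 7.]), dates, unit='daily')
+        >>> # means is [2., 6.]; times is [Jan 1st 00Z, Jan 2nd 00Z]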
+ '''
+
+ # First catch unknown values of time unit passed in by user
+ acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad')
+
+ if not acceptable:
+ print 'Error: unknown unit type selected for time averaging'
+ print ' Please check your code.'
+        return -1, -1
+
+ # Calculate arrays of years (2007,2007),
+ # yearsmonths (200701,200702),
+ # or yearmonthdays (20070101,20070102)
+ # -depending on user selected averaging period.
+
+ # Year list
+ if unit == 'annual':
+ print 'Calculating annual mean data'
+ timeunits = []
+
+ for i in np.arange(len(dateList)):
+ timeunits.append(str(dateList[i].year))
+
+ timeunits = np.array(timeunits, dtype=int)
+
+ # YearMonth format list
+ if unit == 'monthly':
+ print 'Calculating monthly mean data'
+ timeunits = []
+
+ for i in np.arange(len(dateList)):
+ timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+
+ timeunits = np.array(timeunits, dtype=int)
+
+ # YearMonthDay format list
+ if unit == 'daily':
+ print 'Calculating daily mean data'
+ timeunits = []
+
+ for i in np.arange(len(dateList)):
+ timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + \
+ str("%02d" % dateList[i].day))
+
+ timeunits = np.array(timeunits, dtype=int)
+
+
+ # TODO: add pentad setting using Julian days?
+
+
+ # Full list: a special case
+ if unit == 'full':
+ print 'Calculating means data over full time range'
+ timeunits = []
+
+ for i in np.arange(len(dateList)):
+ timeunits.append(999) # i.e. we just want the same value for all times.
+
+ timeunits = np.array(timeunits, dtype=int)
+
+
+
+ # empty list to store new times
+ newTimesList = []
+
+ # Decide whether or not you need to do any time averaging.
+ # i.e. if data are already on required time unit then just pass data through and
+ # calculate and return representative datetimes.
+ processing_required = True
+ if len(timeunits) == (len(np.unique(timeunits))):
+ processing_required = False
+
+ # 1D data arrays, i.e. time series
+ if data.ndim == 1:
+ # Create array to store the resulting data
+ meanstore = np.zeros(len(np.unique(timeunits)))
+
+ # Calculate the means across each unique time unit
+ i = 0
+ for myunit in np.unique(timeunits):
+ if processing_required:
+ datam = ma.masked_array(data, timeunits != myunit)
+ meanstore[i] = ma.average(datam)
+
+ # construct new times list
+ smyunit = str(myunit)
+ if len(smyunit) == 4: # YYYY
+                yyyy = int(smyunit[0:4])  # NB. use the string form, ints can't be sliced
+ mm = 1
+ dd = 1
+ if len(smyunit) == 6: # YYYYMM
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = 1
+ if len(smyunit) == 8: # YYYYMMDD
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = int(smyunit[6:8])
+ if len(smyunit) == 3: # Full time range
+ # Need to set an appropriate time representing the mid-point of the entire time span
+ dt = dateList[-1] - dateList[0]
+ halfway = dateList[0] + (dt / 2)
+ yyyy = int(halfway.year)
+ mm = int(halfway.month)
+ dd = int(halfway.day)
+
+ newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
+ i = i + 1
+
+ # 3D data arrays
+ if data.ndim == 3:
+
+ #datamask = create_mask_using_threshold(data,threshold=0.75)
+
+ # Create array to store the resulting data
+ meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]])
+
+ # Calculate the means across each unique time unit
+ i = 0
+ datamask_store = []
+ for myunit in np.unique(timeunits):
+ if processing_required:
+
+ mask = np.zeros_like(data)
+ mask[timeunits != myunit, :, :] = 1.0
+
+ # Calculate missing data mask within each time unit...
+ datamask_at_this_timeunit = np.zeros_like(data)
+ datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75)
+ # Store results for masking later
+ datamask_store.append(datamask_at_this_timeunit[0])
+
+ # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
+ datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit))
+ meanstore[i, :, :] = ma.average(datam, axis=0)
+
+ # construct new times list
+ smyunit = str(myunit)
+ if len(smyunit) == 4: # YYYY
+ yyyy = int(smyunit[0:4])
+ mm = 1
+ dd = 1
+ if len(smyunit) == 6: # YYYYMM
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = 1
+ if len(smyunit) == 8: # YYYYMMDD
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = int(smyunit[6:8])
+ if len(smyunit) == 3: # Full time range
+ # Need to set an appropriate time representing the mid-point of the entire time span
+ dt = dateList[-1] - dateList[0]
+ halfway = dateList[0] + (dt / 2)
+ yyyy = int(halfway.year)
+ mm = int(halfway.month)
+ dd = int(halfway.day)
+
+ newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
+
+ i += 1
+
+    if not processing_required:
+        meanstorem = data
+    elif data.ndim == 1:
+        # 1D case: no per-pixel missing-data masks were stored, so none are applied
+        meanstorem = ma.masked_array(meanstore)
+    else:
+        # Create masked array (using missing data mask defined above)
+        datamask_store = np.array(datamask_store)
+        meanstorem = ma.masked_array(meanstore, datamask_store)
+
+ return meanstorem, newTimesList
+
+def calc_running_accum_from_period_accum(data):
+ '''
+ Routine to calculate running total accumulations from individual period accumulations.
+ ::
+
+ e.g. 0,0,1,0,0,2,2,1,0,0
+ -> 0,0,1,1,1,3,5,6,6,6
+
+ Input::
+ data: numpy array with time in the first axis
+
+ Output::
+ running_acc: running accumulations
+
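+    Example (illustrative; for unmasked data this matches np.cumsum along axis 0)::
+        >>> acc = calc_running_accum_from_period_accum(np.array([0., 0., 1., 0., 2.]))
+        >>> # acc is [0., 0., 1., 1., 3.]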
+ '''
+
+
+ running_acc = np.zeros_like(data)
+
+ if(len(data.shape) == 1):
+ running_acc[0] = data[0]
+
+ if(len(data.shape) > 1):
+ running_acc[0, :] = data[0, :]
+
+ for i in np.arange(data.shape[0] - 1):
+ if(len(data.shape) == 1):
+ running_acc[i + 1] = running_acc[i] + data[i + 1]
+
+ if(len(data.shape) > 1):
+ running_acc[i + 1, :] = running_acc[i, :] + data[i + 1, :]
+
+ return running_acc
+
+def ignore_boundaries(data, rim=10):
+ '''
+ Routine to mask the lateral boundary regions of model data to ignore them from calculations.
+
+ Input::
+ data - a masked array of model data
+ rim - (optional) number of grid points to ignore
+
+ Output::
+ data - data array with boundary region masked
+
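+    Example (an illustrative sketch; a 5x5 field with rim=1)::
+        >>> field = ignore_boundaries(ma.ones((5, 5)), rim=1)
+        >>> # field.mask is True on the outer ring and False on the 3x3 interior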
+ '''
+
+ nx = data.shape[1]
+ ny = data.shape[0]
+
+    # NB. use full-width slices so the final row/column of each edge is masked too
+    rimmask = np.zeros_like(data)
+    for j in np.arange(rim):
+        rimmask[j, :] = 1.0
+
+    for j in ny - 1 - np.arange(rim):
+        rimmask[j, :] = 1.0
+
+    for i in np.arange(rim):
+        rimmask[:, i] = 1.0
+
+    for i in nx - 1 - np.arange(rim):
+        rimmask[:, i] = 1.0
+
+ data = ma.masked_array(data, mask=rimmask)
+
+ return data
+
+def normalizeDatetimes(datetimes, timestep):
+ """
+ Input::
+ datetimes - list of datetime objects that need to be normalized
+ timestep - string of value ('daily' | 'monthly')
+ Output::
+ normalDatetimes - list of datetime objects that have been normalized
+
+ Normalization Rules::
+ Daily data will be forced to an hour value of 00:00:00
+ Monthly data will be forced to the first of the month at midnight
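+
+    Example (illustrative)::
+        >>> normalizeDatetimes([datetime.datetime(2007, 3, 15, 6)], 'monthly')
+        [datetime.datetime(2007, 3, 1, 0, 0)]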
+ """
+ normalDatetimes = []
+    if timestep.lower() == 'monthly':
+        for inputDatetime in datetimes:
+            if inputDatetime.day != 1 or inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0:
+                # Normalize to the first of the month at midnight
+                inputDatetime = inputDatetime.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
+
+            normalDatetimes.append(inputDatetime)
+
+    elif timestep.lower() == 'daily':
+        for inputDatetime in datetimes:
+            if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0:
+                # Normalize to midnight on the same day
+                inputDatetime = inputDatetime.replace(hour=0, minute=0, second=0, microsecond=0)
+
+            normalDatetimes.append(inputDatetime)
+
+
+ return normalDatetimes
+
+def getModelTimes(modelFile, timeVarName):
+ '''
+ TODO: Do a better job handling dates here
+ Routine to convert from model times ('hours since 1900...', 'days since ...')
+ into a python datetime structure
+
+ Input::
+        modelFile - path to the model file you want to extract the times list and modelTimeStep from
+ timeVarName - name of the time variable in the model file
+
+ Output::
+ times - list of python datetime objects describing model data times
+ modelTimeStep - 'hourly','daily','monthly','annual'
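+
+    Example (hypothetical file)::
+        For a time variable with units 'days since 1979-01-01 00:00:00' and
+        values [0, 1, 2], this returns the datetimes for 1-3 Jan 1979 and a
+        modelTimeStep of 'daily'.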
+ '''
+
+    if not os.path.exists(modelFile):
+ print 'the designated file "', modelFile, '" does not exist in getModelTimes: EXIT'
+ cmnd = 'ls -l ' + modelFile
+ subprocess.call(cmnd, shell=True)
+ sys.exit()
+
+ f = netCDF4.Dataset(modelFile, mode='r')
+ xtimes = f.variables[timeVarName]
+ timeFormat = xtimes.units.encode()
+
+ # search to check if 'since' appears in units
+ try:
+ sinceLoc = re.search('since', timeFormat).end()
+
+ except AttributeError:
+ print 'Error decoding model times: time variable attributes do not contain "since"'
+ raise
+
+ units = None
+    TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years')
+    # search for 'minutes', 'hours', 'days', 'months' or 'years' so we know the units
+ for unit in TIME_UNITS:
+ if re.search(unit, timeFormat):
+ units = unit
+ break
+
+    # cut out base time (the bit following 'since')
+    base_time_string = timeFormat[sinceLoc:].lstrip()
+ # decode base time
+ base_time = decodeTimeFromString(base_time_string)
+ times = []
+
+ for xtime in xtimes[:]:
+
+ # Cast time as an int
+ xtime = int(xtime)
+
+ if units == 'minutes':
+ dt = datetime.timedelta(minutes=xtime)
+ new_time = base_time + dt
+ elif units == 'hours':
+ dt = datetime.timedelta(hours=xtime)
+ new_time = base_time + dt
+ elif units == 'days':
+ dt = datetime.timedelta(days=xtime)
+ new_time = base_time + dt
+        elif units == 'months':
+            # NB. adding months in python is complicated as month length varies and hence ambiguous.
+            # Perform date arithmetic manually.
+            # Assumption: the base_date will usually be the first of the month
+            # NB. this method will fail if the base time is on the 29th or higher day of month
+            # -as can't have, e.g. Feb 31st.
+            total_months = base_time.month - 1 + xtime
+            new_month = total_months % 12 + 1
+            new_year = base_time.year + total_months // 12
+            new_time = datetime.datetime(new_year, new_month, base_time.day,
+                                         base_time.hour, base_time.minute, base_time.second)
+        elif units == 'years':
+            # NB. datetime.timedelta has no 'years' argument, so step the year directly
+            new_time = base_time.replace(year=base_time.year + xtime)
+        else:
+            raise Exception('the time unit %s is not currently handled in this version.' % units)
+
+ times.append(new_time)
+
+    timeStepLength = int(xtimes[1] - xtimes[0] + 1.e-12)
+    modelTimeStep = getModelTimeStep(units, timeStepLength)
+
+ times = normalizeDatetimes(times, modelTimeStep)
+
+ return times, modelTimeStep
+
+def getModelTimeStep(units, stepSize):
+ # Time units are now determined. Determine the time intervals of input data (mdlTimeStep)
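+    # e.g. getModelTimeStep('hours', 24) -> 'daily',
+    #      getModelTimeStep('days', 30)  -> 'monthly' (illustrative of the mapping below)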
+
+ if units == 'minutes':
+ if stepSize == 60:
+ modelTimeStep = 'hourly'
+ elif stepSize == 1440:
+ modelTimeStep = 'daily'
+ # 28 days through 31 days
+ elif 40320 <= stepSize <= 44640:
+ modelTimeStep = 'monthly'
+ # 365 days through 366 days
+ elif 525600 <= stepSize <= 527040:
+ modelTimeStep = 'annual'
+ else:
+ raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
+
+ elif units == 'hours':
+ if stepSize == 1:
+ modelTimeStep = 'hourly'
+ elif stepSize == 24:
+ modelTimeStep = 'daily'
+ elif 672 <= stepSize <= 744:
+ modelTimeStep = 'monthly'
+ elif 8760 <= stepSize <= 8784:
+ modelTimeStep = 'annual'
+ else:
+ raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
+
+ elif units == 'days':
+ if stepSize == 1:
+ modelTimeStep = 'daily'
+ elif 28 <= stepSize <= 31:
+ modelTimeStep = 'monthly'
+ elif 365 <= stepSize <= 366:
+ modelTimeStep = 'annual'
+ else:
+ raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
+
+ elif units == 'months':
+ if stepSize == 1:
+ modelTimeStep = 'monthly'
+ elif stepSize == 12:
+ modelTimeStep = 'annual'
+ else:
+ raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
+
+ elif units == 'years':
+ if stepSize == 1:
+ modelTimeStep = 'annual'
+ else:
+ raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
+
+    else:
+        raise Exception('the time unit %s is not currently handled in this version.' % units)
+
+ return modelTimeStep
+
+def decodeTimeFromString(time_string):
+ '''
+ Decodes string into a python datetime object
+ *Method:* tries a bunch of different time format possibilities and hopefully one of them will hit.
+ ::
+
+ **Input:** time_string - a string that represents a date/time
+
+ **Output:** mytime - a python datetime object
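+
+    Example::
+        >>> decodeTimeFromString('2002-07-01 00:00:00')
+        datetime.datetime(2002, 7, 1, 0, 0)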
+ '''
+
+    # This will deal with times that use decimal seconds: drop the fractional part,
+    # since none of the formats below carry sub-second precision
+    if '.' in time_string:
+        time_string = time_string.split('.')[0]
+
+    TIME_FORMATS = ('%Y-%m-%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y%m%d %H:%M:%S',
+                    '%Y:%m:%d %H:%M:%S', '%Y%m%d%H%M%S', '%Y-%m-%d %H:%M', '%Y-%m-%d')
+
+    # Try each format in turn and return on the first that parses
+    for fmt in TIME_FORMATS:
+        try:
+            mytime = time.strptime(time_string, fmt)
+            return datetime.datetime(*mytime[0:6])
+        except ValueError:
+            pass
+
+    print 'Error decoding time string: string does not match a predefined time format'
+    return 0
+
+def regrid_wrapper(regrid_choice, obsdata, obslats, obslons, wrfdata, wrflats, wrflons):
+ '''
+ Wrapper routine for regridding.
+
+ Either regrids model to obs grid, or obs to model grid, depending on user choice
+
+ Inputs::
+ regrid_choice - [0] = Regrid obs data onto model grid or
+ [1] = Regrid model data onto obs grid
+
+ obsdata,wrfdata - data arrays
+ obslats,obslons - observation lat,lon arrays
+ wrflats,wrflons - model lat,lon arrays
+
+ Output::
+ rdata,rdata2 - regridded data
+ lats,lons - latitudes and longitudes of regridded data
+
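+    Example (illustrative; shapes only)::
+        With regrid_choice='0' and obsdata of shape (nT, nyObs, nxObs), the obs
+        are returned regridded to (nT, nyModel, nxModel) alongside the untouched
+        model data and the model lats/lons.
+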
+ '''
+
+ # Regrid obs data onto model grid
+ if(regrid_choice == '0'):
+
+ ndims = len(obsdata.shape)
+ if(ndims == 3):
+ newshape = [obsdata.shape[0], wrfdata.shape[1], wrfdata.shape[2]]
+ nT = obsdata.shape[0]
+
+ if(ndims == 2):
+ newshape = [wrfdata.shape[0], wrfdata.shape[1]]
+ nT = 0
+
+ rdata = wrfdata
+ lats, lons = wrflats, wrflons
+
+ # Create a new masked array with the required dimensions
+ tmp = np.zeros(newshape)
+ rdata2 = ma.MaskedArray(tmp, mask=(tmp == 0))
+ tmp = 0 # free up some memory
+
+ rdata2[:] = 0.0
+
+ if(nT > 0):
+ for t in np.arange(nT):
+ rdata2[t, :, :] = do_regrid(obsdata[t, :, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :])
+
+ if(nT == 0):
+ rdata2[:, :] = do_regrid(obsdata[:, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :])
+
+
+ # Regrid model data onto obs grid
+ if(regrid_choice == '1'):
+        ndims = len(wrfdata.shape)
+        if(ndims == 3):
+            newshape = [wrfdata.shape[0], obsdata.shape[1], obsdata.shape[2]]
+            nT = wrfdata.shape[0]  # NB. number of model times, since it is the model data being regridded
+
+ if(ndims == 2):
+ newshape = [obsdata.shape[0], obsdata.shape[1]]
+ nT = 0
+
+ rdata2 = obsdata
+ lats, lons = obslats, obslons
+
+ tmp = np.zeros(newshape)
+ rdata = ma.MaskedArray(tmp, mask=(tmp == 0))
+ tmp = 0 # free up some memory
+
+ rdata[:] = 0.0
+
+ if(nT > 0):
+ for t in np.arange(nT):
+ rdata[t, :, :] = do_regrid(wrfdata[t, :, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :])
+
+ if(nT == 0):
+ rdata[:, :] = do_regrid(wrfdata[:, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :])
+
+
+ return rdata, rdata2, lats, lons
+
+def extract_sub_time_selection(allTimes, subTimes, data):
+ '''
+ Routine to extract a sub-selection of times from a data array.
+
+ Example::
+ Suppose a data array has 30 time values for daily data for a whole month,
+ but you only want the data from the 5th-15th of the month.
+
+ Input::
+ allTimes - list of datetimes describing what times the data in the data array correspond to
+ subTimes - the times you want to extract data from
+ data - the data array
+
+ Output::
+ subdata - subselection of data array
+
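+    Example continued (illustrative sketch; allTimes and data as described above)::
+        >>> subTimes = allTimes[4:15]
+        >>> subdata = extract_sub_time_selection(allTimes, subTimes, data)
+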
+ '''
+ # Create new array to store the subselection
+ subdata = np.zeros([len(subTimes), data.shape[1], data.shape[2]])
+
+    # Loop over all times and, when a time is a member of the required subselection, copy the data across
+    i = 0      # counter for allTimes
+    subi = 0   # counter for subTimes
+    for t in allTimes:
+        if t in subTimes:
+            subdata[subi, :, :] = data[i, :, :]
+            subi += 1
+        i += 1
+
+ return subdata
+
+def calc_average_on_new_time_unit_K(data, dateList, unit):
+ '''
+ Routine to calculate averages on longer time units than the data exists on.
+ e.g. if the data is 6-hourly, calculate daily, or monthly means.
+
+ Input:
+ data - data values
+ dateList - list of python datetime structures corresponding to data values
+ unit - string describing time unit to average onto
+               valid values: 'full', 'annual', 'monthly', 'daily' ('pentad' accepted but not yet implemented)
+ Output:
+ meanstorem - numpy masked array of data values meaned over required time period
+        newTimesList - a list of python datetime objects representing the data in the new averaging period
+ NB. currently set to beginning of averaging period,
+ i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
+ '''
+
+ # Check if the user-selected temporal grid is valid. If not, EXIT
+ acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
+    if not acceptable:
+        print 'Error: unknown unit type selected for time averaging: EXIT'
+        return -1, -1
+
+ # Calculate arrays of: annual timeseries: year (2007,2007),
+ # monthly time series: year-month (200701,200702),
+ # daily timeseries: year-month-day (20070101,20070102)
+ # depending on user-selected averaging period.
+
+ # Year list
+ if unit=='annual':
+ timeunits = []
+ for i in np.arange(len(dateList)):
+ timeunits.append(str(dateList[i].year))
+ timeunits = np.array(timeunits, dtype=int)
+
+ # YearMonth format list
+ if unit=='monthly':
+ timeunits = []
+ for i in np.arange(len(dateList)):
+ timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+ timeunits = np.array(timeunits,dtype=int)
+
+ # YearMonthDay format list
+ if unit=='daily':
+ timeunits = []
+ for i in np.arange(len(dateList)):
+ timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
+ timeunits = np.array(timeunits,dtype=int)
+
+ # TODO: add pentad setting using Julian days?
+
+ # Full list: a special case
+    if unit == 'full':
+        # Calculate means over the entire time range, i.e. annual-mean climatology
+ timeunits = []
+ for i in np.arange(len(dateList)):
+ timeunits.append(999) # i.e. we just want the same value for all times.
+ timeunits = np.array(timeunits, dtype=int)
+
+ # empty list to store new times
+ newTimesList = []
+
+ # Decide whether or not you need to do any time averaging.
+ # i.e. if data are already on required time unit then just pass data through and
+ # calculate and return representative datetimes.
+ processing_required = True
+ if len(timeunits)==(len(np.unique(timeunits))):
+ processing_required = False
+
+ # 1D data arrays, i.e. time series
+ if data.ndim==1:
+ # Create array to store the resulting data
+ meanstore = np.zeros(len(np.unique(timeunits)))
+
+ # Calculate the means across each unique time unit
+ i=0
+ for myunit in np.unique(timeunits):
+ if processing_required:
+ datam=ma.masked_array(data,timeunits!=myunit)
+ meanstore[i] = ma.average(datam)
+
+ # construct new times list
+ smyunit = str(myunit)
+ if len(smyunit)==4: # YYYY
+                yyyy = int(smyunit[0:4])  # NB. use the string form, ints can't be sliced
+ mm = 1
+ dd = 1
+ if len(smyunit)==6: # YYYYMM
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = 1
+ if len(smyunit)==8: # YYYYMMDD
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = int(smyunit[6:8])
+ if len(smyunit)==3: # Full time range
+ # Need to set an appropriate time representing the mid-point of the entire time span
+ dt = dateList[-1]-dateList[0]
+ halfway = dateList[0]+(dt/2)
+ yyyy = int(halfway.year)
+ mm = int(halfway.month)
+ dd = int(halfway.day)
+
+ newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+ i = i+1
+
+ # 3D data arrays
+ if data.ndim==3:
+ # datamask = create_mask_using_threshold(data,threshold=0.75)
+ # Create array to store the resulting data
+ meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
+
+ # Calculate the means across each unique time unit
+ i=0
+ datamask_store = []
+ for myunit in np.unique(timeunits):
+ if processing_required:
+ mask = np.zeros_like(data)
+ mask[timeunits!=myunit,:,:] = 1.0
+ # Calculate missing data mask within each time unit...
+ datamask_at_this_timeunit = np.zeros_like(data)
+ datamask_at_this_timeunit[:]= create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
+ # Store results for masking later
+ datamask_store.append(datamask_at_this_timeunit[0])
+ # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
+ datam = ma.masked_array(data,np.logical_or(mask,datamask_at_this_timeunit))
+ meanstore[i,:,:] = ma.average(datam,axis=0)
+ # construct new times list
+ smyunit = str(myunit)
+ if len(smyunit)==4: # YYYY
+ yyyy = int(smyunit[0:4])
+ mm = 1
+ dd = 1
+ if len(smyunit)==6: # YYYYMM
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = 1
+ if len(smyunit)==8: # YYYYMMDD
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = int(smyunit[6:8])
+ if len(smyunit)==3: # Full time range
+ # Need to set an appropriate time representing the mid-point of the entire time span
+ dt = dateList[-1]-dateList[0]
+ halfway = dateList[0]+(dt/2)
+ yyyy = int(halfway.year)
+ mm = int(halfway.month)
+ dd = int(halfway.day)
+ newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+ i += 1
+
+    if not processing_required:
+        meanstorem = data
+    elif data.ndim==1:
+        # 1D case: no per-pixel missing-data masks were stored, so none are applied
+        meanstorem = ma.masked_array(meanstore)
+    else:
+        # Create masked array (using missing data mask defined above)
+        datamask_store = np.array(datamask_store)
+        meanstorem = ma.masked_array(meanstore, datamask_store)
+
+ return meanstorem,newTimesList
+
+def decode_model_timesK(ifile,timeVarName,file_type):
+ #################################################################################################
+ # Convert model times ('hours since 1900...', 'days since ...') into a python datetime structure
+ # Input:
+ # filelist - list of model files
+ # timeVarName - name of the time variable in the model files
+ # Output:
+ # times - list of python datetime objects describing model data times
+ # Peter Lean February 2011
+ #################################################################################################
+ f = netCDF4.Dataset(ifile,mode='r',format=file_type)
+ xtimes = f.variables[timeVarName]
+ timeFormat = xtimes.units.encode()
+ #timeFormat = "days since 1979-01-01 00:00:00"
+ # search to check if 'since' appears in units
+ try:
+ sinceLoc = re.search('since',timeFormat).end()
+ except:
+ print 'Error decoding model times: time var attributes do not contain "since": Exit'
+ sys.exit()
+    # search for 'minutes', 'hours', 'days', 'months' or 'years' so we know the units
+    units = ''
+    for unit_name in ('minutes', 'hours', 'days', 'months', 'years'):
+        if re.search(unit_name, timeFormat):
+            units = unit_name
+            break
+    # cut out base time (the bit following 'since')
+    base_time_string = timeFormat[sinceLoc:].lstrip()
+ # decode base time
+ # 9/25/2012: datetime.timedelta has problem with the argument because xtimes is NioVariable.
+ # Soln (J. Kim): use a temp variable ttmp in the for loop, then convert it into an integer xtime.
+ base_time = decodeTimeFromString(base_time_string)
+ times=[]
+    for ttmp in xtimes[:]:
+        xtime = int(ttmp)
+        if units == 'minutes':
+            new_time = base_time + datetime.timedelta(minutes=xtime)
+        elif units == 'hours':
+            new_time = base_time + datetime.timedelta(hours=xtime)
+        elif units == 'days':
+            new_time = base_time + datetime.timedelta(days=xtime)
+        elif units == 'months':
+            # NB. adding months in python is complicated as month length varies and hence ambiguous.
+            # Perform date arithmetic manually.
+            # Assumption: the base_date will usually be the first of the month
+            # NB. this method will fail if the base time is on the 29th or higher day of month
+            # -as can't have, e.g. Feb 31st.
+            total_months = base_time.month - 1 + xtime
+            new_month = total_months % 12 + 1
+            new_year = base_time.year + total_months // 12
+            new_time = datetime.datetime(new_year, new_month, base_time.day,
+                                         base_time.hour, base_time.minute, base_time.second)
+        elif units == 'years':
+            # NB. datetime.timedelta has no 'years' argument, so step the year directly
+            new_time = base_time.replace(year=base_time.year + xtime)
+        else:
+            print 'Error decoding model times: unsupported time unit "%s": Exit' % units
+            sys.exit()
+        times.append(new_time)
+    return times
+
+
+def regrid_in_time(data,dateList,unit):
+ #################################################################################################
+ # Routine to calculate averages on longer time units than the data exists on.
+ # e.g. if the data is 6-hourly, calculate daily, or monthly means.
+ # Input:
+ # data - data values
+ # dateList - list of python datetime structures corresponding to data values
+ # unit - string describing time unit to average onto
+    #          valid values: 'full', 'annual', 'monthly', 'daily' ('pentad' accepted but not yet implemented)
+ # Output:
+ # meanstorem - numpy masked array of data values meaned over required time period
+ # newTimesList - a list of python datetime objects representing the data in the new averagin period
+ # NB. currently set to beginning of averaging period,
+ # i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
+ # ..............................
+ # Jinwon Kim, Sept 30, 2012
+ # Created from calc_average_on_new_time_unit_K, Peter's original, with the modification below:
+ # Instead of masked array used by Peter, use wh to defined data within the averaging range.
+ #################################################################################################
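+    # e.g. (illustrative) 6-hourly data spanning Jan-Feb with unit='monthly'
+    # returns two time levels: one mean field per month.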
+
+    print '*** Begin regrid_in_time ***'
+ # Check if the user-selected temporal grid is valid. If not, EXIT
+ acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
+    if not acceptable:
+        print 'Error: unknown unit type selected for time averaging: EXIT'
+        return -1, -1
+
+ # Calculate time arrays of: annual (yyyy, [2007]), monthly (yyyymm, [200701,200702]), daily (yyyymmdd, [20070101,20070102])
+ # "%02d" is similar to i2.2 in Fortran
+ if unit=='annual': # YYYY
+ timeunits = []
+ for i in np.arange(len(dateList)):
+ timeunits.append(str(dateList[i].year))
+ elif unit=='monthly': # YYYYMM
+ timeunits = []
+ for i in np.arange(len(dateList)):
+ timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+ elif unit=='daily': # YYYYMMDD
+ timeunits = []
+ for i in np.arange(len(dateList)):
+ timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
+    elif unit=='full': # Full list: a special case
+        # Calculate means over the entire time range, i.e. annual-mean climatology
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(999) # i.e. we just want the same value for all times.
+
+    # NB. convert outside the if/elif blocks so every averaging option yields an integer array
+    timeunits = np.array(timeunits, dtype=int)
+    print 'timeRegridOption= ', unit
+
+ # empty list to store time levels after temporal regridding
+ newTimesList = []
+
+ # Decide whether or not you need to do any time averaging.
+ # i.e. if data are already on required time unit then just pass data through and calculate and return representative datetimes.
+ processing_required = True
+ if len(timeunits)==(len(np.unique(timeunits))):
+ processing_required = False
+ print 'processing_required= ',processing_required,': input time steps= ',len(timeunits),': regridded output time steps = ',len(np.unique(timeunits))
+ #print 'np.unique(timeunits): ',np.unique(timeunits)
+ print 'data.ndim= ',data.ndim
+
+ if data.ndim==1: # 1D data arrays, i.e. 1D time series
+ # Create array to store the temporally regridded data
+ meanstore = np.zeros(len(np.unique(timeunits)))
+ # Calculate the means across each unique time unit
+ i=0
+ for myunit in np.unique(timeunits):
+ if processing_required:
+ wh = timeunits==myunit
+ datam = data[wh]
+ meanstore[i] = ma.average(datam)
+ # construct new times list
+ smyunit = str(myunit)
+ if len(smyunit)==4: # YYYY
+                yyyy = int(smyunit[0:4])  # NB. use the string form, ints can't be sliced
+ mm = 1
+ dd = 1
+ if len(smyunit)==6: # YYYYMM
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = 1
+ if len(smyunit)==8: # YYYYMMDD
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = int(smyunit[6:8])
+ if len(smyunit)==3: # Full time range
+ # Need to set an appropriate time representing the mid-point of the entire time span
+ dt = dateList[-1]-dateList[0]
+ halfway = dateList[0]+(dt/2)
+ yyyy = int(halfway.year)
+ mm = int(halfway.month)
+ dd = int(halfway.day)
+ newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+ i = i+1
+
+ elif data.ndim==3: # 3D data arrays, i.e. 2D time series
+ # Create array to store the resulting data
+ meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
+ # Calculate the means across each unique time unit
+ i=0
+ datamask_store = []
+ for myunit in np.unique(timeunits):
+ if processing_required:
+ wh = timeunits==myunit
+ datam = data[wh,:,:]
+ meanstore[i,:,:] = ma.average(datam,axis=0)
+ # construct new times list
+ smyunit = str(myunit)
+ if len(smyunit)==4: # YYYY
+ yyyy = int(smyunit[0:4])
+ mm = 1
+ dd = 1
+ if len(smyunit)==6: # YYYYMM
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = 1
+ if len(smyunit)==8: # YYYYMMDD
+ yyyy = int(smyunit[0:4])
+ mm = int(smyunit[4:6])
+ dd = int(smyunit[6:8])
+ if len(smyunit)==3: # Full time range
+ # Need to set an appropriate time representing the mid-point of the entire time span
+ dt = dateList[-1]-dateList[0]
+ halfway = dateList[0]+(dt/2)
+ yyyy = int(halfway.year)
+ mm = int(halfway.month)
+ dd = int(halfway.day)
+ newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+ i += 1
+
+    if not processing_required:
+        meanstorem = data
+    else:
+        meanstorem = meanstore
+
+    print '*** End regrid_in_time ***'
+ return meanstorem,newTimesList
Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/process.py
------------------------------------------------------------------------------
svn:executable = *
Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/Taylor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/Taylor.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/Taylor.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/Taylor.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,126 @@
+"""
+Taylor diagram (Taylor, 2001) test implementation.
+
+http://www-pcmdi.llnl.gov/about/staff/Taylor/CV/Taylor_diagram_primer.htm
+"""
+
+__version__ = "Time-stamp: <2012-02-17 20:59:35 ycopin>"
+__author__ = "Yannick Copin <ya...@laposte.net>"
+
+import numpy as NP
+import matplotlib.pyplot as PLT
+
+class TaylorDiagram(object):
+ """Taylor diagram: plot model standard deviation and correlation
+ to reference (data) sample in a single-quadrant polar plot, with
+ r=stddev and theta=arccos(correlation).
+ """
+
+ def __init__(self, refstd, radMax = 1.5, fig=None, rect=111, label='_'):
+ """Set up Taylor diagram axes, i.e. single quadrant polar
+ plot, using mpl_toolkits.axisartist.floating_axes. refstd is
+ the reference standard deviation to be compared to.
+ """
+
+ from matplotlib.projections import PolarAxes
+ import mpl_toolkits.axisartist.floating_axes as FA
+ import mpl_toolkits.axisartist.grid_finder as GF
+
+ self.refstd = refstd # Reference standard deviation
+
+ tr = PolarAxes.PolarTransform()
+
+ # Correlation labels
+ rlocs = NP.concatenate((NP.arange(10)/10.,[0.95,0.99]))
+ tlocs = NP.arccos(rlocs) # Conversion to polar angles
+ gl1 = GF.FixedLocator(tlocs) # Positions
+ tf1 = GF.DictFormatter(dict(zip(tlocs, map(str,rlocs))))
+
+ # Standard deviation axis extent
+ self.smin = 0
+ self.smax = radMax * self.refstd
+
+ ghelper = FA.GridHelperCurveLinear(tr,
+ extremes=(0,NP.pi/2, # 1st quadrant
+ self.smin,self.smax),
+ grid_locator1=gl1,
+ tick_formatter1=tf1,
+ )
+
+ if fig is None:
+ fig = PLT.figure()
+
+ ax = FA.FloatingSubplot(fig, rect, grid_helper=ghelper)
+ fig.add_subplot(ax)
+
+ # Adjust axes
+ ax.axis["top"].set_axis_direction("bottom") # "Angle axis"
+ ax.axis["top"].toggle(ticklabels=True, label=True)
+ ax.axis["top"].major_ticklabels.set_axis_direction("top")
+ ax.axis["top"].label.set_axis_direction("top")
+ ax.axis["top"].label.set_text("Correlation")
+
+ ax.axis["left"].set_axis_direction("bottom") # "X axis"
+ ax.axis["left"].label.set_text("Standardized deviation")
+
+ ax.axis["right"].set_axis_direction("top") # "Y axis"
+ ax.axis["right"].toggle(ticklabels=True)
+ ax.axis["right"].major_ticklabels.set_axis_direction("left")
+
+ ax.axis["bottom"].set_visible(False) # Useless
+
+ # Contours along standard deviations
+ ax.grid(False)
+
+ self._ax = ax # Graphical axes
+ self.ax = ax.get_aux_axes(tr) # Polar coordinates
+
+ # Add reference point and stddev contour
+ # print "Reference std:", self.refstd
+ l, = self.ax.plot([0], self.refstd, 'k*',
+ ls='', ms=10, label=label)
+ t = NP.linspace(0, NP.pi/2)
+ r = NP.zeros_like(t) + self.refstd
+ self.ax.plot(t,r, 'k--', label='_')
+
+        # Collect sample points for later use (e.g. legend)
+ self.samplePoints = [l]
+
+ def add_sample(self, stddev, corrcoef, *args, **kwargs):
+ """Add sample (stddev,corrcoeff) to the Taylor diagram. args
+ and kwargs are directly propagated to the Figure.plot
+ command."""
+
+ l, = self.ax.plot(NP.arccos(corrcoef), stddev,
+ *args, **kwargs) # (theta,radius)
+ self.samplePoints.append(l)
+
+ return l
+
+    def add_rms_contours(self, levels=5, **kwargs):
+        """Add constant centered RMS difference contours and return them
+        (e.g. for labelling with PLT.clabel)."""
+
+        rs,ts = NP.meshgrid(NP.linspace(self.smin,self.smax),
+                            NP.linspace(0,NP.pi/2))
+        # Compute centered RMS difference
+        rms = NP.sqrt(self.refstd**2 + rs**2 - 2*self.refstd*rs*NP.cos(ts))
+
+        return self.ax.contour(ts, rs, rms, levels, **kwargs)
+
+ def add_stddev_contours(self, std, corr1, corr2, **kwargs):
+ """Add a curved line with a radius of std between two points
+ [std, corr1] and [std, corr2]"""
+
+ t = NP.linspace(NP.arccos(corr1), NP.arccos(corr2))
+ r = NP.zeros_like(t) + std
+        return self.ax.plot(t, r, 'red', linewidth=2, **kwargs)
+
+ def add_contours(self,std1,corr1,std2,corr2, **kwargs):
+ """Add a line between two points
+ [std1, corr1] and [std2, corr2]"""
+
+ t = NP.linspace(NP.arccos(corr1), NP.arccos(corr2))
+ r = NP.linspace(std1, std2)
+
+        return self.ax.plot(t, r, 'red', linewidth=2, **kwargs)
+
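+# Illustrative usage (a sketch; the sample values below are made up):
+#
+#   dia = TaylorDiagram(1.0, radMax=1.5, label='Reference')
+#   dia.add_sample(1.2, 0.9, 'ro', label='Model A')
+#   dia.add_rms_contours(levels=5, colors='0.5')
+#   PLT.legend(dia.samplePoints, [p.get_label() for p in dia.samplePoints])
+#   PLT.show()
+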
Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/Taylor.py
------------------------------------------------------------------------------
svn:executable = *
Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/__init__.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/__init__.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/__init__.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/__init__.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,2 @@
+"""Collection of modules that provide functionality across all of the RCMES
+sub-packages"""
\ No newline at end of file
Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/__init__.py
------------------------------------------------------------------------------
svn:executable = *
Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/fortranfile.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/fortranfile.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/fortranfile.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/fortranfile.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,274 @@
+# Copyright 2008, 2009 Neil Martinsen-Burrell
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+
+"""
+Defines a file-derived class to read/write Fortran unformatted files.
+
+The assumption is that a Fortran unformatted file is being written by
+the Fortran runtime as a sequence of records. Each record consists of
+an integer (of the default size [usually 32 or 64 bits]) giving the
+length of the following data in bytes, then the data itself, then the
+same integer as before.
+
+Examples
+--------
+
+To use the default endian and size settings, one can just do::
+
+ >>> f = FortranFile('filename')
+ >>> x = f.readReals()
+
+One can read arrays with varying precisions::
+
+ >>> f = FortranFile('filename')
+ >>> x = f.readInts('h')
+ >>> y = f.readInts('q')
+ >>> z = f.readReals('f')
+
+Where the format codes are those used by Python's struct module.
+
+One can change the default endian-ness and header precision::
+
+ >>> f = FortranFile('filename', endian='>', header_prec='l')
+
+for a file with big-endian data whose record headers are long
+integers.
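+
+Writing follows the same pattern (a sketch; 'filename' is a placeholder)::
+
+    >>> f = FortranFile('filename', mode='wb')
+    >>> f.writeReals(numpy.array([1., 2., 3.], dtype=numpy.float32), prec='f')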
+"""
+
+__docformat__ = "restructuredtext en"
+
+import struct
+import numpy
+
+class FortranFile(file):
+
+ """File with methods for dealing with fortran unformatted data files"""
+
+ def _get_header_length(self):
+ return struct.calcsize(self._header_prec)
+ _header_length = property(fget=_get_header_length)
+
+ def _set_endian(self,c):
+ """
+ Set endian to big (c='>') or little (c='<') or native (c='@')
+
+ Parameters:
+ `c` : string
+ The endian-ness to use when reading from this file.
+ """
+ if c in '<>@=':
+ self._endian = c
+ else:
+ raise ValueError('Cannot set endian-ness')
+ def _get_endian(self):
+ return self._endian
+ ENDIAN = property(fset=_set_endian,
+ fget=_get_endian,
+ doc="Possible endian values are '<', '>', '@', '='"
+ )
+
+ def _set_header_prec(self, prec):
+ if prec in 'hilq':
+ self._header_prec = prec
+ else:
+ raise ValueError('Cannot set header precision')
+ def _get_header_prec(self):
+ return self._header_prec
+ HEADER_PREC = property(fset=_set_header_prec,
+ fget=_get_header_prec,
+ doc="Possible header precisions are 'h', 'i', 'l', 'q'"
+ )
+
+ def __init__(self, fname, endian='@', header_prec='i', *args, **kwargs):
+ """Open a Fortran unformatted file for writing.
+
+ Parameters
+ ----------
+ endian : character, optional
+ Specify the endian-ness of the file. Possible values are
+ '>', '<', '@' and '='. See the documentation of Python's
+            struct module for their meanings. The default is '@' (native
+ byte order)
+ header_prec : character, optional
+ Specify the precision used for the record headers. Possible
+ values are 'h', 'i', 'l' and 'q' with their meanings from
+ Python's struct module. The default is 'i' (the system's
+ default integer).
+
+ """
+ file.__init__(self, fname, *args, **kwargs)
+ self.ENDIAN = endian
+ self.HEADER_PREC = header_prec
+
+ def _read_exactly(self, num_bytes):
+ """Read in exactly num_bytes, raising an error if it can't be done."""
+ data = ''
+ while True:
+ l = len(data)
+ if l == num_bytes:
+ return data
+ else:
+ read_data = self.read(num_bytes - l)
+ if read_data == '':
+ raise IOError('Could not read enough data.'
+ ' Wanted %d bytes, got %d.' % (num_bytes, l))
+ data += read_data
+
+ def _read_check(self):
+ return struct.unpack(self.ENDIAN+self.HEADER_PREC,
+ self._read_exactly(self._header_length)
+ )[0]
+
+ def _write_check(self, number_of_bytes):
+ """Write the header for the given number of bytes"""
+ self.write(struct.pack(self.ENDIAN+self.HEADER_PREC,
+ number_of_bytes))
+
+ def readRecord(self):
+ """Read a single fortran record"""
+ l = self._read_check()
+ data_str = self._read_exactly(l)
+ check_size = self._read_check()
+ if check_size != l:
+ raise IOError('Error reading record from data file')
+ return data_str
+
+ def writeRecord(self,s):
+ """Write a record with the given bytes.
+
+ Parameters
+
+ s : the string to write
+
+ """
+ length_bytes = len(s)
+ self._write_check(length_bytes)
+ self.write(s)
+ self._write_check(length_bytes)
+
+ def readString(self):
+ """Read a string."""
+ return self.readRecord()
+
+ def writeString(self,s):
+ """Write a string
+
+ Parameters
+
+ s : the string to write
+
+ """
+ self.writeRecord(s)
+
+ _real_precisions = 'df'
+
+ def readReals(self, prec='f'):
+ """
+ Read in an array of real numbers.
+
+ Parameters
+
+ prec : character, optional
+
+ Specify the precision of the array using character codes from
+ Python's struct module. Possible values are 'd' and 'f'.
+
+ """
+
+ _numpy_precisions = {'d': numpy.float64,
+ 'f': numpy.float32
+ }
+
+ if prec not in self._real_precisions:
+ raise ValueError('Not an appropriate precision')
+
+ data_str = self.readRecord()
+ num = len(data_str)/struct.calcsize(prec)
+        numbers = struct.unpack(self.ENDIAN+str(num)+prec, data_str)
+ return numpy.array(numbers, dtype=_numpy_precisions[prec])
+
+ def writeReals(self, reals, prec='f'):
+ """
+ Write an array of floats in given precision
+
+ Parameters
+
+ reals : array
+ Data to write
+        prec : string
+ Character code for the precision to use in writing
+
+ """
+ if prec not in self._real_precisions:
+ raise ValueError('Not an appropriate precision')
+
+ # Don't use writeRecord to avoid having to form a
+ # string as large as the array of numbers
+ length_bytes = len(reals)*struct.calcsize(prec)
+ self._write_check(length_bytes)
+ _fmt = self.ENDIAN + prec
+ for r in reals:
+ self.write(struct.pack(_fmt,r))
+ self._write_check(length_bytes)
+
+ _int_precisions = 'hilq'
+
+ def readInts(self, prec='i'):
+ """
+ Read an array of integers.
+
+ Parameters
+
+ prec : character, optional
+ Specify the precision of the data to be read using
+ character codes from Python's struct module. Possible
+ values are 'h', 'i', 'l' and 'q'
+
+ """
+ if prec not in self._int_precisions:
+ raise ValueError('Not an appropriate precision')
+
+ data_str = self.readRecord()
+ num = len(data_str)/struct.calcsize(prec)
+ return numpy.array(struct.unpack(self.ENDIAN+str(num)+prec,data_str))
+
+ def writeInts(self, ints, prec='i'):
+ """
+ Write an array of integers in given precision
+
+ Parameters
+
+        ints : array
+ Data to write
+ prec : string
+ Character code for the precision to use in writing
+ """
+ if prec not in self._int_precisions:
+ raise ValueError('Not an appropriate precision')
+
+ # Don't use writeRecord to avoid having to form a
+ # string as large as the array of numbers
+ length_bytes = len(ints)*struct.calcsize(prec)
+ self._write_check(length_bytes)
+ _fmt = self.ENDIAN + prec
+ for item in ints:
+ self.write(struct.pack(_fmt,item))
+ self._write_check(length_bytes)
Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/fortranfile.py
------------------------------------------------------------------------------
svn:executable = *