Posted to commits@climate.apache.org by pr...@apache.org on 2013/08/27 07:35:49 UTC

svn commit: r1517753 [10/33] - in /incubator/climate/branches/rcmet-2.1.1: ./ src/ src/main/ src/main/python/ src/main/python/bin/ src/main/python/docs/ src/main/python/docs/_static/ src/main/python/docs/_templates/ src/main/python/rcmes/ src/main/pyth...

Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/metrics.py
------------------------------------------------------------------------------
    svn:executable = *

Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/process.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/process.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/process.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/process.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,1270 @@
+"""Collection of functions that process numpy arrays of varying shapes.  
+Functions can range from subsetting to regridding"""
+
+# Python Standard Library Imports
+import os, subprocess
+import math
+import datetime
+import re
+import string
+import sys
+import time
+
+# 3rd Party Imports
+import netCDF4
+import numpy as np
+import numpy.ma as ma
+from scipy.ndimage import map_coordinates
+
+
+def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, lonmin, lonmax):
+    '''
+     Extract a sub-region from a data array.
+     
+     Example::
+       A user may load a global model file, but only want to examine data over North America.
+            This function extracts that sub-domain from the original data.
+            The defined sub-region must be a regular lat/lon bounding box,
+            but the model data may be on a non-regular grid (e.g. rotated, or Gaussian grid layout).
+            Data are kept on the original model grid and a rectangular (in model-space) region
+            is extracted which contains the rectangular (in lat/lon space) user-supplied region.
+    
+     INPUT::
+       data - 3d masked data array
+       lats - 2d array of latitudes corresponding to data array
+       lons - 2d array of longitudes corresponding to data array
+       latmin, latmax, lonmin, lonmax - bounding box of required region to extract
+       
+     OUTPUT::
+       data2 - subset of original data array containing only data from required subregion
+       lats2 - subset of original latitude data array
+       lons2 - subset of original longitude data array
+
+    '''
+    
+    
+
+    # Mask model data array to find grid points inside the supplied bounding box
+    whlat = (lats > latmin) & (lats < latmax)
+    whlon = (lons > lonmin) & (lons < lonmax)
+    wh = whlat & whlon
+
+    # Find required matrix indices describing the limits of the regular lat/lon bounding box
+    jmax = np.where(wh == True)[0].max()
+    jmin = np.where(wh == True)[0].min()
+    imax = np.where(wh == True)[1].max()
+    imin = np.where(wh == True)[1].min()
+
+    # Cut out the sub-region from the data array
+    data2 = ma.zeros((data.shape[0], jmax - jmin, imax - imin))
+    data2 = data[:, jmin:jmax, imin:imax]
+
+    # Cut out sub-region from lats,lons arrays
+    lats2 = lats[jmin:jmax, imin:imax]
+    lons2 = lons[jmin:jmax, imin:imax]
+
+    return data2, lats2, lons2
+
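+# Illustrative usage sketch for extract_subregion_from_data_array (hypothetical
+# array names, not part of the original module): cut a North American box out of
+# a global masked field `t2m` with 2-d coordinate arrays `lat2d`/`lon2d`.
+#
+#   t2m_na, lat_na, lon_na = extract_subregion_from_data_array(
+#       t2m, lat2d, lon2d, latmin=15., latmax=75., lonmin=-170., lonmax=-50.)
+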
+def calc_area_mean(data, lats, lons, mymask='not set'):
+    '''
+     Calculate Area Average of data in a masked array
+     
+     INPUT::
+         data:  a masked array of data (NB. only data from one time expected to be passed at once)
+         lats:  2d array of regularly gridded latitudes
+         lons:  2d array of regularly gridded longitudes
+         mymask:  (optional) defines spatial region to do averaging over
+     
+     OUTPUT::
+         area_mean: a value for the mean inside the area
+
+    '''
+    
+    
+
+    # If mask not passed in, then set mask to cover whole data domain
+    if(mymask == 'not set'):
+       mymask = np.empty(data.shape)
+       mymask[:] = False	# NB. mask means (don't show), so False everywhere means use everything.
+   
+    # Dimension check on lats, lons
+    #  Sometimes these arrays are 3d, sometimes 2d, sometimes 1d
+    #  This bit of code just converts to the required 2d array shape
+    if(len(lats.shape) == 3):
+       lats = lats[0, :, :]
+
+    if(len(lons.shape) == 3):
+       lons = lons[0, :, :]
+
+    if(np.logical_and(len(lats.shape) == 1, len(lons.shape) == 1)):
+       lons, lats = np.meshgrid(lons, lats)
+ 
+    # Calculate grid length (assuming regular lat/lon grid)
+    dlat = lats[1, 0] - lats[0, 0]
+    dlon = lons[0, 1] - lons[0, 0]
+
+    # Calculates weights for each grid box
+    myweights = calc_area_in_grid_box(lats, dlat, dlon)
+
+    # Create a new masked array covering just user selected area (defined in mymask)
+    #   NB. this preserves missing data points in the observations data
+    subdata = ma.masked_array(data, mask=mymask)
+ 
+    if(myweights.shape != subdata.shape):
+       myweights.resize(subdata.shape)
+       myweights[1:, :] = myweights[0, :]
+
+    # Calculate weighted mean using ma.average (which takes weights)
+    area_mean = ma.average(subdata, weights=myweights)
+ 
+    return area_mean
+
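+# Illustrative usage sketch for calc_area_mean (hypothetical names): area-weighted
+# mean of a single time slice; an optional `mymask` array can restrict the average
+# to a sub-region.
+#
+#   mean_value = calc_area_mean(t2m[0, :, :], lat2d, lon2d)
+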
+def calc_area_in_grid_box(latitude, dlat, dlon):
+    '''
+     Calculate area of regular lat-lon grid box
+     
+     INPUT::
+        latitude: latitude of grid box (degrees)
+        dlat:     grid length in latitude direction (degrees)
+        dlon:     grid length in longitude direction (degrees)
+        
+     OUTPUT::
+        A:        area of the grid box (m^2)
+      
+      '''
+
+    R = 6371000  # radius of Earth in metres
+    C = 2 * math.pi * R
+
+    latitude = np.radians(latitude)
+
+    A = (dlon * (C / 360.)) * (dlat * (C / 360.) * np.cos(latitude))
+
+    return A
+
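+# Worked example for calc_area_in_grid_box (approximate figures): C/360 is about
+# 111.2 km, so a 1 deg x 1 deg box at the equator covers roughly
+# 111.2 km * 111.2 km ~ 1.24e10 m^2, and at 60 degrees latitude the cos(latitude)
+# factor halves that to ~ 6.2e9 m^2. A hypothetical call:
+#
+#   box_areas = calc_area_in_grid_box(np.array([0.0, 60.0]), 1.0, 1.0)
+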
+def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi= -999999999):
+    '''
+     Perform regridding from one set of lat,lon values onto a new set (lat2,lon2)
+    
+     Input::
+         q          - the variable to be regridded
+         lat,lon    - original co-ordinates corresponding to q values
+         lat2,lon2  - new set of latitudes and longitudes that you want to regrid q onto 
+         order      - (optional) interpolation order 1=bi-linear, 3=cubic spline
+         mdi  	    - (optional) fill value for missing data (used in creation of masked array)
+      
+     Output::
+         q2  - q regridded onto the new set of lat2,lon2 
+    
+    '''
+
+    nlat = q.shape[0]
+    nlon = q.shape[1]
+
+    nlat2 = lat2.shape[0]
+    nlon2 = lon2.shape[1]
+
+    # To make our lives easier down the road, let's 
+    # turn these into arrays of x & y coords
+    loni = lon2.ravel()
+    lati = lat2.ravel()
+
+    loni = loni.copy() # NB. it won't run unless you do this...
+    lati = lati.copy()
+
+    # Now, we'll set points outside the boundaries to lie along an edge
+    loni[loni > lon.max()] = lon.max()
+    loni[loni < lon.min()] = lon.min()
+    
+    # To deal with the "hard" break, we'll have to treat y differently,
+    # so we're just setting the min here...
+    lati[lati > lat.max()] = lat.max()
+    lati[lati < lat.min()] = lat.min()
+    
+    
+    # We need to convert these to (float) indices
+    #   (xi should range from 0 to (nx - 1), etc)
+    loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min())
+    
+    # Deal with the "hard" break in the y-direction
+    lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min())
+    
+    # Notes on dealing with MDI when regridding data.
+    #  Method adopted here:
+    #    Use bilinear interpolation of data by default (but user can specify other order using order=... in call)
+    #    Perform bilinear interpolation of data, and of mask.
+    #    To be conservative, new grid point which contained some missing data on the old grid is set to missing data.
+    #            -this is achieved by looking for any non-zero interpolated mask values.
+    #    To avoid issues with bilinear interpolation producing strong gradients leading into the MDI,
+    #     set values at MDI points to neighbouring data values so little gradient is visible - not ideal, but acceptable for now.
+    
+    # Set values in MDI so that similar to surroundings so don't produce large gradients when interpolating
+    # Preserve MDI mask, by only changing data part of masked array object.
+    for shift in (-1, 1):
+        for axis in (0, 1):
+            q_shifted = np.roll(q, shift=shift, axis=axis)
+            idx = ~q_shifted.mask * q.mask
+            q.data[idx] = q_shifted[idx]
+
+    # Now we actually interpolate
+    # map_coordinates does cubic interpolation by default, 
+    # use "order=1" to preform bilinear interpolation instead...
+    q2 = map_coordinates(q, [lati, loni], order=order)
+    q2 = q2.reshape([nlat2, nlon2])
+
+    # Set values to missing data outside of original domain
+    q2 = ma.masked_array(q2, mask=np.logical_or(np.logical_or(lat2 >= lat.max(), 
+                                                              lat2 <= lat.min()), 
+                                                np.logical_or(lon2 <= lon.min(), 
+                                                              lon2 >= lon.max())))
+    
+    # Make a second map by interpolating the mask in the same way - use this to determine locations influenced by MDI and mask these
+    qmdi = np.zeros_like(q)
+    qmdi[q.mask == True] = 1.
+    qmdi[q.mask == False] = 0.
+    qmdi_r = map_coordinates(qmdi, [lati, loni], order=order)
+    qmdi_r = qmdi_r.reshape([nlat2, nlon2])
+    mdimask = (qmdi_r != 0.0)
+    
+    # Combine missing data mask, with outside domain mask define above.
+    q2.mask = np.logical_or(mdimask, q2.mask)
+
+    return q2
+
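+# Illustrative usage sketch for do_regrid (hypothetical names): regrid one
+# observation time slice (a numpy masked array) onto a model grid using the
+# default bilinear interpolation.
+#
+#   obs_on_model_grid = do_regrid(obs[0, :, :], obslat2d, obslon2d,
+#                                 modellat2d, modellon2d)
+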
+def create_mask_using_threshold(masked_array, threshold=0.5):
+   '''
+    Routine to create a mask, depending on the proportion of times with missing data.
+    For each pixel, calculate proportion of times that are missing data,
+    **if** the proportion is above a specified threshold value,
+    then mark the pixel as missing data.
+   
+    Input::
+        masked_array - a numpy masked array of data (assumes time on axis 0, and space on axes 1 and 2).
+        threshold    - (optional) threshold proportion above which a pixel is marked as 'missing data'. 
+        NB. default threshold = 50%
+   
+    Output::
+        mymask       - a numpy array describing the mask. NB. not a masked array, just the mask itself.
+
+   '''
+   
+
+   # try, except used as some model files don't have a full mask, but a single bool
+   #  the except catches this situation and deals with it appropriately.
+   try:
+       nT = masked_array.mask.shape[0]
+
+       # For each pixel, count how many times are masked.
+       nMasked = masked_array.mask[:, :, :].sum(axis=0)
+
+       # Define new mask as when a pixel has over a defined threshold ratio of masked data
+       #   e.g. if the threshold is 75%, and there are 10 times,
+       #        then a pixel will be masked if more than 5 times are masked.
+       mymask = nMasked > (nT * threshold)
+
+   except:
+       mymask = np.zeros_like(masked_array.data[0, :, :])
+
+   return mymask
+
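+# Illustrative usage sketch for create_mask_using_threshold (hypothetical name
+# `model_data`, a (time, lat, lon) masked array): flag pixels that are missing
+# more than 75% of the time.
+#
+#   pixel_mask = create_mask_using_threshold(model_data, threshold=0.75)
+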
+def calc_average_on_new_time_unit(data, dateList, unit='monthly'):
+   '''
+    Routine to calculate averages on longer time units than the data exists on.
+    
+    Example::
+        If the data is 6-hourly, calculate daily, or monthly means.
+   
+    Input::
+        data     - data values
+        dateList - list of python datetime structures corresponding to data values
+        unit     - string describing time unit to average onto. 
+        *Valid values are: 'monthly', 'daily', 'pentad', 'annual', 'full'*
+      
+     Output:
+        meanstorem - numpy masked array of data values averaged over the required time period
+        newTimesList - a list of python datetime objects representing the data in the new averaging period
+        *NB.* currently set to beginning of averaging period, 
+        **i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.**
+   '''
+
+   # First catch unknown values of time unit passed in by user
+   acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad')
+
+   if not acceptable:
+      print 'Error: unknown unit type selected for time averaging'
+      print '       Please check your code.'
+      return
+
+   # Calculate arrays of years (2007,2007),
+   #                     yearsmonths (200701,200702),
+   #                     or yearmonthdays (20070101,20070102) 
+   #  -depending on user selected averaging period.
+
+   # Year list
+   if unit == 'annual':
+      print 'Calculating annual mean data'
+      timeunits = []
+
+      for i in np.arange(len(dateList)):
+         timeunits.append(str(dateList[i].year))
+
+      timeunits = np.array(timeunits, dtype=int)
+         
+   # YearMonth format list
+   if unit == 'monthly':
+      print 'Calculating monthly mean data'
+      timeunits = []
+
+      for i in np.arange(len(dateList)):
+         timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+
+      timeunits = np.array(timeunits, dtype=int)
+
+   # YearMonthDay format list
+   if unit == 'daily':
+      print 'Calculating daily mean data'
+      timeunits = []
+ 
+      for i in np.arange(len(dateList)):
+         timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + \
+                          str("%02d" % dateList[i].day))
+
+      timeunits = np.array(timeunits, dtype=int)
+
+
+   # TODO: add pentad setting using Julian days?
+
+
+   # Full list: a special case
+   if unit == 'full':
+      print 'Calculating means data over full time range'
+      timeunits = []
+
+      for i in np.arange(len(dateList)):
+         timeunits.append(999)  # i.e. we just want the same value for all times.
+
+      timeunits = np.array(timeunits, dtype=int)
+
+
+
+   # empty list to store new times
+   newTimesList = []
+
+   # Decide whether or not you need to do any time averaging.
+   #   i.e. if data are already on required time unit then just pass data through and 
+   #        calculate and return representative datetimes.
+   processing_required = True
+   if len(timeunits) == (len(np.unique(timeunits))):
+        processing_required = False
+
+   # 1D data arrays, i.e. time series
+   if data.ndim == 1:
+     # Create array to store the resulting data
+     meanstore = np.zeros(len(np.unique(timeunits)))
+  
+     # Calculate the means across each unique time unit
+     i = 0
+     for myunit in np.unique(timeunits):
+       if processing_required:
+         datam = ma.masked_array(data, timeunits != myunit)
+         meanstore[i] = ma.average(datam)
+
+       # construct new times list
+       smyunit = str(myunit)
+       if len(smyunit) == 4:  # YYYY
+        yyyy = int(smyunit[0:4])
+        mm = 1
+        dd = 1
+       if len(smyunit) == 6:  # YYYYMM
+        yyyy = int(smyunit[0:4])
+        mm = int(smyunit[4:6])
+        dd = 1
+       if len(smyunit) == 8:  # YYYYMMDD
+        yyyy = int(smyunit[0:4])
+        mm = int(smyunit[4:6])
+        dd = int(smyunit[6:8])
+       if len(smyunit) == 3:  # Full time range
+        # Need to set an appropriate time representing the mid-point of the entire time span
+        dt = dateList[-1] - dateList[0]
+        halfway = dateList[0] + (dt / 2)
+        yyyy = int(halfway.year)
+        mm = int(halfway.month)
+        dd = int(halfway.day)
+  
+       newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
+       i = i + 1
+
+   # 3D data arrays
+   if data.ndim == 3:
+
+     #datamask = create_mask_using_threshold(data,threshold=0.75)
+
+     # Create array to store the resulting data
+     meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]])
+  
+     # Calculate the means across each unique time unit
+     i = 0
+     datamask_store = []
+     for myunit in np.unique(timeunits):
+       if processing_required:
+
+         mask = np.zeros_like(data)
+         mask[timeunits != myunit, :, :] = 1.0
+
+         # Calculate missing data mask within each time unit...
+         datamask_at_this_timeunit = np.zeros_like(data)
+         datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75)
+         # Store results for masking later
+         datamask_store.append(datamask_at_this_timeunit[0])
+
+         # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
+         datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit))
+         meanstore[i, :, :] = ma.average(datam, axis=0)
+
+       # construct new times list
+       smyunit = str(myunit)
+       if len(smyunit) == 4:  # YYYY
+        yyyy = int(smyunit[0:4])
+        mm = 1
+        dd = 1
+       if len(smyunit) == 6:  # YYYYMM
+        yyyy = int(smyunit[0:4])
+        mm = int(smyunit[4:6])
+        dd = 1
+       if len(smyunit) == 8:  # YYYYMMDD
+        yyyy = int(smyunit[0:4])
+        mm = int(smyunit[4:6])
+        dd = int(smyunit[6:8])
+       if len(smyunit) == 3:  # Full time range
+        # Need to set an appropriate time representing the mid-point of the entire time span
+        dt = dateList[-1] - dateList[0]
+        halfway = dateList[0] + (dt / 2)
+        yyyy = int(halfway.year)
+        mm = int(halfway.month)
+        dd = int(halfway.day)
+
+       newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
+
+       i += 1
+
+     if not processing_required:
+        meanstorem = data
+
+     if processing_required:
+        # Create masked array (using missing data mask defined above)
+        datamask_store = np.array(datamask_store)
+        meanstorem = ma.masked_array(meanstore, datamask_store)
+
+   return meanstorem, newTimesList
+
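+# Illustrative usage sketch for calc_average_on_new_time_unit (hypothetical names;
+# `data` holds sub-daily values with matching datetimes in `dateList`): collapse
+# the series onto daily means.
+#
+#   daily_means, daily_times = calc_average_on_new_time_unit(data, dateList, unit='daily')
+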
+def calc_running_accum_from_period_accum(data):
+    '''
+     Routine to calculate running total accumulations from individual period accumulations.
+     ::
+    
+         e.g.  0,0,1,0,0,2,2,1,0,0
+            -> 0,0,1,1,1,3,5,6,6,6
+    
+     Input::
+          data: numpy array with time in the first axis
+    
+     Output::
+          running_acc: running accumulations
+    
+    '''
+    
+    
+    running_acc = np.zeros_like(data)
+    
+    if(len(data.shape) == 1):
+        running_acc[0] = data[0]
+    
+    if(len(data.shape) > 1):
+        running_acc[0, :] = data[0, :]
+    
+    for i in np.arange(data.shape[0] - 1):
+        if(len(data.shape) == 1):
+            running_acc[i + 1] = running_acc[i] + data[i + 1]
+    
+        if(len(data.shape) > 1):
+            running_acc[i + 1, :] = running_acc[i, :] + data[i + 1, :]
+    
+    return running_acc
+
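+# Worked example for calc_running_accum_from_period_accum, matching the docstring:
+#
+#   period_accum = np.array([0, 0, 1, 0, 0, 2, 2, 1, 0, 0])
+#   calc_running_accum_from_period_accum(period_accum)
+#   # -> array([0, 0, 1, 1, 1, 3, 5, 6, 6, 6])
+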
+def ignore_boundaries(data, rim=10):
+    '''
+     Routine to mask the lateral boundary regions of model data to ignore them from calculations.
+     
+     Input::
+         data - a masked array of model data
+         rim - (optional) number of grid points to ignore
+    
+     Output::
+         data - data array with boundary region masked
+    
+    '''
+    
+    nx = data.shape[1]
+    ny = data.shape[0]
+    
+    rimmask = np.zeros_like(data)
+    for j in np.arange(rim):
+        rimmask[j, 0:nx - 1] = 1.0
+    
+    for j in ny - 1 - np.arange(rim):
+        rimmask[j, 0:nx - 1] = 1.0
+    
+    for i in np.arange(rim):
+        rimmask[0:ny - 1, i] = 1.0
+    
+    for i in nx - 1 - np.arange(rim):
+        rimmask[0:ny - 1, i] = 1.0
+    
+    data = ma.masked_array(data, mask=rimmask)
+    
+    return data
+
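+# Illustrative usage sketch for ignore_boundaries (hypothetical name `model_slab`,
+# a 2-d masked array): mask a 10-grid-point lateral boundary rim before computing
+# domain statistics.
+#
+#   interior_only = ignore_boundaries(model_slab, rim=10)
+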
+def normalizeDatetimes(datetimes, timestep):
+    """
+    Input::
+        datetimes - list of datetime objects that need to be normalized
+        timestep - string of value ('daily' | 'monthly')
+    Output::
+        normalDatetimes - list of datetime objects that have been normalized
+    
+    Normalization Rules::
+        Daily data will be forced to an hour value of 00:00:00
+        Monthly data will be forced to the first of the month at midnight 
+    """
+    normalDatetimes = []
+    if timestep.lower() == 'monthly':
+        for inputDatetime in datetimes:
+            if inputDatetime.day != 1:
+                # Clean the inputDatetime
+                inputDatetimeString = inputDatetime.strftime('%Y%m%d')
+                normalInputDatetimeString = inputDatetimeString[:6] + '01'
+                inputDatetime = datetime.datetime.strptime(normalInputDatetimeString, '%Y%m%d')
+
+            normalDatetimes.append(inputDatetime)
+
+    elif timestep.lower() == 'daily':
+        for inputDatetime in datetimes:
+            if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0:
+                datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S')
+                normalDatetimeString = datetimeString[:8] + '000000'
+                inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
+            
+            normalDatetimes.append(inputDatetime)
+
+
+    return normalDatetimes
+
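+# Worked example for normalizeDatetimes:
+#
+#   normalizeDatetimes([datetime.datetime(2002, 3, 15)], 'monthly')
+#   # -> [datetime.datetime(2002, 3, 1, 0, 0)]
+#   normalizeDatetimes([datetime.datetime(2002, 3, 15, 6, 30)], 'daily')
+#   # -> [datetime.datetime(2002, 3, 15, 0, 0)]
+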
+def getModelTimes(modelFile, timeVarName):
+    '''
+    TODO:  Do a better job handling dates here
+    Routine to convert from model times ('hours since 1900...', 'days since ...')
+    into a python datetime structure
+
+    Input::
+        modelFile - path to the model file you want to extract the times list and modelTimeStep from
+        timeVarName - name of the time variable in the model file
+
+    Output::
+        times  - list of python datetime objects describing model data times
+        modelTimeStep - 'hourly','daily','monthly','annual'
+    '''
+
+    if os.path.exists(modelFile) == False:
+        print 'the designated file "', modelFile, '" does not exist in getModelTimes: EXIT'
+        cmnd = 'ls -l ' + modelFile
+        subprocess.call(cmnd, shell=True)
+        sys.exit()
+
+    f = netCDF4.Dataset(modelFile, mode='r')
+    xtimes = f.variables[timeVarName]
+    timeFormat = xtimes.units.encode()
+
+    # search to check if 'since' appears in units
+    try:
+        sinceLoc = re.search('since', timeFormat).end()
+
+    except AttributeError:
+        print 'Error decoding model times: time variable attributes do not contain "since"'
+        raise
+    
+    units = None
+    TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years')
+    # search for 'minutes', 'hours', 'days', 'months', 'years' to determine the units
+    for unit in TIME_UNITS:
+        if re.search(unit, timeFormat):
+            units = unit
+            break
+
+    # cut out base time (the bit following 'since')
+    base_time_string = string.lstrip(timeFormat[sinceLoc:])
+    # decode base time
+    base_time = decodeTimeFromString(base_time_string)
+    times = []
+
+    for xtime in xtimes[:]:
+        
+        # Cast time as an int
+        xtime = int(xtime)
+
+        if units == 'minutes':
+            dt = datetime.timedelta(minutes=xtime)
+            new_time = base_time + dt
+        elif units == 'hours':
+            dt = datetime.timedelta(hours=xtime)
+            new_time = base_time + dt
+        elif units == 'days':
+            dt = datetime.timedelta(days=xtime)
+            new_time = base_time + dt
+        elif units == 'months':
+            # NB. adding months in python is complicated as month length varies and is hence ambiguous.
+            # Perform date arithmetic manually, keeping the month in the range 1-12.
+            #  NB. this method will fail if the base time is on the 29th or higher day of month
+            #      - as can't have, e.g. Feb 31st.
+            total_months = base_time.month - 1 + xtime
+            new_year = int(base_time.year + total_months // 12)
+            new_month = int(total_months % 12 + 1)
+            new_time = datetime.datetime(new_year, new_month, base_time.day,
+                                         base_time.hour, base_time.minute, base_time.second)
+        elif units == 'years':
+            # datetime.timedelta has no 'years' argument, so step the year directly.
+            new_time = datetime.datetime(base_time.year + xtime, base_time.month, base_time.day,
+                                         base_time.hour, base_time.minute, base_time.second)
+
+        times.append(new_time)
+    
+    timeStepLength = int(xtimes[1] - xtimes[0] + 1.e-12)
+    modelTimeStep = getModelTimeStep(units, timeStepLength)
+    
+    times = normalizeDatetimes(times, modelTimeStep)
+    
+    return times, modelTimeStep
+
+def getModelTimeStep(units, stepSize):
+    # Time units are now determined. Determine the time intervals of input data (mdlTimeStep)
+
+    if units == 'minutes':
+        if stepSize == 60:
+            modelTimeStep = 'hourly'
+        elif stepSize == 1440:
+            modelTimeStep = 'daily'
+        # 28 days through 31 days
+        elif 40320 <= stepSize <= 44640:
+            modelTimeStep = 'monthly'
+        # 365 days through 366 days 
+        elif 525600 <= stepSize <= 527040:
+            modelTimeStep = 'annual' 
+        else:
+            raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
+
+    elif units == 'hours':
+        if stepSize == 1:
+            modelTimeStep = 'hourly'
+        elif stepSize == 24:
+            modelTimeStep = 'daily'
+        elif 672 <= stepSize <= 744:
+            modelTimeStep = 'monthly' 
+        elif 8760 <= stepSize <= 8784:
+            modelTimeStep = 'annual' 
+        else:
+            raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
+
+    elif units == 'days':
+        if stepSize == 1:
+            modelTimeStep = 'daily'
+        elif 28 <= stepSize <= 31:
+            modelTimeStep = 'monthly'
+        elif 365 <= stepSize <= 366:
+            modelTimeStep = 'annual'
+        else:
+            raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
+
+    elif units == 'months':
+        if stepSize == 1:
+            modelTimeStep = 'monthly'
+        elif stepSize == 12:
+            modelTimeStep = 'annual'
+        else:
+            raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
+
+    elif units == 'years':
+        if stepSize == 1:
+            modelTimeStep = 'annual'
+        else:
+            raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
+
+    else:
+        errorMessage = 'the time unit %s is not currently handled in this version.' % units
+        raise Exception(errorMessage)
+
+    return modelTimeStep
+
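+# Worked examples for getModelTimeStep:
+#
+#   getModelTimeStep('hours', 24)   # -> 'daily'
+#   getModelTimeStep('days', 30)    # -> 'monthly'
+#   getModelTimeStep('months', 12)  # -> 'annual'
+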
+def decodeTimeFromString(time_string):
+    '''
+     Decodes string into a python datetime object
+     *Method:* tries a series of known time formats in turn until one matches.
+     ::
+
+       **Input:**  time_string - a string that represents a date/time
+       
+       **Output:** mytime - a python datetime object
+    '''
+
+    # This will deal with times that use decimal seconds
+    if '.' in time_string:
+        time_string = time_string.split('.')[0] + '0'
+    else:
+        pass
+
+    try:
+        mytime = time.strptime(time_string, '%Y-%m-%d %H:%M:%S')
+        mytime = datetime.datetime(*mytime[0:6])
+        return mytime
+
+    except ValueError:
+        pass
+
+    try:
+        mytime = time.strptime(time_string, '%Y/%m/%d %H:%M:%S')
+        mytime = datetime.datetime(*mytime[0:6])
+        return mytime
+
+    except ValueError:
+        pass
+
+    try:
+        mytime = time.strptime(time_string, '%Y%m%d %H:%M:%S')
+        mytime = datetime.datetime(*mytime[0:6])
+        return mytime
+
+    except ValueError:
+        pass
+
+    try:
+        mytime = time.strptime(time_string, '%Y:%m:%d %H:%M:%S')
+        mytime = datetime.datetime(*mytime[0:6])
+        return mytime
+
+    except ValueError:
+        pass
+
+    try:
+        mytime = time.strptime(time_string, '%Y%m%d%H%M%S')
+        mytime = datetime.datetime(*mytime[0:6])
+        return mytime
+
+    except ValueError:
+        pass
+
+    try:
+        mytime = time.strptime(time_string, '%Y-%m-%d %H:%M')
+        mytime = datetime.datetime(*mytime[0:6])
+        return mytime
+
+    except ValueError:
+        pass
+
+    try:
+        mytime = time.strptime(time_string, '%Y-%m-%d')
+        mytime = datetime.datetime(*mytime[0:6])
+        return mytime
+
+    except ValueError:
+        pass
+
+
+    print 'Error decoding time string: string does not match a predefined time format'
+    return 0
+
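+# Worked example for decodeTimeFromString:
+#
+#   decodeTimeFromString('1979-01-01 00:00:00')
+#   # -> datetime.datetime(1979, 1, 1, 0, 0)
+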
+def regrid_wrapper(regrid_choice, obsdata, obslats, obslons, wrfdata, wrflats, wrflons):
+   '''
+    Wrapper routine for regridding.
+    
+    Either regrids model to obs grid, or obs to model grid, depending on user choice
+    
+        Inputs::
+            regrid_choice - [0] = Regrid obs data onto model grid or 
+                            [1] = Regrid model data onto obs grid
+
+            obsdata,wrfdata - data arrays
+            obslats,obslons - observation lat,lon arrays
+            wrflats,wrflons - model lat,lon arrays
+   
+        Output::
+            rdata,rdata2 - model and observation data, respectively, on the common grid
+            lats,lons    - latitudes and longitudes of the common grid
+   
+   '''
+
+   # Regrid obs data onto model grid
+   if(regrid_choice == '0'):
+
+      ndims = len(obsdata.shape)
+      if(ndims == 3):
+         newshape = [obsdata.shape[0], wrfdata.shape[1], wrfdata.shape[2]]
+         nT = obsdata.shape[0]
+
+      if(ndims == 2):
+         newshape = [wrfdata.shape[0], wrfdata.shape[1]]
+         nT = 0
+
+      rdata = wrfdata
+      lats, lons = wrflats, wrflons
+
+      # Create a new masked array with the required dimensions
+      tmp = np.zeros(newshape)
+      rdata2 = ma.MaskedArray(tmp, mask=(tmp == 0))
+      tmp = 0 # free up some memory
+
+      rdata2[:] = 0.0
+
+      if(nT > 0):
+       for t in np.arange(nT):
+         rdata2[t, :, :] = do_regrid(obsdata[t, :, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :])
+
+      if(nT == 0):
+         rdata2[:, :] = do_regrid(obsdata[:, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :])
+
+
+   # Regrid model data onto obs grid
+   if(regrid_choice == '1'):
+      ndims = len(wrfdata.shape)
+      if(ndims == 3):
+         newshape = [wrfdata.shape[0], obsdata.shape[1], obsdata.shape[2]]
+         nT = wrfdata.shape[0]
+
+      if(ndims == 2):
+         newshape = [obsdata.shape[0], obsdata.shape[1]]
+         nT = 0
+
+      rdata2 = obsdata
+      lats, lons = obslats, obslons
+
+      tmp = np.zeros(newshape)
+      rdata = ma.MaskedArray(tmp, mask=(tmp == 0))
+      tmp = 0 # free up some memory
+
+      rdata[:] = 0.0
+ 
+      if(nT > 0):
+        for t in np.arange(nT):
+          rdata[t, :, :] = do_regrid(wrfdata[t, :, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :])
+
+      if(nT == 0):
+         rdata[:, :] = do_regrid(wrfdata[:, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :])
+
+  
+   return rdata, rdata2, lats, lons
+
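+# Illustrative usage sketch for regrid_wrapper (hypothetical names): put the model
+# data onto the observation grid (regrid_choice='1'); the first two returns are the
+# model and observation data on the common grid, followed by its lats/lons.
+#
+#   model_on_obs_grid, obs_data, lats, lons = regrid_wrapper(
+#       '1', obsdata, obslats, obslons, wrfdata, wrflats, wrflons)
+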
+def extract_sub_time_selection(allTimes, subTimes, data):
+    '''
+     Routine to extract a sub-selection of times from a data array.
+
+     Example::
+           Suppose a data array has 30 time values for daily data for a whole month, 
+           but you only want the data from the 5th-15th of the month.
+    
+     Input::
+           allTimes - list of datetimes describing what times the data in the data array correspond to
+           subTimes - the times you want to extract data from
+           data     - the data array
+    
+     Output::
+           subdata     - subselection of data array
+    
+    '''
+    # Create new array to store the subselection
+    subdata = np.zeros([len(subTimes), data.shape[1], data.shape[2]])
+    
+    # Loop over all Times and when it is a member of the required subselection, copy the data across
+    i = 0     # counter for allTimes
+    subi = 0  # counter for subTimes
+    for t in allTimes:
+        if t in subTimes:
+            subdata[subi, :, :] = data[i, :, :]
+            subi += 1
+        i += 1
+    
+    return subdata
+
+def calc_average_on_new_time_unit_K(data, dateList, unit):
+    '''
+    Routine to calculate averages on longer time units than the data exists on.
+    e.g. if the data is 6-hourly, calculate daily, or monthly means.
+     
+    Input:
+        data     - data values
+        dateList - list of python datetime structures corresponding to data values
+        unit     - string describing time unit to average onto 
+                      e.g. 'monthly', 'daily', 'pentad', 'annual', 'full'
+    Output:
+        meanstorem - numpy masked array of data values averaged over the required time period
+        newTimesList - a list of python datetime objects representing the data in the new averaging period
+                           NB. currently set to beginning of averaging period, 
+                           i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
+    '''
+
+    # Check if the user-selected temporal grid is valid. If not, EXIT
+    acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
+    if not acceptable:
+        print 'Error: unknown unit type selected for time averaging: EXIT'
+        return -1,-1,-1,-1
+
+    # Calculate arrays of: annual timeseries: year (2007,2007),
+    #                      monthly time series: year-month (200701,200702),
+    #                      daily timeseries:  year-month-day (20070101,20070102) 
+    #  depending on user-selected averaging period.
+
+    # Year list
+    if unit=='annual':
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year))
+        timeunits = np.array(timeunits, dtype=int)
+         
+    # YearMonth format list
+    if unit=='monthly':
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+        timeunits = np.array(timeunits,dtype=int)
+
+    # YearMonthDay format list
+    if unit=='daily':
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
+        timeunits = np.array(timeunits,dtype=int)
+
+    # TODO: add pentad setting using Julian days?
+
+    # Full list: a special case
+    if unit == 'full':
+        comment='Calculating means data over the entire time range: i.e., annual-mean climatology'
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(999)  # i.e. we just want the same value for all times.
+        timeunits = np.array(timeunits, dtype=int)
+
+    # empty list to store new times
+    newTimesList = []
+
+    # Decide whether or not you need to do any time averaging.
+    #   i.e. if data are already on required time unit then just pass data through and 
+    #        calculate and return representative datetimes.
+    processing_required = True
+    if len(timeunits)==(len(np.unique(timeunits))):
+        processing_required = False
+
+    # 1D data arrays, i.e. time series
+    if data.ndim==1:
+        # Create array to store the resulting data
+        meanstore = np.zeros(len(np.unique(timeunits)))
+  
+        # Calculate the means across each unique time unit
+        i=0
+        for myunit in np.unique(timeunits):
+            if processing_required:
+                datam=ma.masked_array(data,timeunits!=myunit)
+                meanstore[i] = ma.average(datam)
+            
+            # construct new times list
+            smyunit = str(myunit)
+            if len(smyunit)==4:  # YYYY
+                yyyy = int(smyunit[0:4])
+                mm = 1
+                dd = 1
+            if len(smyunit)==6:  # YYYYMM
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = 1
+            if len(smyunit)==8:  # YYYYMMDD
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = int(smyunit[6:8])
+            if len(smyunit)==3:  # Full time range
+                # Need to set an appropriate time representing the mid-point of the entire time span
+                dt = dateList[-1]-dateList[0]
+                halfway = dateList[0]+(dt/2)
+                yyyy = int(halfway.year)
+                mm = int(halfway.month)
+                dd = int(halfway.day)
+
+            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+            i = i+1
+
+    # 3D data arrays
+    if data.ndim==3:
+        # datamask = create_mask_using_threshold(data,threshold=0.75)
+        # Create array to store the resulting data
+        meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
+  
+        # Calculate the means across each unique time unit
+        i=0
+        datamask_store = []
+        for myunit in np.unique(timeunits):
+            if processing_required:
+                mask = np.zeros_like(data)
+                mask[timeunits!=myunit,:,:] = 1.0
+                # Calculate missing data mask within each time unit...
+                datamask_at_this_timeunit = np.zeros_like(data)
+                datamask_at_this_timeunit[:]= create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
+                # Store results for masking later
+                datamask_store.append(datamask_at_this_timeunit[0])
+                # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
+                datam = ma.masked_array(data,np.logical_or(mask,datamask_at_this_timeunit))
+                meanstore[i,:,:] = ma.average(datam,axis=0)
+            # construct new times list
+            smyunit = str(myunit)
+            if len(smyunit)==4:  # YYYY
+                yyyy = int(smyunit[0:4])
+                mm = 1
+                dd = 1
+            if len(smyunit)==6:  # YYYYMM
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = 1
+            if len(smyunit)==8:  # YYYYMMDD
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = int(smyunit[6:8])
+            if len(smyunit)==3:  # Full time range
+                # Need to set an appropriate time representing the mid-point of the entire time span
+                dt = dateList[-1]-dateList[0]
+                halfway = dateList[0]+(dt/2)
+                yyyy = int(halfway.year)
+                mm = int(halfway.month)
+                dd = int(halfway.day)
+            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+            i += 1
+
+        if not processing_required:
+            meanstorem = data
+
+        if processing_required:
+            # Create masked array (using missing data mask defined above)
+            datamask_store = np.array(datamask_store)
+            meanstorem = ma.masked_array(meanstore, datamask_store)
+
+    return meanstorem,newTimesList
+
+def decode_model_timesK(ifile,timeVarName,file_type):
+    #################################################################################################
+    #  Convert model times ('hours since 1900...', 'days since ...') into a python datetime structure
+    #  Input:
+    #      ifile - path to a single model file
+    #      timeVarName - name of the time variable in the model file
+    #  Output:
+    #      times  - list of python datetime objects describing model data times
+    #     Peter Lean February 2011
+    #################################################################################################
+    f = netCDF4.Dataset(ifile,mode='r',format=file_type)
+    xtimes = f.variables[timeVarName]
+    timeFormat = xtimes.units.encode()
+    #timeFormat = "days since 1979-01-01 00:00:00"
+    # search to check if 'since' appears in units
+    try:
+        sinceLoc = re.search('since',timeFormat).end()
+    except:
+        print 'Error decoding model times: time var attributes do not contain "since": Exit'
+        sys.exit()
+    # search for 'minutes', 'hours', 'days', 'months', 'years' to determine the units
+    units = ''
+    for unit in ('minutes', 'hours', 'days', 'months', 'years'):
+        if re.search(unit, timeFormat):
+            units = unit
+            break
+    # cut out base time (the bit following 'since')
+    base_time_string = string.lstrip(timeFormat[sinceLoc:])
+    # decode base time
+    # 9/25/2012: datetime.timedelta has problem with the argument because xtimes is NioVariable.
+    # Soln (J. Kim): use a temp variable ttmp in the for loop, then convert it into an integer xtime.
+    base_time = decodeTimeFromString(base_time_string)
+    times=[]
+    for ttmp in xtimes[:]:
+        xtime = int(ttmp)
+        if(units=='minutes'):  
+            dt = datetime.timedelta(minutes=xtime); new_time = base_time + dt
+        if(units=='hours'):  
+            dt = datetime.timedelta(hours=xtime); new_time = base_time + dt
+        if(units=='days'):  
+            dt = datetime.timedelta(days=xtime); new_time = base_time + dt
+        if(units=='months'):   # NB. adding months in python is complicated as month length varies and is hence ambiguous.
+            # Perform date arithmetic manually, keeping the month in the range 1-12.
+            #  NB. this method will fail if the base time is on the 29th or higher day of month
+            #                      -as can't have, e.g. Feb 31st.
+            total_months = base_time.month - 1 + xtime
+            new_year = int(base_time.year + total_months // 12)
+            new_month = int(total_months % 12 + 1)
+            new_time = datetime.datetime(new_year,new_month,base_time.day,base_time.hour,base_time.minute,base_time.second)
+        if(units=='years'):
+            # datetime.timedelta has no 'years' argument, so step the year directly.
+            new_time = datetime.datetime(base_time.year + xtime, base_time.month, base_time.day, base_time.hour, base_time.minute, base_time.second)
+        times.append(new_time)
+    return times
+
+
+def regrid_in_time(data,dateList,unit):
+   #################################################################################################
+   # Routine to calculate averages on longer time units than the data exists on.
+   #  e.g. if the data is 6-hourly, calculate daily, or monthly means.
+   #  Input:
+   #         data     - data values
+   #         dateList - list of python datetime structures corresponding to data values
+   #         unit     - string describing time unit to average onto 
+   #                       e.g. 'monthly', 'daily', 'pentad', 'annual', 'full'
+   #  Output:
+   #         meanstorem - numpy masked array of data values averaged over the required time period
+   #         newTimesList - a list of python datetime objects representing the data in the new averaging period
+   #                            NB. currently set to beginning of averaging period, 
+   #                            i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
+   # ..............................
+   #   Jinwon Kim, Sept 30, 2012
+   #   Created from calc_average_on_new_time_unit_K, Peter's original, with the modification below:
+   #   Instead of the masked array used by Peter, use wh to define the data within the averaging range.
+   #################################################################################################
+
+   print '***  Begin regrid_in_time  ***'
+   # Check if the user-selected temporal grid is valid. If not, EXIT
+   acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
+   if not acceptable:
+     print 'Error: unknown unit type selected for time averaging: EXIT'; return -1,-1,-1,-1
+
+   # Calculate time arrays of: annual (yyyy, [2007]), monthly (yyyymm, [200701,200702]), daily (yyyymmdd, [20070101,20070102]) 
+   # "%02d" is similar to i2.2 in Fortran
+   if unit=='annual':                      # YYYY
+      timeunits = []
+      for i in np.arange(len(dateList)):
+         timeunits.append(str(dateList[i].year))
+   elif unit=='monthly':                   # YYYYMM
+      timeunits = []
+      for i in np.arange(len(dateList)):
+         timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+   elif unit=='daily':                     # YYYYMMDD
+      timeunits = []
+      for i in np.arange(len(dateList)):
+         timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
+   elif unit=='full':                      # Full list: a special case
+      comment='Calculating means data over the entire time range: i.e., annual-mean climatology'
+      timeunits = []
+      for i in np.arange(len(dateList)):
+         timeunits.append(999)             # i.e. we just want the same value for all times.
+   timeunits = np.array(timeunits,dtype=int)
+   print 'timeRegridOption= ',unit#'; output timeunits= ',timeunits
+   #print 'timeRegridOption= ',unit,'; output timeunits= ',timeunits
+
+   # empty list to store time levels after temporal regridding
+   newTimesList = []
+
+   # Decide whether or not you need to do any time averaging.
+   #   i.e. if data are already on required time unit then just pass data through and calculate and return representative datetimes.
+   processing_required = True
+   if len(timeunits)==(len(np.unique(timeunits))):
+        processing_required = False
+   print 'processing_required= ',processing_required,': input time steps= ',len(timeunits),': regridded output time steps = ',len(np.unique(timeunits))
+   #print 'np.unique(timeunits): ',np.unique(timeunits)
+   print 'data.ndim= ',data.ndim
+
+   if data.ndim==1:                                           # 1D data arrays, i.e. 1D time series
+      # Create array to store the temporally regridded data
+     meanstore = np.zeros(len(np.unique(timeunits)))
+      # Calculate the means across each unique time unit
+     i=0
+     for myunit in np.unique(timeunits):
+       if processing_required:
+         wh = timeunits==myunit
+         datam = data[wh]
+         meanstore[i] = ma.average(datam)
+        # construct new times list
+       smyunit = str(myunit)
+       if len(smyunit)==4:  # YYYY
+        yyyy = int(smyunit[0:4])
+        mm = 1
+        dd = 1
+       if len(smyunit)==6:  # YYYYMM
+        yyyy = int(smyunit[0:4])
+        mm = int(smyunit[4:6])
+        dd = 1
+       if len(smyunit)==8:  # YYYYMMDD
+        yyyy = int(smyunit[0:4])
+        mm = int(smyunit[4:6])
+        dd = int(smyunit[6:8])
+       if len(smyunit)==3:  # Full time range
+        # Need to set an appropriate time representing the mid-point of the entire time span
+        dt = dateList[-1]-dateList[0]
+        halfway = dateList[0]+(dt/2)
+        yyyy = int(halfway.year)
+        mm = int(halfway.month)
+        dd = int(halfway.day)
+       newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+       i = i+1
+
+   elif data.ndim==3:                                         # 3D data arrays, i.e. 2D time series
+      # Create array to store the resulting data
+     meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
+     # Calculate the means across each unique time unit
+     i=0
+     datamask_store = []
+     for myunit in np.unique(timeunits):
+       if processing_required:
+         wh = timeunits==myunit
+         datam = data[wh,:,:]
+         meanstore[i,:,:] = ma.average(datam,axis=0)
+        # construct new times list
+       smyunit = str(myunit)
+       if len(smyunit)==4:  # YYYY
+        yyyy = int(smyunit[0:4])
+        mm = 1
+        dd = 1
+       if len(smyunit)==6:  # YYYYMM
+        yyyy = int(smyunit[0:4])
+        mm = int(smyunit[4:6])
+        dd = 1
+       if len(smyunit)==8:  # YYYYMMDD
+        yyyy = int(smyunit[0:4])
+        mm = int(smyunit[4:6])
+        dd = int(smyunit[6:8])
+       if len(smyunit)==3:  # Full time range
+        # Need to set an appropriate time representing the mid-point of the entire time span
+        dt = dateList[-1]-dateList[0]
+        halfway = dateList[0]+(dt/2)
+        yyyy = int(halfway.year)
+        mm = int(halfway.month)
+        dd = int(halfway.day)
+       newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+       i += 1
+
+     if not processing_required:
+        meanstorem = data
+     elif processing_required:
+        meanstorem = meanstore
+
+   print '***  End regrid_in_time  ***'
+   return meanstorem,newTimesList

Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/process.py
------------------------------------------------------------------------------
    svn:executable = *

Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/Taylor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/Taylor.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/Taylor.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/Taylor.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,126 @@
+"""
+Taylor diagram (Taylor, 2001) test implementation.
+ 
+http://www-pcmdi.llnl.gov/about/staff/Taylor/CV/Taylor_diagram_primer.htm
+"""
+ 
+__version__ = "Time-stamp: <2012-02-17 20:59:35 ycopin>"
+__author__ = "Yannick Copin <ya...@laposte.net>"
+ 
+import numpy as NP
+import matplotlib.pyplot as PLT
+ 
+class TaylorDiagram(object):
+    """Taylor diagram: plot model standard deviation and correlation
+    to reference (data) sample in a single-quadrant polar plot, with
+    r=stddev and theta=arccos(correlation).
+    """
+ 
+    def __init__(self, refstd, radMax = 1.5, fig=None, rect=111, label='_'):
+        """Set up Taylor diagram axes, i.e. single quadrant polar
+        plot, using mpl_toolkits.axisartist.floating_axes. refstd is
+        the reference standard deviation to be compared to.
+        """
+ 
+        from matplotlib.projections import PolarAxes
+        import mpl_toolkits.axisartist.floating_axes as FA
+        import mpl_toolkits.axisartist.grid_finder as GF
+ 
+        self.refstd = refstd            # Reference standard deviation
+ 
+        tr = PolarAxes.PolarTransform()
+ 
+        # Correlation labels
+        rlocs = NP.concatenate((NP.arange(10)/10.,[0.95,0.99]))
+        tlocs = NP.arccos(rlocs)        # Conversion to polar angles
+        gl1 = GF.FixedLocator(tlocs)    # Positions
+        tf1 = GF.DictFormatter(dict(zip(tlocs, map(str,rlocs))))
+ 
+        # Standard deviation axis extent
+        self.smin = 0
+        self.smax = radMax * self.refstd
+ 
+        ghelper = FA.GridHelperCurveLinear(tr,
+                                           extremes=(0,NP.pi/2, # 1st quadrant
+                                                     self.smin,self.smax),
+                                           grid_locator1=gl1,
+                                           tick_formatter1=tf1,
+                                           )
+ 
+        if fig is None:
+            fig = PLT.figure()
+ 
+        ax = FA.FloatingSubplot(fig, rect, grid_helper=ghelper)
+        fig.add_subplot(ax)
+ 
+        # Adjust axes
+        ax.axis["top"].set_axis_direction("bottom")  # "Angle axis"
+        ax.axis["top"].toggle(ticklabels=True, label=True)
+        ax.axis["top"].major_ticklabels.set_axis_direction("top")
+        ax.axis["top"].label.set_axis_direction("top")
+        ax.axis["top"].label.set_text("Correlation")
+ 
+        ax.axis["left"].set_axis_direction("bottom") # "X axis"
+        ax.axis["left"].label.set_text("Standardized deviation")
+ 
+        ax.axis["right"].set_axis_direction("top")   # "Y axis"
+        ax.axis["right"].toggle(ticklabels=True)
+        ax.axis["right"].major_ticklabels.set_axis_direction("left")
+ 
+        ax.axis["bottom"].set_visible(False)         # Useless
+        
+        # Contours along standard deviations
+        ax.grid(False)
+ 
+        self._ax = ax                   # Graphical axes
+        self.ax = ax.get_aux_axes(tr)   # Polar coordinates
+ 
+        # Add reference point and stddev contour
+        # print "Reference std:", self.refstd
+        l, = self.ax.plot([0], self.refstd, 'k*',
+                          ls='', ms=10, label=label)
+        t = NP.linspace(0, NP.pi/2)
+        r = NP.zeros_like(t) + self.refstd
+        self.ax.plot(t,r, 'k--', label='_')
+ 
+        # Collect sample points for later use (e.g. legend)
+        self.samplePoints = [l]
+ 
+    def add_sample(self, stddev, corrcoef, *args, **kwargs):
+        """Add sample (stddev,corrcoeff) to the Taylor diagram. args
+        and kwargs are directly propagated to the Figure.plot
+        command."""
+ 
+        l, = self.ax.plot(NP.arccos(corrcoef), stddev,
+                          *args, **kwargs) # (theta,radius)
+        self.samplePoints.append(l)
+ 
+        return l
+ 
+    def add_rms_contours(self, levels=5, **kwargs):
+        """Add constant centered RMS difference contours."""
+ 
+        rs,ts = NP.meshgrid(NP.linspace(self.smin,self.smax),
+                            NP.linspace(0,NP.pi/2))
+        # Compute centered RMS difference
+        rms = NP.sqrt(self.refstd**2 + rs**2 - 2*self.refstd*rs*NP.cos(ts))
+        
+        contours = self.ax.contour(ts, rs, rms, levels, **kwargs)
+        return contours
+    
+    def add_stddev_contours(self, std, corr1, corr2, **kwargs):
+        """Add a curved line with a radius of std between two points
+        [std, corr1] and [std, corr2]"""
+
+        t = NP.linspace(NP.arccos(corr1), NP.arccos(corr2)) 
+        r = NP.zeros_like(t) + std
+        return self.ax.plot(t,r,'red', linewidth=2)
+    
+    def add_contours(self,std1,corr1,std2,corr2, **kwargs):
+        """Add a line between two points
+        [std1, corr1] and [std2, corr2]"""
+        
+        t = NP.linspace(NP.arccos(corr1), NP.arccos(corr2)) 
+        r = NP.linspace(std1, std2)
+
+        return self.ax.plot(t,r,'red',linewidth=2)
+ 
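+# Illustrative usage sketch (hypothetical numbers; assumes a reference field with
+# standard deviation 1.0 and one model sample with stddev 0.8 and correlation 0.9):
+#
+#   dia = TaylorDiagram(1.0, radMax=1.5, label='Reference')
+#   dia.add_sample(0.8, 0.9, 'ro', label='Model')
+#   dia.add_rms_contours(levels=5, colors='0.5')
+#   PLT.legend(dia.samplePoints, [p.get_label() for p in dia.samplePoints],
+#              loc='upper right')
+#   PLT.show()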

Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/Taylor.py
------------------------------------------------------------------------------
    svn:executable = *

Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/__init__.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/__init__.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/__init__.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/__init__.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,2 @@
+"""Collection of modules that provide functionality across all of the RCMES
+sub-packages"""
\ No newline at end of file

Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/__init__.py
------------------------------------------------------------------------------
    svn:executable = *

Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/fortranfile.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/fortranfile.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/fortranfile.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/fortranfile.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,274 @@
+# Copyright 2008, 2009 Neil Martinsen-Burrell
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+
+"""
+Defines a file-derived class to read/write Fortran unformatted files.
+
+The assumption is that a Fortran unformatted file is being written by
+the Fortran runtime as a sequence of records.  Each record consists of
+an integer (of the default size [usually 32 or 64 bits]) giving the
+length of the following data in bytes, then the data itself, then the
+same integer as before.
+
+Examples
+--------
+
+To use the default endian and size settings, one can just do::
+
+    >>> f = FortranFile('filename')
+    >>> x = f.readReals()
+
+One can read arrays with varying precisions::
+
+    >>> f = FortranFile('filename')
+    >>> x = f.readInts('h')
+    >>> y = f.readInts('q')
+    >>> z = f.readReals('f')
+
+where the format codes are those used by Python's struct module.
+
+One can change the default endian-ness and header precision::
+
+    >>> f = FortranFile('filename', endian='>', header_prec='l')
+    
+for a file with big-endian data whose record headers are long
+integers.
+"""
+
+__docformat__ = "restructuredtext en"
+
+import struct
+import numpy
+
+class FortranFile(file):
+
+    """File with methods for dealing with fortran unformatted data files"""
+
+    def _get_header_length(self):
+        return struct.calcsize(self._header_prec)
+    _header_length = property(fget=_get_header_length)
+
+    def _set_endian(self,c):
+        """
+        Set endian to big (c='>'), little (c='<'), native (c='@'), or
+        native with standard sizes (c='=')
+
+        Parameters:
+          `c` : string
+            The endian-ness to use when reading from this file.
+        """
+        if c in '<>@=':
+            self._endian = c
+        else:
+            raise ValueError('Cannot set endian-ness')
+    def _get_endian(self):
+        return self._endian
+    ENDIAN = property(fset=_set_endian,
+                      fget=_get_endian,
+                      doc="Possible endian values are '<', '>', '@', '='"
+                     )
+
+    def _set_header_prec(self, prec):
+        if prec in 'hilq':
+            self._header_prec = prec
+        else:
+            raise ValueError('Cannot set header precision')
+    def _get_header_prec(self):
+        return self._header_prec
+    HEADER_PREC = property(fset=_set_header_prec,
+                           fget=_get_header_prec,
+                           doc="Possible header precisions are 'h', 'i', 'l', 'q'"
+                          )
+
+    def __init__(self, fname, endian='@', header_prec='i', *args, **kwargs):
+        """Open a Fortran unformatted file for writing.
+        
+        Parameters
+        ----------
+        endian : character, optional
+            Specify the endian-ness of the file.  Possible values are
+            '>', '<', '@' and '='.  See the documentation of Python's
+            struct module for their meanings.  The default is '@' (native
+            byte order).
+        header_prec : character, optional
+            Specify the precision used for the record headers.  Possible
+            values are 'h', 'i', 'l' and 'q' with their meanings from
+            Python's struct module.  The default is 'i' (the system's
+            default integer).
+
+        """
+        file.__init__(self, fname, *args, **kwargs)
+        self.ENDIAN = endian
+        self.HEADER_PREC = header_prec
+
+    def _read_exactly(self, num_bytes):
+        """Read in exactly num_bytes, raising an error if it can't be done."""
+        data = ''
+        while True:
+            l = len(data)
+            if l == num_bytes:
+                return data
+            else:
+                read_data = self.read(num_bytes - l)
+            if read_data == '':
+                raise IOError('Could not read enough data.'
+                              '  Wanted %d bytes, got %d.' % (num_bytes, l))
+            data += read_data
+
+    def _read_check(self):
+        return struct.unpack(self.ENDIAN+self.HEADER_PREC,
+                             self._read_exactly(self._header_length)
+                            )[0]
+
+    def _write_check(self, number_of_bytes):
+        """Write the header for the given number of bytes"""
+        self.write(struct.pack(self.ENDIAN+self.HEADER_PREC,
+                               number_of_bytes))
+
+    def readRecord(self):
+        """Read a single fortran record"""
+        l = self._read_check()
+        data_str = self._read_exactly(l)
+        check_size = self._read_check()
+        if check_size != l:
+            raise IOError('Error reading record from data file')
+        return data_str
+
+    def writeRecord(self,s):
+        """Write a record with the given bytes.
+
+        Parameters
+        
+            s : the string to write
+
+        """
+        length_bytes = len(s)
+        self._write_check(length_bytes)
+        self.write(s)
+        self._write_check(length_bytes)
+
+    def readString(self):
+        """Read a string."""
+        return self.readRecord()
+
+    def writeString(self,s):
+        """Write a string
+
+        Parameters
+        
+            s : the string to write
+        
+        """
+        self.writeRecord(s)
+
+    _real_precisions = 'df'
+
+    def readReals(self, prec='f'):
+        """
+        Read in an array of real numbers.
+        
+        Parameters
+        
+            prec : character, optional
+                Specify the precision of the array using character codes from
+                Python's struct module.  Possible values are 'd' and 'f'.
+            
+        """
+        
+        _numpy_precisions = {'d': numpy.float64,
+                             'f': numpy.float32
+                            }
+
+        if prec not in self._real_precisions:
+            raise ValueError('Not an appropriate precision')
+            
+        data_str = self.readRecord()
+        num = len(data_str)/struct.calcsize(prec)
+        numbers = struct.unpack(self.ENDIAN + str(num) + prec, data_str)
+        return numpy.array(numbers, dtype=_numpy_precisions[prec])
+
+    def writeReals(self, reals, prec='f'):
+        """
+        Write an array of floats in given precision
+
+        Parameters
+        
+        reals : array
+            Data to write
+        prec : string
+            Character code for the precision to use in writing
+        
+        """
+        if prec not in self._real_precisions:
+            raise ValueError('Not an appropriate precision')
+        
+        # Don't use writeRecord to avoid having to form a
+        # string as large as the array of numbers
+        length_bytes = len(reals)*struct.calcsize(prec)
+        self._write_check(length_bytes)
+        _fmt = self.ENDIAN + prec
+        for r in reals:
+            self.write(struct.pack(_fmt,r))
+        self._write_check(length_bytes)
+    
+    _int_precisions = 'hilq'
+
+    def readInts(self, prec='i'):
+        """
+        Read an array of integers.
+        
+        Parameters
+        
+            prec : character, optional
+                Specify the precision of the data to be read using 
+                character codes from Python's struct module.  Possible
+                values are 'h', 'i', 'l' and 'q'
+            
+        """
+        if prec not in self._int_precisions:
+            raise ValueError('Not an appropriate precision')
+            
+        data_str = self.readRecord()
+        num = len(data_str)/struct.calcsize(prec)
+        return numpy.array(struct.unpack(self.ENDIAN+str(num)+prec,data_str))
+
+    def writeInts(self, ints, prec='i'):
+        """
+        Write an array of integers in given precision
+
+        Parameters
+        
+            ints : array
+                Data to write
+            prec : string
+                Character code for the precision to use in writing
+        """
+        if prec not in self._int_precisions:
+            raise ValueError('Not an appropriate precision')
+        
+        # Don't use writeRecord to avoid having to form a
+        # string as large as the array of numbers
+        length_bytes = len(ints)*struct.calcsize(prec)
+        self._write_check(length_bytes)
+        _fmt = self.ENDIAN + prec
+        for item in ints:
+            self.write(struct.pack(_fmt,item))
+        self._write_check(length_bytes)

Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/utils/fortranfile.py
------------------------------------------------------------------------------
    svn:executable = *
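
A minimal round-trip sketch for FortranFile, illustrating the record framing
described in the fortranfile.py module docstring (length header, payload, then
the same length header again). The file name and values are illustrative;
because the class subclasses the Python 2 built-in file type, this is Python 2
code, and the open mode must be passed as a keyword so it is not mistaken for
the endian argument.

    import numpy

    data = numpy.arange(10, dtype=numpy.float32)

    # Write one unformatted record: a length header (typically a 4-byte integer
    # holding 40), 10 * 4 bytes of float payload, then the same header again.
    f = FortranFile('example.dat', mode='wb')
    f.writeReals(data, prec='f')
    f.close()

    # Read the record back; readReals('f') returns a numpy.float32 array.
    f = FortranFile('example.dat', mode='rb')
    x = f.readReals('f')
    f.close()

    assert numpy.allclose(x, data)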