Posted to commits@climate.apache.org by pr...@apache.org on 2013/08/27 07:35:49 UTC
svn commit: r1517753 [6/33] - in /incubator/climate/branches/rcmet-2.1.1: ./
src/ src/main/ src/main/python/ src/main/python/bin/ src/main/python/docs/
src/main/python/docs/_static/ src/main/python/docs/_templates/
src/main/python/rcmes/ src/main/pytho...
Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/storage/files.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/storage/files.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/storage/files.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/storage/files.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,760 @@
+"""
+Module for handling data input files.
+This module can easily open NetCDF, HDF and Grib files.
+"""
+
+from os import path
+
+import netCDF4
+import numpy as np
+import numpy.ma as ma
+import sys
+
+from toolkit import process
+from utils import fortranfile
+from utils import misc
+
+
+VARIABLE_NAMES = {'time': ['time', 'times', 'date', 'dates', 'julian'],
+                  'latitude': ['latitude', 'lat', 'lats', 'latitudes'],
+                  'longitude': ['longitude', 'lon', 'lons', 'longitudes']
+                  }
+
+
+def findunique(seq):
+    """Return the unique elements of seq (order is not preserved)."""
+    return list(set(seq))
+
+def getVariableByType(filename, variableType):
+    """
+    Try to return the name of the variable in a file that matches a given
+    coordinate type.
+
+    Input::
+        filename - the file to inspect
+        variableType - time | latitude | longitude
+
+    Output::
+        variable name, OR the list of all variables in the file if a single
+        matching variable name cannot be found.
+    """
+    try:
+        f = netCDF4.Dataset(filename, mode='r')
+    except:
+        print "netCDF4Error:", sys.exc_info()[0]
+        raise
+
+    variableKeys = f.variables.keys()
+    f.close()
+    variableKeys = [variable.encode().lower() for variable in variableKeys]
+    variableMatch = VARIABLE_NAMES[variableType]
+
+    commonVariables = list(set(variableKeys).intersection(variableMatch))
+
+    if len(commonVariables) == 1:
+        return str(commonVariables[0])
+    else:
+        return variableKeys
+
+def getVariableRange(filename, variableName):
+    """
+    Return the min and max values of the given variable in the supplied file.
+
+    Input::
+        filename - absolute path to a file
+        variableName - variable whose min and max values should be returned
+
+    Output::
+        variableRange - tuple of the form (variableMin, variableMax)
+    """
+    try:
+        f = netCDF4.Dataset(filename, mode='r')
+    except:
+        print "netCDF4Error:", sys.exc_info()[0]
+        raise
+
+    varArray = f.variables[variableName][:]
+    f.close()
+    return (varArray.min(), varArray.max())
+
+
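+# Example usage of the two helpers above (an illustrative sketch, not part of
+# the original module; 'model.nc' and its variable names are hypothetical):
+#
+#     >>> getVariableByType('model.nc', 'time')
+#     'time'
+#     >>> getVariableByType('model.nc', 'latitude')  # no single match: full key list returned
+#     ['time', 'xlat', 'xlong', 't2']
+#     >>> getVariableRange('model.nc', 't2')
+#     (231.4, 312.8)
+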
+def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, lonVarName):
+    '''
+    Read in data from a list of model files into a single data structure.
+
+    Input:
+        filelist - list of filenames (including path)
+        myvar - string containing name of variable to load in (as it appears in file)
+    Output:
+        lat, lon - 2D arrays of latitude and longitude values
+        timestore - list of times
+        t2store - numpy array containing data from all files
+
+    NB. originally written specifically for WRF netCDF output files;
+        modified to make more general (Feb 2011)
+
+    Peter Lean July 2010
+    '''
+    filelist.sort()
+    # Exit cleanly if 'filelist' is empty; this check must come before indexing into the list
+    # TODO: raise an exception here instead of calling sys.exit()
+    if len(filelist) == 0:
+        print 'Error: no files have been passed to read_data_from_file_list()'
+        sys.exit()
+    filename = filelist[0]
+
+    # Open the first file in the list to:
+    #  i) read in lats, lons
+    #  ii) find out how many timesteps are in the file
+    #      (assume the same ntimes in each file in the list)
+    #      -allows you to create an empty array to store variable data for all times
+    tmp = netCDF4.Dataset(filename, mode='r')
+    latsraw = tmp.variables[latVarName][:]
+    lonsraw = tmp.variables[lonVarName][:]
+    lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360.  # convert to -180,180 if necessary
+
+    # TODO: Guard against case where latsraw and lonsraw are not the same dim?
+    if latsraw.ndim == 1:
+        lon, lat = np.meshgrid(lonsraw, latsraw)
+    elif latsraw.ndim == 2:
+        lon = lonsraw
+        lat = latsraw
+
+    timesraw = tmp.variables[timeVarName]
+    ntimes = len(timesraw)
+
+    print 'Lats and lons read in for first file in filelist'
+
+    # Create a single empty masked array to store model data from all files
+    t2store = ma.zeros((ntimes * len(filelist), len(lat[:, 0]), len(lon[0, :])))
+    timestore = ma.zeros((ntimes * len(filelist)))
+
+    # Now load in the data for real
+    # NB. no need to reload the latitudes and longitudes -assume invariant
+    i = 0
+    timesaccu = 0  # a counter for the number of times stored so far in t2store
+    # NB. this method allows for missing times in data files,
+    #     as no assumption is made that each file holds the same number of times
+
+    for ifile in filelist:
+        f = netCDF4.Dataset(ifile, mode='r')
+        t2raw = f.variables[myvar][:]
+        timesraw = f.variables[timeVarName]
+        time = timesraw[:]
+        ntimes = len(time)
+        print 'file= ', i, 'ntimes= ', ntimes, filelist[i]
+        print 't2raw shape: ', t2raw.shape
+
+        # Flatten dimensions which needn't exist, i.e. level
+        # e.g. for a single level the data often have 4 dimensions, when 3 dimensions will do.
+        # Code requires data to have dimensions (time,lat,lon),
+        # i.e. remove the level dimension by squeezing 1-d axes out of the t2raw array.
+        # Example: t2raw.shape == (365, 180, 360, 1) <maps to (time, lat, lon, height)>
+        # After the squeeze you will be left with (365, 180, 360) instead
+        t2tmp = t2raw.squeeze()
+        # NB. if this happens to be data for a single time only, then we just flattened it by accident;
+        # let's put it back...
+        if t2tmp.ndim == 2:
+            t2tmp = np.expand_dims(t2tmp, 0)
+
+        t2store[timesaccu + np.arange(ntimes), :, :] = t2tmp[:, :, :]
+        timestore[timesaccu + np.arange(ntimes)] = time
+        timesaccu = timesaccu + ntimes
+        f.close()
+        i += 1
+
+    print 'Data read in successfully with dimensions: ', t2store.shape
+
+    # TODO: search for duplicated entries (same time) and remove duplicates.
+    # Check whether the number of unique times equals the number of stored times; if so then no problem
+    if len(np.unique(timestore)) != len(np.where(timestore != 0)[0]):
+        print 'WARNING: Possible duplicated times'
+
+    # Decode model times into python datetime objects. Note: timestore becomes a list (no longer an array) here
+    timestore, _ = process.getModelTimes(filename, timeVarName)
+
+    data_dict = {}
+    data_dict['lats'] = lat
+    data_dict['lons'] = lon
+    data_dict['times'] = timestore
+    data_dict['data'] = t2store
+    return data_dict
+
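+# Example (a hedged sketch; the file list and variable names are hypothetical):
+# read_data_from_file_list returns a dict keyed by 'lats', 'lons', 'times' and
+# 'data', with 'data' stacked along the time axis across all files.
+#
+#     >>> wrf_files = ['/data/wrfout_2001.nc', '/data/wrfout_2002.nc']
+#     >>> d = read_data_from_file_list(wrf_files, 't2', 'time', 'lat', 'lon')
+#     >>> d['data'].shape          # (ntimes_total, nlats, nlons)
+#     (24, 180, 360)
+#     >>> len(d['times']) == d['data'].shape[0]
+#     True
+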
+def select_var_from_file(myfile, fmt='not set'):
+    '''
+    Routine to act as a user interface allowing users to select the variable of interest from a file.
+
+    Input:
+        myfile - filename
+        fmt - (optional) file format specifier, for cases where the filename suffix is non-standard
+
+    Output:
+        myvar - variable name in file
+
+    Peter Lean September 2010
+    '''
+    if fmt == 'not set':
+        f = netCDF4.Dataset(myfile, mode='r')
+    else:
+        f = netCDF4.Dataset(myfile, mode='r', format=fmt)
+
+    keylist = [key.encode().lower() for key in f.variables.keys()]
+
+    i = 0
+    for v in keylist:
+        print '[', i, '] ', f.variables[v].long_name, ' (', v, ')'
+        i += 1
+
+    user_selection = raw_input('Please select variable : [0 -' + str(i - 1) + '] ')
+
+    myvar = keylist[int(user_selection)]
+    f.close()
+
+    return myvar
+
+def select_var_from_wrf_file(myfile):
+    '''
+    Routine to act as a user interface allowing users to select the variable of interest from a WRF netCDF file.
+
+    Input:
+        myfile - filename
+
+    Output:
+        mywrfvar - variable name in wrf file
+
+    Peter Lean September 2010
+    '''
+    f = netCDF4.Dataset(myfile, mode='r', format='NETCDF4')
+
+    keylist = f.variables.keys()
+
+    i = 0
+    for v in keylist:
+        try:
+            print '[', i, '] ', f.variables[v].description, ' (', v, ')'
+        except:
+            # some WRF variables carry no 'description' attribute; still show the index and name
+            print '[', i, '] (', v, ')'
+        i += 1
+
+    user_selection = raw_input('Please select WRF variable : [0 -' + str(i - 1) + '] ')
+
+    mywrfvar = keylist[int(user_selection)]
+    f.close()
+
+    return mywrfvar
+
+def read_lolaT_from_file(filename, latVarName, lonVarName, timeVarName, file_type):
+    """
+    Function that will return lat, lon, and time arrays.
+
+    Input::
+        filename - the file to inspect
+        latVarName - name of the Latitude Variable
+        lonVarName - name of the Longitude Variable
+        timeVarName - name of the Time Variable
+        file_type - type of file we are trying to parse
+
+    Output::
+        lat - MESH GRID of Latitude values with shape (nx, ny)
+        lon - MESH GRID of Longitude values with shape (nx, ny)
+        timestore - Python list of Datetime objects
+
+    MESHGRID docs: http://docs.scipy.org/doc/numpy/reference/generated/numpy.meshgrid.html
+    """
+    tmp = netCDF4.Dataset(filename, mode='r', format=file_type)
+    lonsraw = tmp.variables[lonVarName][:]
+    latsraw = tmp.variables[latVarName][:]
+    lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360.  # convert to -180,180 if necessary
+    if latsraw.ndim == 1:
+        lon, lat = np.meshgrid(lonsraw, latsraw)
+    elif latsraw.ndim == 2:
+        lon = lonsraw
+        lat = latsraw
+    timestore, _ = process.getModelTimes(filename, timeVarName)
+    print ' read_lolaT_from_file: Lats, lons and times read in for the model domain'
+    return lat, lon, timestore
+
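+# Example of the meshgrid convention used above (illustrative only): for 1-d
+# coordinate inputs, lat/lon are expanded to 2-d grids of shape (nlats, nlons).
+#
+#     >>> lons1d = np.array([-10., 0., 10.])
+#     >>> lats1d = np.array([40., 50.])
+#     >>> lon, lat = np.meshgrid(lons1d, lats1d)
+#     >>> lon.shape, lat.shape
+#     ((2, 3), (2, 3))
+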
+def read_data_from_one_file(ifile, myvar, timeVarName, lat, file_type):
+    ##################################################################################
+    # Read in data from one file at a time
+    # Input:   ifile - filename (including path)
+    #          myvar - string containing name of variable to load in (as it appears in file)
+    # Output:  timestore - list of times
+    #          t2tmp - masked array containing data from the file
+    #          varUnit - the variable's unit string
+    # Modified from read_data_from_file_list to read data from multiple models, one file at a time
+    # 1. The code now processes model data that completely covers the 20-yr period. Thus,
+    #    all model data must have the same time levels (ntimes). Unlike in the original, ntimes
+    #    is fixed here.
+    # 2. Because one of the model datasets exceeds 240 months (243 months), the model data must be
+    #    truncated to 240 months using the ntimes determined from the first file.
+    ##################################################################################
+    f = netCDF4.Dataset(ifile, mode='r')
+    try:
+        varUnit = f.variables[myvar].units.encode().upper()
+    except:
+        varUnit = raw_input('Enter the model variable unit: \n> ').upper()
+    t2raw = f.variables[myvar][:]
+    t2tmp = t2raw.squeeze()
+    if t2tmp.ndim == 2:
+        t2tmp = np.expand_dims(t2tmp, 0)
+    t2tmp = ma.array(t2tmp)
+
+    f.close()
+    print ' success read_data_from_one_file: VarName=', myvar, ' Shape(Full)= ', t2tmp.shape, ' Unit= ', varUnit
+    timestore = process.decode_model_timesK(ifile, timeVarName, file_type)
+    return timestore, t2tmp, varUnit
+
+def findTimeVariable(filename):
+    """
+    Function to find what the time variable is called in a model file.
+    Input::
+        filename - file to crack open and check for a time variable
+    Output::
+        timeName - name of the input file's time variable
+        variableNameList - list of variable names from the input filename
+    """
+    try:
+        f = netCDF4.Dataset(filename, mode='r')
+    except:
+        print("Unable to open '%s' to try and read the Time variable" % filename)
+        raise
+
+    variableNameList = [variable.encode() for variable in f.variables.keys()]
+    f.close()
+    # convert all variable names into lower case
+    varNameListLowerCase = [x.lower() for x in variableNameList]
+
+    # Use "set" types to find a common variable name between the file's variables and the list of possibilities
+    possibleTimeNames = set(['time', 'times', 'date', 'dates', 'julian'])
+
+    # Use the sets to find the intersection where variable names are in possibleNames
+    timeNameSet = possibleTimeNames.intersection(varNameListLowerCase)
+
+    if len(timeNameSet) == 0:
+        print("Unable to autodetect the Time Variable Name in '%s'" % filename)
+        timeName = misc.askUserForVariableName(variableNameList, targetName="Time")
+    else:
+        timeName = timeNameSet.pop()
+
+    return timeName, variableNameList
+
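+# Example usage (a hedged sketch; 'model.nc' is hypothetical): when a known
+# time name is present it is returned directly, otherwise the user is prompted
+# via misc.askUserForVariableName.
+#
+#     >>> timeName, allVars = findTimeVariable('model.nc')
+#     >>> timeName
+#     'time'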
+
+def findLatLonVarFromFile(filename):
+    """
+    Function to find what the latitude and longitude variables are called in a model file,
+    along with the latitude and longitude ranges of the data.
+
+    Input::
+        -filename
+    Output::
+        -latVarName
+        -lonVarName
+        -latMin
+        -latMax
+        -lonMin
+        -lonMax
+    """
+    try:
+        f = netCDF4.Dataset(filename, mode='r')
+    except:
+        print("Unable to open '%s' to try and read the Latitude and Longitude variables" % filename)
+        raise
+
+    variableNameList = [variable.encode() for variable in f.variables.keys()]
+    # convert all variable names into lower case
+    varNameListLowerCase = [x.lower() for x in variableNameList]
+
+    # Use "set" types to find common variable names between the file's variables and the lists of possibilities
+    possibleLatNames = set(['latitude', 'lat', 'lats', 'latitudes'])
+    possibleLonNames = set(['longitude', 'lon', 'lons', 'longitudes'])
+
+    # Use the sets to find the intersection where variable names are in possibleNames
+    latNameSet = possibleLatNames.intersection(varNameListLowerCase)
+    lonNameSet = possibleLonNames.intersection(varNameListLowerCase)
+
+    if len(latNameSet) == 0 or len(lonNameSet) == 0:
+        print("Unable to autodetect Latitude and/or Longitude values in the file")
+        latName = misc.askUserForVariableName(variableNameList, targetName="Latitude")
+        lonName = misc.askUserForVariableName(variableNameList, targetName="Longitude")
+    else:
+        latName = latNameSet.pop()
+        lonName = lonNameSet.pop()
+
+    lats = np.array(f.variables[latName][:])
+    lons = np.array(f.variables[lonName][:])
+    f.close()
+
+    latMin = lats.min()
+    latMax = lats.max()
+
+    # Convert the lons from 0:360 into -180:180
+    lons[lons > 180] = lons[lons > 180] - 360.
+    lonMin = lons.min()
+    lonMax = lons.max()
+
+    return latName, lonName, latMin, latMax, lonMin, lonMax
+
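+# The 0:360 -> -180:180 longitude conversion used above, shown on its own
+# (illustrative sketch):
+#
+#     >>> lons = np.array([0., 90., 180., 270., 359.])
+#     >>> lons[lons > 180] = lons[lons > 180] - 360.
+#     >>> lons
+#     array([  0.,  90., 180., -90.,  -1.])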
+
+def read_data_from_file_list_K(filelist, myvar, timeVarName, latVarName, lonVarName, file_type):
+    ##################################################################################
+    # Read in data from a list of model files into a single data structure
+    # Input:   filelist - list of filenames (including path)
+    #          myvar - string containing name of variable to load in (as it appears in file)
+    # Output:  lat, lon - 2D arrays of latitude and longitude values
+    #          times - list of times
+    #          t2store - numpy array containing data from all files
+    # Modified from read_data_from_file_list to read data from multiple models into a 4-D array
+    # 1. The code now processes model data that completely covers the 20-yr period. Thus,
+    #    all model data must have the same time levels (ntimes). Unlike in the original, ntimes
+    #    is fixed here.
+    # 2. Because one of the model datasets exceeds 240 months (243 months), the model data must be
+    #    truncated to 240 months using the ntimes determined from the first file.
+    ##################################################################################
+    filelist.sort()
+    nfiles = len(filelist)
+    # Exit cleanly if 'filelist' is empty
+    if nfiles == 0:
+        print 'Error: no files have been passed to read_data_from_file_list_K(): Exit'
+        sys.exit()
+
+    # Open the first file in the list to:
+    #  i) read in lats, lons
+    #  ii) find out how many timesteps are in the file (assume the same ntimes in each file in the list)
+    #      -allows you to create an empty array to store variable data for all times
+    tmp = netCDF4.Dataset(filelist[0], mode='r', format=file_type)
+    latsraw = tmp.variables[latVarName][:]
+    lonsraw = tmp.variables[lonVarName][:]
+    lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360.  # convert to -180,180 if necessary
+    if latsraw.ndim == 1:
+        lon, lat = np.meshgrid(lonsraw, latsraw)
+    elif latsraw.ndim == 2:
+        lon = lonsraw
+        lat = latsraw
+
+    timesraw = tmp.variables[timeVarName]
+    ntimes = len(timesraw)
+    nygrd = len(lat[:, 0])
+    nxgrd = len(lon[0, :])
+
+    print 'Lats and lons read in for first file in filelist'
+
+    # Create a single empty masked array to store model data from all files
+    t2store = ma.zeros((nfiles, ntimes, nygrd, nxgrd))
+
+    # Now load in the data for real
+    # NB. no need to reload the latitudes and longitudes -assume invariant
+    i = 0
+    for ifile in filelist:
+        f = netCDF4.Dataset(ifile, mode='r')
+        t2raw = f.variables[myvar][:]
+        # Flatten dimensions which needn't exist, i.e. level
+        # e.g. for a single level the data often have 4 dimensions, when 3 dimensions will do.
+        # Code requires data to have dimensions (time,lat,lon), i.e. remove the level dimension
+        t2tmp = t2raw.squeeze()
+        # NB. if the data happen to be for a single time, we flattened it by accident; let's put it back...
+        if t2tmp.ndim == 2:
+            t2tmp = np.expand_dims(t2tmp, 0)
+        # truncate to the ntimes determined from the first file
+        t2store[i, 0:ntimes, :, :] = t2tmp[0:ntimes, :, :]
+        f.close()
+        i += 1
+
+    print 'Data read in successfully with dimensions: ', t2store.shape
+
+    # Decode model times into python datetime objects. Note: timestore becomes a list (no longer an array) here
+    ifile = filelist[0]
+    timestore, _ = process.getModelTimes(ifile, timeVarName)
+
+    return lat, lon, timestore, t2store
+
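+# Example (a hedged sketch; filenames are hypothetical): unlike
+# read_data_from_file_list, the _K variant keeps each model separate in a
+# 4-D array indexed as (model, time, lat, lon).
+#
+#     >>> mdl_files = ['/data/modelA.nc', '/data/modelB.nc']
+#     >>> lat, lon, times, t2 = read_data_from_file_list_K(
+#     ...     mdl_files, 't2', 'time', 'lat', 'lon', 'NETCDF4')
+#     >>> t2.shape                 # (nfiles, ntimes, nygrd, nxgrd)
+#     (2, 240, 180, 360)
+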
+def find_latlon_ranges(filelist, lat_var_name, lon_var_name):
+    # Function to return the latitude and longitude ranges of the data in a file,
+    # given the identifying variable names.
+    #
+    # Input:
+    #    filelist - list of filenames (data is read in from the first file only)
+    #    lat_var_name - variable name of the 'latitude' variable
+    #    lon_var_name - variable name of the 'longitude' variable
+    #
+    # Output:
+    #    latMin, latMax, lonMin, lonMax - self-explanatory
+    #
+    # Peter Lean March 2011
+
+    filename = filelist[0]
+
+    try:
+        f = netCDF4.Dataset(filename, mode='r')
+
+        lats = f.variables[lat_var_name][:]
+        latMin = lats.min()
+        latMax = lats.max()
+
+        lons = f.variables[lon_var_name][:]
+        lons[lons > 180] = lons[lons > 180] - 360.
+        lonMin = lons.min()
+        lonMax = lons.max()
+
+        return latMin, latMax, lonMin, lonMax
+    except:
+        print 'Error: there was a problem with finding the latitude and longitude ranges in the file'
+        print '       Please check that you specified the filename and variable names correctly.'
+        sys.exit()
+
+def writeBN_lola(fileName, lons, lats):
+    # write a binary data file that includes longitude (1-d) and latitude (1-d) values
+    F = fortranfile.FortranFile(fileName, mode='w')
+    ngrdY = lons.shape[0]
+    ngrdX = lons.shape[1]
+    tmpDat = ma.zeros(ngrdX)
+    tmpDat[:] = lons[0, :]
+    F.writeReals(tmpDat)
+    tmpDat = ma.zeros(ngrdY)
+    tmpDat[:] = lats[:, 0]
+    F.writeReals(tmpDat)
+    # release temporary arrays
+    tmpDat = 0
+    F.close()
+
+def writeBNdata(fileName, numOBSs, numMDLs, nT, ngrdX, ngrdY, numSubRgn, obsData, mdlData, obsRgnAvg, mdlRgnAvg):
+    # write spatially- and regionally-regridded data into a binary data file
+    missing = -1.e26
+    F = fortranfile.FortranFile(fileName, mode='w')
+    # construct a data array to replace mask flags with a missing value (missing=-1.e26) for printing
+    data = ma.zeros((nT, ngrdY, ngrdX))
+    tmpDat = ma.zeros(ngrdX)
+
+    # write observed data; allowed to write only one row at a time
+    for m in np.arange(numOBSs):
+        data[:, :, :] = obsData[m, :, :, :]
+        msk = data.mask
+        for n in np.arange(nT):
+            for j in np.arange(ngrdY):
+                for i in np.arange(ngrdX):
+                    if msk[n, j, i]:
+                        data[n, j, i] = missing
+        for n in np.arange(nT):
+            for j in np.arange(ngrdY):
+                tmpDat[:] = data[n, j, :]
+                F.writeReals(tmpDat)
+
+    # write model data (dep. on the number of models)
+    for m in np.arange(numMDLs):
+        data[:, :, :] = mdlData[m, :, :, :]
+        msk = data.mask
+        for n in np.arange(nT):
+            for j in np.arange(ngrdY):
+                for i in np.arange(ngrdX):
+                    if msk[n, j, i]:
+                        data[n, j, i] = missing
+        for n in np.arange(nT):
+            for j in np.arange(ngrdY):
+                tmpDat[:] = data[n, j, :]
+                F.writeReals(tmpDat)
+
+    data = 0  # release the array allocated for data
+    # write data in subregions
+    if numSubRgn > 0:
+        print 'Also included are the time series of the means over ', numSubRgn, ' areas from obs and model data'
+        tmpDat = ma.zeros(nT)
+        for m in np.arange(numOBSs):
+            for n in np.arange(numSubRgn):
+                tmpDat[:] = obsRgnAvg[m, n, :]
+                F.writeReals(tmpDat)
+        for m in np.arange(numMDLs):
+            for n in np.arange(numSubRgn):
+                tmpDat[:] = mdlRgnAvg[m, n, :]
+                F.writeReals(tmpDat)
+        tmpDat = 0  # release the array allocated for tmpDat
+    F.close()
+
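+# Reading the records back (a hedged sketch: it assumes utils.fortranfile
+# exposes a readReals() counterpart to the writeReals() calls above, as in the
+# common FortranFile recipe this module appears to use):
+#
+#     >>> F = fortranfile.FortranFile('bindata.bin', mode='r')
+#     >>> row = F.readReals()      # one (ngrdX,) row per record, in write order
+#     >>> F.close()
+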
+def writeNCfile(fileName, numSubRgn, lons, lats, obsData, mdlData, obsRgnAvg, mdlRgnAvg, obsList, mdlList, subRegions):
+    # write an output file of variables up to 3 dimensions
+    # fileName: the name of the output data file
+    # numSubRgn: the number of subregions
+    # lons[ngrdX]: longitudes
+    # lats[ngrdY]: latitudes
+    # obsData[numOBSs,nT,ngrdY,ngrdX]: the obs time series over the entire model domain
+    # mdlData[numMDLs,nT,ngrdY,ngrdX]: the mdl time series over the entire model domain
+    # obsRgnAvg[numOBSs,numSubRgn,nT]: the obs time series for all subregions
+    # mdlRgnAvg[numMDLs,numSubRgn,nT]: the mdl time series for all subregions
+    dimO = obsData.shape[0]    # the number of obs datasets
+    dimM = mdlData.shape[0]    # the number of mdl datasets
+    dimT = mdlData.shape[1]    # the number of time levels
+    dimY = mdlData.shape[2]    # y-dimension
+    dimX = mdlData.shape[3]    # x-dimension
+    dimR = obsRgnAvg.shape[1]  # the number of subregions
+    f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4')
+    print mdlRgnAvg.shape, dimM, dimR, dimT
+    # create global attributes
+    f.description = ''
+    # create dimensions
+    print 'Creating Dimensions within the NetCDF Object...'
+    f.createDimension('unity', 1)
+    f.createDimension('time', dimT)
+    f.createDimension('west_east', dimX)
+    f.createDimension('south_north', dimY)
+    f.createDimension('obs', dimO)
+    f.createDimension('models', dimM)
+
+    # create the variables (real*4) to be written to the file
+    print 'Creating Variables...'
+    f.createVariable('lon', 'd', ('south_north', 'west_east'))
+    f.createVariable('lat', 'd', ('south_north', 'west_east'))
+    f.createVariable('oDat', 'd', ('obs', 'time', 'south_north', 'west_east'))
+    f.createVariable('mDat', 'd', ('models', 'time', 'south_north', 'west_east'))
+
+    if subRegions:
+        f.createDimension('regions', dimR)
+        f.createVariable('oRgn', 'd', ('obs', 'regions', 'time'))
+        f.createVariable('mRgn', 'd', ('models', 'regions', 'time'))
+        f.variables['oRgn'].varAttName = 'Observation time series: Subregions'
+        f.variables['mRgn'].varAttName = 'Model time series: Subregions'
+
+    loadDataIntoNetCDF(f, obsList, obsData, 'Observation')
+    print 'Loaded the Observations into the NetCDF'
+
+    loadDataIntoNetCDF(f, mdlList, mdlData, 'Model')
+
+    # create attributes and units for the variables
+    print 'Creating Attributes and Units...'
+    f.variables['lon'].varAttName = 'Longitudes'
+    f.variables['lon'].varUnit = 'degrees East'
+    f.variables['lat'].varAttName = 'Latitudes'
+    f.variables['lat'].varUnit = 'degrees North'
+    f.variables['oDat'].varAttName = 'Observation time series: entire domain'
+    f.variables['mDat'].varAttName = 'Model time series: entire domain'
+
+    # assign the values to the variables and write them
+    f.variables['lon'][:] = lons[:]
+    f.variables['lat'][:] = lats[:]
+    if subRegions:
+        f.variables['oRgn'][:] = obsRgnAvg[:]
+        f.variables['mRgn'][:] = mdlRgnAvg[:]
+
+    f.close()
+
+def writeNCfile1(fileName, numSubRgn, lons, lats, allData, datRgnAvg, datList, subRegions):
+    # write an output file of variables up to 3 dimensions
+    # fileName: the name of the output data file
+    # numSubRgn: the number of subregions
+    # lons[ngrdX]: longitudes
+    # lats[ngrdY]: latitudes
+    # allData[dimD,nT,ngrdY,ngrdX]: the obs+mdl time series over the entire model domain
+    # datRgnAvg[dimD,numSubRgn,nT]: the obs+mdl time series for all subregions
+    dimD, dimT, dimY, dimX = allData.shape  # number of datasets, time levels, y- and x-dimensions
+    dimR = datRgnAvg.shape[1]               # the number of subregions
+    f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4')
+    print datRgnAvg.shape, dimD, dimR, dimT
+    # create global attributes
+    f.description = ''
+    # create dimensions
+    print 'Creating Dimensions within the NetCDF Object...'
+    f.createDimension('unity', 1)
+    f.createDimension('time', dimT)
+    f.createDimension('west_east', dimX)
+    f.createDimension('south_north', dimY)
+    f.createDimension('data', dimD)
+
+    # create the variables (real*4) to be written to the file
+    print 'Creating Variables...'
+    f.createVariable('lon', 'd', ('south_north', 'west_east'))
+    f.createVariable('lat', 'd', ('south_north', 'west_east'))
+    f.createVariable('gDat', 'd', ('data', 'time', 'south_north', 'west_east'))
+
+    if subRegions:
+        f.createDimension('regions', dimR)
+        f.createVariable('dRgn', 'd', ('data', 'regions', 'time'))
+        f.variables['dRgn'].varAttName = 'Subregion-mean time series: All (obs + model) datasets'
+
+    loadDataIntoNetCDF1(f, datList, allData)
+    print 'Loaded all data into the NetCDF'
+
+    # create attributes and units for the variables
+    print 'Creating Attributes and Units...'
+    f.variables['lon'].varAttName = 'Longitudes'
+    f.variables['lon'].varUnit = 'degrees East'
+    f.variables['lat'].varAttName = 'Latitudes'
+    f.variables['lat'].varUnit = 'degrees North'
+    f.variables['gDat'].varAttName = 'Gridded data time series: entire domain'
+
+    # assign the values to the variables and write them
+    f.variables['lon'][:] = lons[:]
+    f.variables['lat'][:] = lats[:]
+    f.variables['gDat'][:] = allData[:]
+    if subRegions:
+        f.variables['dRgn'][:] = datRgnAvg[:]
+
+    f.close()
+
+def loadDataIntoNetCDF(fileObject, datasets, dataArray, dataType):
+    """
+    Input::
+        fileObject - netCDF4 file object data will be loaded into
+        datasets - List of dataset names
+        dataArray - Multi-dimensional array of data to be loaded into the NetCDF file
+        dataType - String with value of either 'Model' or 'Observation'
+    Output::
+        No return value.  netCDF4 file object is updated in place
+    """
+    datasetCount = 0
+    for dataset in datasets:
+        if dataType.lower() == 'observation':
+            datasetName = dataset.replace(' ', '')
+        elif dataType.lower() == 'model':
+            datasetName = path.splitext(path.basename(dataset))[0]
+        print "Creating variable %s" % datasetName
+        fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 'west_east'))
+        fileObject.variables[datasetName].varAttName = '%s time series: entire domain' % dataType
+        print 'Loading values into %s' % datasetName
+        fileObject.variables[datasetName][:] = dataArray[datasetCount, :, :, :]
+        datasetCount += 1
+
+def loadDataIntoNetCDF1(fileObject, datasets, dataArray):
+    """
+    Input::
+        fileObject - netCDF4 file object data will be loaded into
+        datasets - List of dataset names
+        dataArray - Multi-dimensional array of data to be loaded into the NetCDF file
+    Output::
+        No return value.  netCDF4 file object is updated in place
+    """
+    datasetCount = 0
+    for dataset in datasets:
+        datasetName = path.splitext(path.basename(dataset))[0]
+        print "Creating variable %s" % datasetName
+        fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 'west_east'))
+        fileObject.variables[datasetName].varAttName = 'Dataset time series: entire domain'
+        print 'Loading values into %s' % datasetName
+        fileObject.variables[datasetName][:] = dataArray[datasetCount, :, :, :]
+        datasetCount += 1
Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/storage/files.py
------------------------------------------------------------------------------
svn:executable = *
Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/storage/rcmed.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/storage/rcmed.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/storage/rcmed.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/storage/rcmed.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,113 @@
+'''This is a collection of functions that provide a single interface to the
+rcmed. The initial design includes several functions to interact with the
+available parameters within rcmed and their metadata.
+
+Future work includes rolling the rcmed querying code into this module as well.
+'''
+
+import requests, json
+
+paramUri = 'http://rcmes.jpl.nasa.gov/bottle/rcmed/param.json'
+
+def getParams(uri=paramUri):
+    '''This will return all of the parameters from the database as
+    a list of dictionaries.
+
+    If the web service cannot be reached, the underlying exception is
+    re-raised after a warning is printed.'''
+    # Use a get request to call the Web Service
+    try:
+        httpRequest = requests.get(uri)
+    except:
+        print "HTTPRequest failed. Bottle WebServer is offline"
+        raise
+    # TODO: Check the request status code; if it is in the 400 or 500 range
+    # then return None.  If the status code is 200 then return the parameter
+    # list parsed from request.text.  httpRequest.status_code is an int we can inspect.
+    paramDict = json.loads(httpRequest.text)
+    paramList = paramDict['param']
+
+    # Filter the list to remove parameters with missing (None) metadata values
+    filteredParams = []
+    for param in paramList:
+        paramGood = True
+        for key, value in param.iteritems():
+            if value == None:
+                paramGood = False
+
+        if paramGood:
+            filteredParams.append(param)
+
+    return filteredParams
+
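+# Example usage (a hedged sketch; it assumes the web service above is reachable
+# and that parameter records carry keys such as 'id' and 'description', as in
+# the draft Parameter class below):
+#
+#     >>> params = getParams()
+#     >>> for p in params:
+#     ...     print p['id'], p['description']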
+
+
+#class Parameter(object):
+#
+#    def __init__(self):
+#        self.param_query_uri = 'http://some.url'
+#        self.param_list = self.param_metadata()
+#
+#    def param_metadata(self):
+#        '''This method will return a list of python dict's. Each dict will
+#        contain a complete record for each parameter from rcmed'''
+#        # 1. Query the Parameters Metadata Endpoint using param_query_uri
+#        # 2. Parse the returned data and re-format into a dict
+#        # 3. define self.param_met_dict
+#        test_list = [{"id": 12,
+#                      "description": "ERA Dataset 2 Metre Temperature",
+#                      "type": 'temp'
+#                      },
+#                     {"id": 13,
+#                      "description": "ERA Dataset 2 Metre Dewpoint Temperature",
+#                      'type': 'temp'
+#                      },
+#                     {"id": 14,
+#                      "description": "TRMM Dataset HRF parameter",
+#                      'type': 'hrf'
+#                      }
+#                     ]
+#        print "self.param_met_dict has been created"
+#        return test_list
+#
+#    def get_param_by_id(self, id):
+#        '''This will take in a parameter id and return a single dict. Can we
+#        safely assume we will always hold a unique parameter id? - Currently
+#        this is True'''
+#        for p in self.param_list:
+#            if p['id'] == id:
+#                return p
+#            else:
+#                pass
+#
+#    def get_params_by_type(self, type):
+#        '''This will take in a parameter type like precip, temp, pressure, etc.
+#        and will return a list of all the params that are of the given type.'''
+#        param_list = []  # empty list to collect the param dicts
+#        for p in self.param_list:
+#            if p['type'] == type:
+#                param_list.append(p)
+#            else:
+#                pass
+#        return param_list
+#
+#
+#class ObsData(object):
+#
+#    def __init__(self):
+#        self.query_url = 'http://rcmes/rcmed....'  # can we merely insert the query criteria into the url attribute?
+#        self.param_id = 6
+#        self.dataset_id = 1
+#        self.lat_range = [25.4, 55.0]
+#        self.lon_range = [0.0, 10.7]
+#        self.time_range = [start, end]
+#
+#    def set_param(self, param_dict):
+#        self.param_id = param_dict['id']
+#        self.dataset_id = null
+#        # look up the dataset id using the parameter id and set it
+#        p = Parameter.get_param_by_id(id)
\ No newline at end of file
Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/storage/rcmed.py
------------------------------------------------------------------------------
svn:executable = *