Posted to commits@climate.apache.org by ah...@apache.org on 2013/09/13 15:14:04 UTC
svn commit: r1522915 [1/2] - in /incubator/climate/branches/csag/pycdm: ./
conventions/ handlers/
Author: ahart
Date: Fri Sep 13 13:14:04 2013
New Revision: 1522915
URL: http://svn.apache.org/r1522915
Log:
Initial commit of pycdm code from Chris Jack at CSAG
Added:
incubator/climate/branches/csag/pycdm/
incubator/climate/branches/csag/pycdm/__init__.py
incubator/climate/branches/csag/pycdm/conventions/
incubator/climate/branches/csag/pycdm/conventions/README
incubator/climate/branches/csag/pycdm/coordsystem.py
incubator/climate/branches/csag/pycdm/dataset.py
incubator/climate/branches/csag/pycdm/error.py
incubator/climate/branches/csag/pycdm/field.py
incubator/climate/branches/csag/pycdm/group.py
incubator/climate/branches/csag/pycdm/handlers/
incubator/climate/branches/csag/pycdm/handlers/__init__.py
incubator/climate/branches/csag/pycdm/handlers/csag2.py
incubator/climate/branches/csag/pycdm/handlers/netcdf4.py
incubator/climate/branches/csag/pycdm/styles.py
incubator/climate/branches/csag/pycdm/testdata (with props)
incubator/climate/branches/csag/pycdm/variable.py
Added: incubator/climate/branches/csag/pycdm/__init__.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/__init__.py?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/__init__.py (added)
+++ incubator/climate/branches/csag/pycdm/__init__.py Fri Sep 13 13:14:04 2013
@@ -0,0 +1,16 @@
+
+class CDMError(Exception):
+
+ def __init__(self, message):
+ self.message = message
+
+ def __str__(self):
+ return repr(self.message)
+
+
+from dataset import Dataset
+from variable import Dimension, Variable
+from field import Field
+
+from handlers import *
+
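+# Typical usage (a sketch; Dataset.open() delegates to the first handler
+# subclass that can open the given location):
+#
+#   import pycdm
+#   ds = pycdm.Dataset.open('testdata/gfs.nc')
+#   tmax = ds.root.variables['TMAX_2maboveground']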
Added: incubator/climate/branches/csag/pycdm/conventions/README
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/conventions/README?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/conventions/README (added)
+++ incubator/climate/branches/csag/pycdm/conventions/README Fri Sep 13 13:14:04 2013
@@ -0,0 +1,2 @@
+This is just a placeholder for the moment. The idea is that different meta-data conventions could be described here.
+
Added: incubator/climate/branches/csag/pycdm/coordsystem.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/coordsystem.py?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/coordsystem.py (added)
+++ incubator/climate/branches/csag/pycdm/coordsystem.py Fri Sep 13 13:14:04 2013
@@ -0,0 +1,142 @@
+"""
+An experiment to factor the coordinate system functionality out of the Field class
+"""
+
+
+from variable import Variable
+
+# netCDF4 provides the CF time-units parsing used in cf_units2coordinates below
+import netCDF4
+
+cf_dimensions = {'degree_east': 'longitude',
+'degree east': 'longitude',
+'degrees_east': 'longitude',
+'degree_north': 'latitude',
+'degrees_north': 'latitude',
+'degree north': 'latitude'}
+
+def cf_units2coordinates(units):
+
+ # For spatial coordinates
+ if units in cf_dimensions:
+ return cf_dimensions[units]
+
+ # Time coordinates
+ try:
+ netCDF4.num2date(0,units)
+ except:
+ return None
+
+ return 'time'
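+
+# Example behaviour (illustrative):
+#   cf_units2coordinates('degrees_east') -> 'longitude'
+#   cf_units2coordinates('days since 1850-1-1 12:00:00') -> 'time'
+#   cf_units2coordinates('kg m-2 s-1') -> None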
+
+class CoordSystem(object):
+ """
+ Encapsulates a coordinate system. Constructor can take a Variable instance and attempt to
+ auto construct the coordinate system parameters.
+ """
+
+ def __init__(self, variable=None, coordsystem=None):
+ """
+ Takes either a Variable instance or a coordsystem dict (produced by another CoordSystem instance).
+ In the case of a Variable instance, interrogate the variable and auxiliary variables in the parent
+ group in order to construct the coordsystem parameters.
+
+ Coordinate systems can be of types: ['cartesian', 'discrete']
+
+ Cartesian coordinate systems have an x-dimension and a y-dimension whereas discrete systems only
+ have an id-dimension as an index into the grid positions.
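+
+ A sketch of the resulting coordsystem mapping for a CF style file with 1D
+ coordinate variables (variable names are illustrative):
+
+     {'latitude': {'variable': 'lat', 'map': [1]},
+      'longitude': {'variable': 'lon', 'map': [2]},
+      'time': {'variable': 'time', 'map': [0]}}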
+ """
+
+ # If we have a variable instance
+ if variable is not None and isinstance(variable, Variable):
+
+ # Keep a handle on the associated variable
+ self.variable = variable
+
+ # Initialise dimensions
+ x_dimension = None
+ y_dimension = None
+ z_dimension = None
+ t_dimension = None
+ id_dimension = None
+
+ # Initialise the coordinate system mapping (coordinate name -> variable and dimension map)
+ self.coordsystem = {}
+
+ # Keep track of variable dimensions we have mapped
+ mapped = []
+
+ # First just step through the variable's dimensions
+ for dimension in self.variable.dimensions:
+
+ # See if the dimension name is also a variable name
+ if dimension.name in self.variable.group.variables:
+
+ # Grab the coordinate variable
+ coordinate_variable = self.variable.group.variables[dimension.name]
+
+ # Try and convert the variable into a CF coordinate
+ cf_coordinate = cf_units2coordinates(coordinate_variable.getAttribute('units'))
+
+ # Get the axis attribute
+ axis = coordinate_variable.getAttribute('axis')
+
+ # Now, if we have a cf_coordinate we can add this variable to the coordinate system
+ # and mark it as mapped
+ if cf_coordinate:
+ self.coordsystem[cf_coordinate] = {'variable':coordinate_variable.name, 'map':[self.variable.dimensions.index(dimension)]}
+ mapped.append(self.variable.dimensions.index(dimension))
+
+ # We can also use cf_coordinates or axis attributes to figure out cartesian axis
+ if cf_coordinate == 'latitude' or axis == 'Y':
+ y_dimension = self.variable.dimensions.index(dimension)
+ if cf_coordinate == 'longitude' or axis == 'X':
+ x_dimension = self.variable.dimensions.index(dimension)
+ if cf_coordinate == 'vertical' or axis == 'Z':
+ z_dimension = self.variable.dimensions.index(dimension)
+ if cf_coordinate == 'time' or axis == 'T':
+ t_dimension = self.variable.dimensions.index(dimension)
+
+ # Check how we are doing, have we mapped all dimensions?
+ if len(mapped) < len(self.variable.dimensions):
+
+ # Check for a coordinates attribute
+ if self.variable.getAttribute('coordinates'):
+
+ self.coordinates_names = self.variable.getAttribute('coordinates').split()
+
+ # Find each associated variable
+ for varname in self.coordinates_names:
+
+ # See if we can find the corresponding variable
+ if varname in self.variable.group.variables.keys():
+
+ # Grab the coordinate variable
+ coordinate_variable = self.variable.group.variables[varname]
+
+ # Try and convert the variable into a CF coordinate
+ cf_coordinate = cf_units2coordinates(coordinate_variable.getAttribute('units'))
+
+ # Get the axis attribute
+ axis = coordinate_variable.getAttribute('axis')
+
+ # If we have a cf_coordinate we can create the mapping
+ if cf_coordinate:
+
+ # Create the coordinates_mapping entry but with an empty map for now
+ self.coordsystem[cf_coordinate] = {'variable':varname, 'map':[], 'coordinates': self.coordinates_names}
+
+ # Add each coordinates variable dimension to the mappable list and generate the map
+ for dimension in self.variable.group.variables[varname].dimensions:
+ self.coordsystem[cf_coordinate]['map'].append(self.variable.dimensions.index(dimension))
+
+ if self.variable.dimensions.index(dimension) not in mapped:
+ mapped.append(self.variable.dimensions.index(dimension))
+
+ # By now we should have mapped all the dimensions, so we can figure out what type of coordinate system we have
+
+ # If we have x_dimension and y_dimension then we have a cartesian system
+ if x_dimension is not None and y_dimension is not None:
+ self.type = 'cartesian'
+
+
+ # If the maps for latitude and longitude both have length one and the same value then we can mark this as the id_dimension
+ if 'latitude' in self.coordsystem and 'longitude' in self.coordsystem:
+ if self.coordsystem['latitude']['map'] == self.coordsystem['longitude']['map']:
+ if len(self.coordsystem['latitude']['map']) == 1:
+ self.id_dimension = self.coordsystem['latitude']['map'][0]
+
+
Added: incubator/climate/branches/csag/pycdm/dataset.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/dataset.py?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/dataset.py (added)
+++ incubator/climate/branches/csag/pycdm/dataset.py Fri Sep 13 13:14:04 2013
@@ -0,0 +1,78 @@
+class Dataset(object):
+ """A generic Dataset class that represents an abstract dataset. This class is subclassed in order to
+ implement different storage handlers.
+
+ Storage handlers should overload the init method in order to construct a Dataset object. The open method will
+ search for the first subclass that can open the provided location and return a new Dataset instance if successful.
+
+ """
+
+ def __init__(self, location=None):
+ """
+ Create a new generic dataset instance that just hold the location string if provided and sets
+ the root group to None
+ >>> ds = Dataset()
+ >>> print ds
+ <CDM Dataset: None>
+ >>> print ds.root
+ None
+ """
+
+ self.location = location
+ self.root = None
+ self.attributes = {}
+
+
+ @classmethod
+ def open(cls, location):
+ """
+ Class method that finds an appropriate Dataset subclass to handle the resource identified by
+ location, constructs an instance of this subclass and returns it. Returns None if no handler
+ subclass can be found.
+ >>> print Dataset.open(None)
+ <CDM Dataset: None>
+ >>> print Dataset.open('doesnotexist')
+ None
+ """
+
+ # If we have a location parameter, try to find a handler
+ if (location):
+ dataset = None
+ for handler in cls.__subclasses__():
+ try:
+ dataset = handler(location)
+ except:
+ pass
+ else:
+ break
+ return dataset
+
+ # Else return a generic dataset instance
+ else:
+ return Dataset()
+
+ def close(self):
+ """
+ Method to be implemented by format handling subclasses as appropriate
+ """
+
+ pass
+
+
+ def getRootGroup(self):
+ """
+ To be overloaded as needed by handler subclass but otherwise just returns the
+ instance root variable
+ >>> ds = Dataset()
+ >>> print ds.getRootGroup()
+ None
+ """
+
+ return self.root
+
+
+ def __repr__(self):
+ return "<CDM %s: %s>" % (self.__class__.__name__, self.location)
+
+ def __unicode__(self):
+ return self.__repr__()
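+
+# A minimal handler sketch (hypothetical, illustrative only): a subclass just
+# needs to raise from its constructor when it cannot open the location, so
+# that Dataset.open() falls through to the next registered handler.
+#
+# class TextDataset(Dataset):
+#
+#     def __init__(self, location):
+#         super(TextDataset, self).__init__(location)
+#         if not str(location).endswith('.txt'):
+#             raise ValueError('not a text dataset: %s' % (location))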
Added: incubator/climate/branches/csag/pycdm/error.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/error.py?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/error.py (added)
+++ incubator/climate/branches/csag/pycdm/error.py Fri Sep 13 13:14:04 2013
@@ -0,0 +1,7 @@
+class CDMError(Exception):
+
+ def __init__(self, message):
+ self.message = message
+
+ def __str__(self):
+ return repr(self.message)
Added: incubator/climate/branches/csag/pycdm/field.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/field.py?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/field.py (added)
+++ incubator/climate/branches/csag/pycdm/field.py Fri Sep 13 13:14:04 2013
@@ -0,0 +1,1179 @@
+
+import StringIO
+import numpy
+import copy
+import simplejson as json
+
+# netCDF4 provides the CF time-units parsing used in cf_units2coordinates below
+import netCDF4
+
+# CDMError is raised by several Field methods
+from error import CDMError
+
+import shapely
+import shapely.geometry
+from shapely.wkb import loads
+from shapely.ops import cascaded_union
+from shapely import speedups
+speedups.enable()
+
+cf_dimensions = {'degree_east': 'longitude',
+'degree east': 'longitude',
+'degrees_east': 'longitude',
+'degree_north': 'latitude',
+'degrees_north': 'latitude',
+'degree north': 'latitude'}
+
+def cf_units2coordinates(units):
+
+ # For spatial coordinates
+ if units in cf_dimensions:
+ return cf_dimensions[units]
+
+ # Time coordinates
+ try:
+ netCDF4.num2date(0,units)
+ except:
+ return None
+
+ return 'time'
+
+
+class Field(object):
+ """
+ A Field encapsulates a variable and a coordinate system and provides attributes and methods to constrain or filter
+ access to the underlying variable.
+ >>> import handlers.netcdf4 as netcdf4
+ >>> import handlers.csag2 as csag2
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/gfs.nc')
+ >>> tmax = ds.root.variables['TMAX_2maboveground']
+ >>> tmax_field = Field(tmax)
+ >>> print tmax_field
+ <CDM Field: TMAX_2maboveground>
+ >>> print tmax_field.coordinates_mapping
+ {'latitude': {'variable': u'latitude', 'map': [1]}, 'longitude': {'variable': u'longitude', 'map': [2]}, u'time': {'variable': u'time', 'map': [0]}}
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/wrfout_d01_2010-05-28_12.nc')
+ >>> q2 = ds.root.variables['Q2']
+ >>> q2_field = Field(q2)
+ >>> print q2_field
+ <CDM Field: Q2>
+ >>> print q2_field.coordinates_mapping
+ {'latitude': {'variable': u'XLAT', 'map': [1, 2], 'coordinates': [u'XLONG', u'XLAT']}, 'longitude': {'variable': u'XLONG', 'map': [1, 2], \
+'coordinates': [u'XLONG', u'XLAT']}}
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/pr_AFR-44_CCCMA-CanESM2_historical_r1i1p1_SMHI-RCA4_v0_mon_200101_200512.nc')
+ >>> pr = ds.root.variables['pr']
+ >>> pr_field = Field(pr)
+ >>> print pr_field
+ <CDM Field: pr>
+ >>> print pr_field.coordinates_mapping
+ {'latitude': {'variable': u'lat', 'map': [1, 2], 'coordinates': [u'lon', u'lat']}, u'rlon': {'variable': u'rlon', 'map': [2]}, \
+u'rlat': {'variable': u'rlat', 'map': [1]}, 'longitude': {'variable': u'lon', 'map': [1, 2], 'coordinates': [u'lon', u'lat']}, u'time': {'variable': u'time', 'map': [0]}}
+
+ >>> ds = csag2.csag2Dataset('testdata/csag2/ghcnd.2.afr/ppt')
+ >>> ppt = ds.root.variables['PPT']
+ >>> ppt_field = Field(ppt)
+ >>> print ppt_field.coordinates_mapping
+ {'latitude': {'variable': 'latitude', 'map': [0], 'coordinates': ['latitude', 'longitude', 'altitude']}, \
+'altitude': {'variable': 'altitude', 'map': [0], 'coordinates': ['latitude', 'longitude', 'altitude']}, 'id': {'variable': 'id', 'map': [0]}, \
+'longitude': {'variable': 'longitude', 'map': [0], 'coordinates': ['latitude', 'longitude', 'altitude']}, 'time': {'variable': 'time', 'map': [1]}}
+
+ """
+
+ def __init__(self, variable):
+
+ self.variable = variable
+
+ # Keep track of coordinate variables and coordinates mapping
+ self.coordinates_variables = []
+ self.coordinates_mapping = {}
+
+ # Keep track of X, Y, Z, and T dimensions of the variable
+ self.x_dimension = None
+ self.y_dimension = None
+ self.z_dimension = None
+ self.t_dimension = None
+ self.id_dimension = None
+
+ # We need to keep track of which dimensions we can map
+ mapped = []
+
+ # First let's check for standard 1D coordinate variables
+ dimension_index = 0
+ for dimension in self.variable.dimensions:
+
+ if dimension.name in self.variable.group.variables.keys():
+ self.coordinates_variables.append(self.variable.group.variables[dimension.name])
+ mapped.append(dimension)
+
+ # See if we can find out what spatial/temporal variable this is from the CF conventions
+ coordinate_name = cf_units2coordinates(self.variable.group.variables[dimension.name].getAttribute('units'))
+ axis = self.variable.group.variables[dimension.name].getAttribute('axis')
+
+ # If we can't we just default to the dimension name
+ if not coordinate_name:
+ coordinate_name = dimension.name
+
+ # See if this is one of the X,Y,Z or T axis
+ if (axis == 'Y' or coordinate_name == 'latitude'):
+ self.y_dimension = dimension_index
+ if (axis == 'X' or coordinate_name == 'longitude'):
+ self.x_dimension = dimension_index
+ if (axis == 'T' or coordinate_name == 'time'):
+ self.t_dimension = dimension_index
+
+ self.coordinates_mapping[coordinate_name] = {'variable':dimension.name, 'map':[dimension_index]}
+ dimension_index += 1
+
+
+ # Next let's see if we have a coordinates attribute we can use (CF1.6 convention)
+ if self.variable.getAttribute('coordinates'):
+
+ self.coordinates_names = self.variable.getAttribute('coordinates').split()
+
+ # Find each associated variable
+ for name in self.coordinates_names:
+
+ # See if we can find the corresponding variable
+ if name in self.variable.group.variables.keys():
+ self.coordinates_variables.append(self.variable.group.variables[name])
+
+ # See if we can find out what spatial/temporal variable this is and rename the coordinate if possible
+ try:
+ coordinate_name = cf_dimensions[self.variable.group.variables[name].getAttribute('units')]
+ except:
+ coordinate_name = name
+
+ # Create the coordinates_mapping entry but with an empty map for now
+ self.coordinates_mapping[coordinate_name] = {'variable':name, 'map':[], 'coordinates': self.coordinates_names}
+
+ # Add each coordinates variable dimension to the mappable list and generate the map
+ for dimension in self.variable.group.variables[name].dimensions:
+ self.coordinates_mapping[coordinate_name]['map'].append(self.variable.dimensions.index(dimension))
+
+ if not dimension in mapped:
+ mapped.append(dimension)
+
+ # If the map for latitude and longitude is of length one and the same value then we can mark this as the id_dimension
+ if self.coordinates_mapping['latitude']['map'] == self.coordinates_mapping['longitude']['map']:
+ if len(self.coordinates_mapping['latitude']['map']) == 1:
+ self.id_dimension = self.coordinates_mapping['latitude']['map'][0]
+
+ # We keep a feature mask array that is of the same shape as the latitude/longitude arrays
+ # returned by latitude and longitude methods. This is used when retrieving data and when producing
+ # representations of the grid (asJSON, asKML, etc.)
+ self.feature_mask = numpy.ones_like(self.latitudes())
+
+ # We keep track of the current dimension offsets and limits. These are used to implement subsetting/masking operations
+ self.dimension_offsets = [0]*len(self.variable.dimensions)
+ self.dimension_limits = [dim.length for dim in self.variable.dimensions]
+
+ def __repr__(self):
+ return "<CDM Field: %s>" % (self.variable.name)
+
+
+ def mapforward(self, indices):
+ """
+ >>> import handlers.netcdf4 as netcdf4
+ >>> import handlers.csag2 as csag2
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/gfs.nc')
+ >>> tmax = ds.root.variables['TMAX_2maboveground']
+ >>> tmax_field = Field(tmax)
+ >>> print tmax_field.coordinates_mapping
+ {'latitude': {'variable': u'latitude', 'map': [1]}, 'longitude': {'variable': u'longitude', 'map': [2]}, u'time': {'variable': u'time', 'map': [0]}}
+ >>> tmax_field.mapforward((0,441, 1000))
+ {'latitude': (0.30664390285805609, u'degrees_north'), 'longitude': (204.5451961341671, u'degrees_east'), u'time': (1332892800.0, u'seconds since 1970-01-01 00:00:00.0 0:00')}
+ >>> ds = netcdf4.netCDF4Dataset('testdata/wrfout_d01_2010-05-28_12.nc')
+ >>> q2 = ds.root.variables['Q2']
+ >>> q2_field = Field(q2)
+ >>> q2_field.mapforward((0,45,60))
+ {'latitude': (-30.895855, u'degree_north'), 'longitude': (22.115387, u'degree_east')}
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/pr_AFR-44_CCCMA-CanESM2_historical_r1i1p1_SMHI-RCA4_v0_mon_200101_200512.nc')
+ >>> pr = ds.root.variables['pr']
+ >>> pr_field = Field(pr)
+ >>> pr_field.mapforward((20,50,50))
+ {'latitude': (-23.759999999999998, u'degrees_north'), u'rlon': (-2.6400000000000006, u'degrees'), u'rlat': (-23.759999999999998, u'degrees')\
+, 'longitude': (-2.6399999999999895, u'degrees_east'), u'time': (19282.0, u'days since 1949-12-01 00:00:00')}
+ >>> ds = csag2.csag2Dataset('testdata/csag2/ghcnd.2.afr/ppt')
+ >>> ppt = ds.root.variables['PPT']
+ >>> ppt_field = Field(ppt)
+ >>> print ppt_field.mapforward((40,5000))
+ {'latitude': (10.48, 'degrees_north'), 'altitude': (336.0, 'm'), 'id': ('CD000004705', None), 'longitude': (16.719999999999999, 'degrees_east'), 'time': (5001, 'days since 1850-1-1 12:00:00')}
+ """
+
+ coordinates = {}
+
+ for coordinate in self.coordinates_mapping.keys():
+ coordinate_variable = self.variable.group.variables[self.coordinates_mapping[coordinate]['variable']]
+ coordinate_mapping = self.coordinates_mapping[coordinate]['map']
+
+ slice_list = []
+ for index in coordinate_mapping:
+ slice_list.append(slice(indices[index], indices[index]+1))
+
+ #print coordinate, slice_list
+
+ coordinates[coordinate] = (coordinate_variable[slice_list].flatten()[0], coordinate_variable.getAttribute('units'))
+
+ return coordinates
+
+
+
+ def reversemap(self, method='nearest', min_distance=1e50, **kwargs):
+ """
+ Reverse mapping involves determining the data array indices associated with the coordinates provided. Clearly there is
+ not always a direct mapping, as data is defined at discrete positions within the coordinate space. Various methods could be
+ implemented to determine the indices. Finding the nearest indices is probably the most common. Returning bounding indices
+ with distance weights would also be useful. For some coordinate spaces exact matches are required (eg. station names), but
+ most likely these apply only to string based coordinates.
+
+ Nearest match is done by building up an n-dimensional distance (squared) array and then searching for the indices of the
+ minimum. This generalises nicely from 1 to n dimensions.
+
+ The index returned is either an integer in the range of the dimension length, or numpy.nan if no coordinate constraint was
+ available for the dimension. What currently isn't checked is whether the target coordinate value is within the bounds of the
+ coordinate space, because it's not clear how to define those bounds generically. One possibility is to allow a min_distance
+ parameter to be passed and so avoid figuring out what this should be. But min_distance could depend on which coordinate
+ variable you are searching. We could allow coordinate target values to be tuples consisting of a target value and a minimum
+ distance?
+
+ However we end up doing this, a return value of -1 is expected for out of bounds responses.
+
+ For string based coordinate variables, -1 should be returned if no string match was found.
+
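+ A sketch of the nearest-index search for linked coordinate variables (this mirrors the implementation below):
+
+     d2 = numpy.zeros(shape)
+     for k in range(len(map_keys)):
+         d2 += numpy.power(coordinate_variables[k][:] - targets[k], 2)
+     closest = numpy.unravel_index(d2.argmin(), shape)
+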
+
+ >>> import handlers.netcdf4 as netcdf4
+ >>> import handlers.csag2 as csag2
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/gfs.nc')
+ >>> tmax = ds.root.variables['TMAX_2maboveground']
+ >>> tmax_field = Field(tmax)
+ >>> tmax_field.reversemap(time=1332892800.0, longitude=204.545196, latitude=0.3, method='nearest')
+ [0, 441, 1000]
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/wrfout_d01_2010-05-28_12.nc')
+ >>> q2 = ds.root.variables['Q2']
+ >>> q2_field = Field(q2)
+ >>> q2_field.reversemap(latitude=-30.89, longitude=22.115)
+ [nan, 45, 60]
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/pr_AFR-44_CCCMA-CanESM2_historical_r1i1p1_SMHI-RCA4_v0_mon_200101_200512.nc')
+ >>> pr = ds.root.variables['pr']
+ >>> pr_field = Field(pr)
+ >>> pr_field.reversemap(latitude=-23.7599999, longitude=-2.640000, time=19282)
+ [20, 50, 50]
+ >>> pr_field.reversemap(latitude=-23.7599999, longitude=-2.640000)
+ [nan, 50, 50]
+ >>> pr_field.reversemap(latitude=-23.7599999, time=19282)
+ [20, nan, nan]
+
+ >>> ds = csag2.csag2Dataset('testdata/csag2/ghcnd.2.afr/ppt')
+ >>> ppt = ds.root.variables['PPT']
+ >>> ppt_field = Field(ppt)
+ >>> ppt_field.reversemap(latitude=-27.5, longitude=22.6299)
+ [1062, nan]
+ >>> ppt_field.reversemap(latitude=-27.5)
+ [1059, nan]
+ >>> ppt_field.reversemap(time=5001)
+ [nan, 5000]
+
+ """
+
+ indices = []
+
+ # We need to find each dimension index in order
+ for dim_index in range(len(self.variable.dimensions)):
+
+
+ # Find all coordinate mapping keys that relate to this index
+ map_keys = []
+ fail = False
+ for map_key in self.coordinates_mapping:
+
+ # Check if the index is in this map and we have a constraint argument for the variable
+ if dim_index in self.coordinates_mapping[map_key]['map'] and map_key in kwargs:
+
+ # All the maps should be identical so let's keep a copy for later
+ mapping = self.coordinates_mapping[map_key]['map']
+ map_keys.append(map_key)
+
+ # If we have no map_keys then we have to abort
+ if not map_keys:
+ indices.append(numpy.nan)
+ continue
+
+ #print 'reversemap: map_keys(dim_index=%d) = ' % (dim_index), map_keys
+
+ # Check all the coordinate variables are the same shape!
+ first = True
+ shape = None
+ for map_key in map_keys:
+ coord_variable = self.variable.group.variables[self.coordinates_mapping[map_key]['variable']]
+ if first:
+ shape = coord_variable.shape
+ first = False
+ else:
+ if coord_variable.shape != shape:
+ raise CDMError("Linked coordinate variables must all be the same shape")
+
+ # We need at least as many map keys as the dimensionality of the coordinate variables
+ if len(map_keys) < len(shape):
+ indices.append(numpy.nan)
+ continue
+
+ # Now we search the coordinate space for the closest feature
+ # Find the associated coordinate variables
+ coordinate_variables = []
+ for key in map_keys:
+ coordinate_variables.append(self.variable.group.variables[self.coordinates_mapping[key]['variable']])
+
+ #print 'reversemap: coordinate_variables(dim_index=%d) = ' % (dim_index), coordinate_variables
+
+ # Get the target arguments for each coordinate variable
+ try:
+ target_args = [kwargs[key] for key in map_keys]
+ except:
+ indices.append(numpy.nan)
+ continue
+
+
+ # See if we can coerce target arguments to floats, otherwise assume they are strings...
+ targets = []
+ for arg in target_args:
+ try:
+ target = numpy.float32(arg)
+ except:
+ target = "%s" % (arg)
+
+ # Check target type agrees with coordinate variables
+ for coordinate_variable in coordinate_variables:
+ if type(target) != coordinate_variable.dtype:
+ raise CDMError("Mismatched target type and coordinate variable type")
+
+ # If we are good then append the target to the list
+ targets.append(target)
+
+
+ # For float targets and coordinate variables
+ if type(target) == numpy.float32:
+
+ # Initialise our distance array
+ d2 = numpy.zeros(shape)
+
+ for k in range(0,len(map_keys)):
+ d2 += numpy.power(coordinate_variables[k][:] - targets[k], 2)
+
+ closest = numpy.unravel_index(d2.argmin(),shape)
+ indices.append(closest[mapping.index(dim_index)])
+
+
+ return indices
+
+
+ def __getitem__(self, params):
+ """
+ The getitem method for Fields simply delegates to the getitem method for the underlying variable
+
+ >>> import handlers.netcdf4 as netcdf4
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/gfs.nc')
+ >>> tmax = ds.root.variables['TMAX_2maboveground']
+ >>> tmax_field = Field(tmax)
+ >>> print tmax_field[0,5:7,10:12]
+ [[ 218.1000061 218.1000061 ]
+ [ 217.90000916 217.90000916]]
+
+ """
+ print "__getitem__ params: ", params
+ # Create default slices based on the dimension offsets and limits
+ selectors = []
+ for dim in range(0,len(self.variable.dimensions)):
+ selectors.append(slice(self.dimension_offsets[dim], self.dimension_limits[dim]))
+
+ print "default selectors: ", selectors
+
+ # Force params into a tuple if needed
+ if type(params) != tuple:
+ params = (params,)
+
+ # Step through each parameter
+ if type(params) == tuple:
+ index = 0
+ for param in params:
+
+ if type(param) == slice:
+ start = param.start
+ stop = param.stop
+ step = param.step
+
+ # Replace None with default values
+ if not start:
+ start = 0
+ if not stop:
+ stop = self.dimension_limits[index]
+ if not step:
+ step = 1
+
+ selectors[index] = slice(self.dimension_offsets[index]+start, self.dimension_offsets[index]+stop, step)
+
+ if type(param) == int:
+ selectors[index] = self.dimension_offsets[index]+param
+
+ if type(param) == list:
+ selectors[index] = param
+
+ # Now check if this is the id_dimension then we can try to impose the feature_mask
+ if index == self.id_dimension:
+ print "Got id_dimension, using feature_mask = ", numpy.nonzero(self.feature_mask)[0][selectors[index]]
+ selectors[index] = numpy.nonzero(self.feature_mask)[0][selectors[index]]
+
+ index += 1
+
+ print "final selectors: ", selectors
+
+
+ # Get the actual data
+ data = self.variable.__getitem__(tuple(selectors))
+ #print data.shape
+
+ # Extend the spatial mask through any other axis (currently just does axis=0, which is a big assumption!)
+ # Also apply the data slice to the mask
+ #print self.spatial_mask.shape
+ #print self.variable.shape
+ #fullmask = numpy.resize(self.feature_mask, self.variable.shape)
+ #print fullmask.shape
+ #fullmask = fullmask[slice] < 0.5
+ #print fullmask
+ #print fullmask.shape
+
+
+ # Return the masked array
+ #return numpy.ma.MaskedArray(data, mask=fullmask)
+ return data
+
+ def __getattr__(self, name):
+
+ if name == 'times':
+ return self._times()
+ else:
+ try:
+ return self.__dict__[name]
+ except KeyError:
+ raise AttributeError(name)
+
+ def latitudes(self, extend=True, latvar=None, lonvar=None):
+ """
+ Returns a numpy array of latitudes. If the latitude coordinate variable is 1D but the feature type
+ is Grid or GridSeries and extend is True, then it is extended to 2D. For Point or PointSeries types it remains 1D
+
+ >>> import handlers.netcdf4 as netcdf4
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/gfs.nc')
+ >>> tmax = ds.root.variables['TMAX_2maboveground']
+ >>> tmax_field = Field(tmax)
+ >>> latitudes = tmax_field.latitudes()
+ >>> print latitudes[:2,:2]
+ [[-89.84351531 -89.84351531]
+ [-89.64079823 -89.64079823]]
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/wrfout_d01_2010-05-28_12.nc')
+ >>> q2 = ds.root.variables['Q2']
+ >>> q2_field = Field(q2)
+ >>> print q2_field.latitudes()[:2,:2]
+ [[-39.28197861 -39.31740189]
+ [-39.07678223 -39.11207199]]
+
+ """
+ # Get the latitude and longitude variables (need longitude to get length)
+ latvar = self.variable.group.variables[self.coordinates_mapping['latitude']['variable']]
+ lonvar = self.variable.group.variables[self.coordinates_mapping['longitude']['variable']]
+
+ # First check we have a grid feature type
+ if self.featuretype() in ['Grid', 'GridSeries']:
+
+ # Then check if latitude and longitude variables are 1D
+ if len(latvar.shape) == 1 and extend:
+ latvar_2d = latvar[:].reshape((-1,1)).repeat(lonvar.shape[0], axis=1)
+ return latvar_2d
+
+ # If 1D but we don't need to extend
+ elif len(latvar.shape) == 1 and not extend:
+ return latvar[:]
+
+ # for 2D variables its easy, just return the variable data
+ elif len(latvar.shape) == 2:
+ return latvar[:]
+
+ # otherwise, we can't do it!
+ else:
+ raise CDMError("Cannot generate latitudes array, latitude variable is not 1D or 2D")
+
+ # For Point or PointSeries
+ elif self.featuretype() in ['Point', 'PointSeries']:
+ return latvar[:]
+
+ # Otherwise throw an error
+ else:
+ raise CDMError("Cannot generate latitudes array for grid type %s" % (self.featuretype()))
+
+
+ def longitudes(self, extend=True, latvar=None, lonvar=None):
+ """
+ Returns a numpy array of longitudes. If the longitude coordinate variable is 1D but the feature type
+ is Grid or GridSeries and extend is True, then it is extended to 2D. For Point or PointSeries types it remains 1D
+
+ >>> import handlers.netcdf4 as netcdf4
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/gfs.nc')
+ >>> tmax = ds.root.variables['TMAX_2maboveground']
+ >>> tmax_field = Field(tmax)
+ >>> latitudes = tmax_field.longitudes()
+ >>> print latitudes[:2,:2]
+ [[ 0. 0.2045452]
+ [ 0. 0.2045452]]
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/wrfout_d01_2010-05-28_12.nc')
+ >>> q2 = ds.root.variables['Q2']
+ >>> q2_field = Field(q2)
+ >>> print q2_field.longitudes()[:2,:2]
+ [[ 6.18844604 6.45373535]
+ [ 6.23449707 6.49893188]]
+
+ """
+ # Get the latitude and longitude variables (need latitude to get length)
+ latvar = self.variable.group.variables[self.coordinates_mapping['latitude']['variable']]
+ lonvar = self.variable.group.variables[self.coordinates_mapping['longitude']['variable']]
+
+ # First check we have a grid feature type
+ if self.featuretype() in ['Grid', 'GridSeries']:
+
+ # Then check if latitude and longitude variables are 1D and we need to extend
+ if len(lonvar.shape) == 1 and extend:
+ lonvar_2d = lonvar[:].reshape((-1,1)).transpose().repeat(latvar.shape[0], axis=0)
+ return lonvar_2d
+
+ # If 1D but we don't need to extend
+ elif len(lonvar.shape) == 1 and not extend:
+ return lonvar[:]
+
+ # for 2D variables its easy, just return the variable data
+ elif len(lonvar.shape) == 2:
+ return lonvar[:]
+
+ # otherwise, we can't do it!
+ else:
+ raise CDMError("Cannot generate latitudes array, latitude variable is not 1D or 2D")
+
+ # For Point or PointSeries
+ elif self.featuretype() in ['Point', 'PointSeries']:
+ return lonvar[:]
+
+ # Otherwise throw an error
+ else:
+ raise CDMError("Cannot generate latitudes array for grid type %s" % (self.featuretype()))
+
+ def featuretype(self):
+ """
+ Returns the type of features represented by this variable. Can be one of:
+
+ Point: Values defined at discrete points for a single time
+ PointSeries: Values defined at discrete points over a number of time steps
+ Grid: Values defined on a rectangular grid for a single time (may also include multiple vertical levels)
+ GridSeries: Values defined on a rectangular grid over a number of time steps (may also include multiple vertical levels)
+
+ >>> import handlers.netcdf4 as netcdf4
+ >>> import handlers.csag2 as csag2
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/wrfout_d01_2010-05-28_12.nc')
+ >>> q2 = ds.root.variables['Q2']
+ >>> q2_field = Field(q2)
+ >>> q2_field.featuretype()
+ 'Grid'
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/pr_AFR-44_CCCMA-CanESM2_historical_r1i1p1_SMHI-RCA4_v0_mon_200101_200512.nc')
+ >>> pr = ds.root.variables['pr']
+ >>> pr_field = Field(pr)
+ >>> pr_field.featuretype()
+ 'GridSeries'
+
+ >>> ds = csag2.csag2Dataset('testdata/csag2/ghcnd.2.afr/ppt')
+ >>> ppt = ds.root.variables['PPT']
+ >>> ppt_field = Field(ppt)
+ >>> ppt_field.featuretype()
+ 'PointSeries'
+
+
+ """
+
+ # If we have a time coordinate system then we have a Series
+ if 'time' in self.coordinates_mapping:
+ series_string = 'Series'
+ else:
+ series_string = ''
+
+
+ # We need latitude and longitude mappings as a starting point
+ if 'latitude' in self.coordinates_mapping and 'longitude' in self.coordinates_mapping:
+ lat_map = self.coordinates_mapping['latitude']
+ lon_map = self.coordinates_mapping['longitude']
+
+ # For Point and cartesian grid features, latitude and longitude coordinate variables must be 1D
+ if len(lat_map['map']) == 1 and len(lon_map['map']) == 1:
+
+ # For Point features lat/lon must be accessed through the coordinates attribute
+ if 'coordinates' in lat_map and 'coordinates' in lon_map:
+ return 'Point%s' % (series_string)
+
+ # else we have a cartesian grid
+ else:
+ return 'Grid%s' % (series_string)
+
+ # For non cartesian grids we need 2D latitude and longitude coordinate variables
+ if len(lat_map['map']) == 2 and len(lon_map['map']) == 2:
+ return 'Grid%s' % (series_string)
+
+ # If we don't have latitude and longitude mappings then we are stuck
+ else:
+ return None
+
+ def _times(self):
+ """
+ Returns the data array of the time coordinate variable. The time coordinate variable is identified by the compliance of its units with
+ the udunits time units string
+
+ >>> import handlers.csag2 as csag2
+
+ >>> ds = csag2.csag2Dataset('testdata/csag2/ghcnd.2.afr/ppt')
+ >>> ppt = ds.root.variables['PPT']
+ >>> ppt_field = Field(ppt)
+ >>> print ppt_field._times()
+ [ 1 2 3 ..., 56276 56277 56278]
+
+ """
+ try:
+ timevar = self.variable.group.variables[self.coordinates_mapping['time']['variable']]
+ except:
+ return None
+
+ return timevar[:]
+
+ def features(self, mask=None, propnames=None):
+ """
+ Produces a geoJSON structured dict that represents the feature collection of the field. At the moment the following assumptions
+ are made:
+ Point type feature collections are represented as unconnected collections of points.
+ Rectangular/cartesian grid feature collections are represented as a collection of rectangular polygons centered on the
+ lat/lon location of a the grid point.
+ Rectangular/non-cartesian grids are presented as rectangles centered on the lat/lon location. Corners are interpolated from
+ diagonal grid point locations and extrapolated on the edges. This may not be perfect but is a good working start.
+
+ The optional mask parameter specifies a masking field that needn't be congruent as the mask values will be extracted based on
+ the nearest lat/lon search.
+
+ The optional propnames parameter specifies a list of property labels that correspond to data values. This allows multiple
+ properties to be added to each feature if the field has other dimensions such as time. Each property name corresponds to successive data values
+ after the grid point data array has been flattened.
+
+ >>> import handlers.netcdf4 as netcdf4
+ >>> import handlers.csag2 as csag2
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/wrfout_d03_2010-01-16_12.nc.24hours')
+ >>> q2 = ds.root.variables['Q2']
+ >>> q2_field = Field(q2)
+ >>> features = q2_field.features(propnames=['hour_%d' % (hour) for hour in range(0,24)])
+
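+ A single grid cell is emitted as a geoJSON Polygon feature of the form (a sketch; values illustrative):
+
+     {'type': 'Feature',
+      'properties': {'id': 42},
+      'geometry': {'type': 'Polygon', 'coordinates': [[[lon, lat], ...]]}}
+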
+ """
+ result = {'type': 'FeatureCollection', 'features':[]}
+ features = []
+
+ # We deal with grid type collections first
+ if self.featuretype() in ['Grid', 'GridSeries']:
+
+ # Get center point latitudes and longitudes
+ latitudes = self.latitudes()
+ longitudes = self.longitudes()
+ shape = latitudes.shape
+
+ # How do we slice the data to get grid point values?
+ times = self._times()
+ index = 0
+ for dim in self.variable.dimensions:
+ if dim.length == shape[0]:
+ y_index = index
+ if dim.length == shape[1]:
+ x_index = index
+ if times is not None and dim.length == len(times):
+ t_index = index
+ index += 1
+
+
+ # Create the initial slices with indices defaulting to 0
+ slices = [0]*len(self.variable.dimensions)
+ if times is not None:
+ slices[t_index] = slice(0, self.variable.dimensions[t_index].length)
+
+
+ # Create corner point latitude longitude arrays
+ corner_lats = numpy.zeros((shape[0]+1, shape[1]+1))
+ corner_lons = numpy.zeros((shape[0]+1, shape[1]+1))
+
+ # Step through all the interior grid points
+ for y in range(1, shape[0]):
+ for x in range(1, shape[1]):
+ corner_lats[y,x] = (latitudes[y, x-1] + latitudes[y,x] + latitudes[y-1,x-1] + latitudes[y-1,x])/4
+ corner_lons[y,x] = (longitudes[y, x-1] + longitudes[y,x] + longitudes[y-1,x-1] + longitudes[y-1,x])/4
+
+ # Left boundary
+ x = 0
+ for y in range(1,shape[0]):
+ tmp_lat = (latitudes[y,x] + latitudes[y-1,x])/2
+ tmp_lon = (longitudes[y,x] + longitudes[y-1,x])/2
+ corner_lats[y,x] = tmp_lat - (corner_lats[y,x+1] - tmp_lat)
+ corner_lons[y,x] = tmp_lon - (corner_lons[y,x+1] - tmp_lon)
+
+
+ # Right boundary
+ x = shape[1]
+ for y in range(1,shape[0]):
+ tmp_lat = (latitudes[y,x-1] + latitudes[y-1,x-1])/2
+ tmp_lon = (longitudes[y,x-1] + longitudes[y-1,x-1])/2
+ corner_lats[y,x] = tmp_lat - (corner_lats[y,x-1] - tmp_lat)
+ corner_lons[y,x] = tmp_lon - (corner_lons[y,x-1] - tmp_lon)
+
+
+ # Bottom boundary
+ y = 0
+ for x in range(1,shape[1]):
+ tmp_lat = (latitudes[y,x] + latitudes[y,x-1])/2
+ tmp_lon = (longitudes[y,x] + longitudes[y,x-1])/2
+ corner_lats[y,x] = tmp_lat - (corner_lats[y+1,x] - tmp_lat)
+ corner_lons[y,x] = tmp_lon - (corner_lons[y+1,x] - tmp_lon)
+
+ # Top boundary
+ y = shape[0]
+ for x in range(1,shape[1]):
+ tmp_lat = (latitudes[y-1,x] + latitudes[y-1,x-1])/2
+ tmp_lon = (longitudes[y-1,x] + longitudes[y-1,x-1])/2
+ corner_lats[y,x] = tmp_lat - (corner_lats[y-1,x] - tmp_lat)
+ corner_lons[y,x] = tmp_lon - (corner_lons[y-1,x] - tmp_lon)
+
+ # Corners
+ corner_lats[0,0] = latitudes[0,0] - (corner_lats[1,1] - latitudes[0,0])
+ corner_lats[0,shape[1]] = latitudes[0,shape[1]-1] - (corner_lats[1,shape[1]-1] - latitudes[0,shape[1]-1])
+ corner_lats[shape[0],0] = latitudes[shape[0]-1,0] + (latitudes[shape[0]-1,0] - corner_lats[shape[0]-1,1])
+ corner_lats[shape[0],shape[1]] = latitudes[shape[0]-1,shape[1]-1] + (latitudes[shape[0]-1,shape[1]-1] - corner_lats[shape[0]-1,shape[1]-1])
+
+ corner_lons[0,0] = longitudes[0,0] - (corner_lons[1,1] - longitudes[0,0])
+ corner_lons[0,shape[1]] = longitudes[0,shape[1]-1] + (longitudes[0,shape[1]-1] - corner_lons[1,shape[1]-1])
+ corner_lons[shape[0],0] = longitudes[shape[0]-1,0] - (corner_lons[shape[0]-1,1] - longitudes[shape[0]-1,0])
+ corner_lons[shape[0],shape[1]] = longitudes[shape[0]-1,shape[1]-1] + (longitudes[shape[0]-1,shape[1]-1] - corner_lons[shape[0]-1,shape[1]-1])
+
+
+# print corner_lats
+
+ # Now create all polygons
+ for y in range(0, shape[0]):
+ for x in range(0, shape[1]):
+ # Configure the slices
+ slices[x_index] = slice(x,x+1)
+ slices[y_index] = slice(y,y+1)
+
+ # Check if we are masking and if this point is masked
+ #masked = False
+
+ #if mask:
+ # if mask[y, x] < 0.5:
+ # masked = True
+
+
+ if self.feature_mask[y,x] > 0.5:
+ #print y, x
+ vertices = []
+ vertices.append([corner_lons[y, x], corner_lats[y,x]])
+ vertices.append([corner_lons[y+1, x], corner_lats[y+1,x]])
+ vertices.append([corner_lons[y+1, x+1], corner_lats[y+1,x+1]])
+ vertices.append([corner_lons[y, x+1], corner_lats[y,x+1]])
+ vertices.append([corner_lons[y, x], corner_lats[y,x]])
+
+ # Create the basic feature
+ feature = {'type': 'Feature', 'properties':{'id':x + y * shape[1]}, 'geometry': {'type': 'Polygon', 'coordinates': [vertices]}}
+
+ # Now add the data
+
+ # If we have property names then extract successive data values for each name
+ if propnames:
+ data = self.variable[slices].flatten()
+ for name in propnames:
+ feature['properties'][name] = float(data[propnames.index(name)])
+
+ # else just set property 'value' to the first value of the flattened data array
+ else:
+ feature['properties']['value'] = float(self.variable[slices].flatten()[0])
+ features.append(feature)
+
+ result['features'] = features
+
+
+ # Debug output of the feature collection (disabled)
+ #outfile = open('test.json', 'w')
+ #outfile.write(json.dumps(result))
+ #outfile.close()
+
+
+ # Point type feature sets next
+ elif self.featuretype() in ['Point', 'PointSeries']:
+
+ latitudes = self.latitudes()
+ longitudes = self.longitudes()
+
+ # Step through all the points
+ for id in range(len(self.latitudes())):
+
+ if (self.feature_mask[id] > 0):
+
+ # Create the basic feature
+ feature = {'type': 'Feature', 'properties':{'id':id}, 'geometry': {'type': 'Point', 'coordinates': [longitudes[id], latitudes[id]]}}
+ features.append(feature)
+
+ result['features'] = features
+
+ else:
+ return None
+
+
+ return result
+
+ def asJSON(self):
+
+ return json.dumps(self.features())
+
+
+ def asKML(self):
+ """
+ >>> import handlers.netcdf4 as netcdf4
+ >>> import handlers.csag2 as csag2
+
+ >>> #ds = netcdf4.netCDF4Dataset('testdata/wrfout_d01_2010-05-28_12.nc')
+ >>> #ds = netcdf4.netCDF4Dataset('testdata/wrfout_d04_2010-04-25_12.nc')
+ >>> #ds = netcdf4.netCDF4Dataset('testdata/wrfout_d03_2010-01-16_12.nc.24hours')
+ >>> #q2 = ds.root.variables['Q2']
+ >>> #q2_field = Field(q2)
+ >>> #print q2_field
+ >>> #q2_field.asKML()
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/gfs.subset.nc')
+ >>> prate = ds.root.variables['PRATE_surface']
+ >>> prate_field = Field(prate)
+ >>> #kml = prate_field.asKML()
+
+ """
+
+ #colormap = styles.colormap('rnb.gs')
+
+ kml = StringIO.StringIO()
+ kml.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+ kml.write('<kml xmlns="http://www.opengis.net/kml/2.2">\n')
+ kml.write('<Document>\n')
+
+ # Get global maximum and minimum (this could be expensive!). NOTE: the hard-coded dimension reduction below is a hack!
+ #value_max = self.variable[24,:,:].max()
+ #value_min = self.variable[24,:,:].min()
+
+ # Create polygon color styles
+ #index = 1
+ #for color in colormap:
+
+ # kml.write('<Style id="color%d">\n' % (index))
+ # kml.write('<PolyStyle>\n')
+ # kml.write('<color>#%.x%.2x%.2x%.2x</color>\n' % (255, color[2], color[1], color[0]))
+ # kml.write('</PolyStyle>\n')
+ # kml.write('<LineStyle>\n')
+ # kml.write('<color>#00000000</color>\n')
+ # kml.write('</LineStyle>\n')
+ # kml.write('</Style>\n')
+ #
+ # index += 1
+
+ for feature in self.features()['features']:
+
+ if feature['geometry']['type'] == 'Polygon':
+
+ value = feature['properties']['value']
+
+ # style_index = int(len(colormap) * (value - value_min)/(value_max - value_min))+1
+ # if style_index < 0:
+ # style_index = 0
+ # if style_index >= len(colormap):
+ # style_index = len(colormap) - 1
+
+
+ kml.write('<Placemark>\n')
+ kml.write('<description>%s = %f (%s)</description>\n' % (self.variable.name, value, self.variable.getAttribute('units')))
+ # kml.write('<styleUrl>#color%d</styleUrl>\n' % (style_index))
+ kml.write('<Polygon>\n')
+ kml.write('<extrude>0</extrude>\n')
+ kml.write('<altitudeMode>clampToGround</altitudeMode>\n')
+ kml.write('<outerBoundaryIs>\n')
+ kml.write('<LinearRing>\n')
+ kml.write('<coordinates>\n')
+
+ for coordinate in feature['geometry']['coordinates'][0]:
+ kml.write('%f, %f, 2\n' % (coordinate[0], coordinate[1]))
+
+ kml.write('</coordinates>\n')
+ kml.write('</LinearRing>\n')
+ kml.write('</outerBoundaryIs>\n')
+ kml.write('</Polygon>\n')
+ kml.write('</Placemark>\n')
+
+
+ kml.write('</Document>\n')
+ kml.write('</kml>\n')
+
+ result = kml.getvalue()
+ kml.close()
+
+ return result
+
+ def inside(self, geometry, strict=True):
+ """
+ Updates the feature_mask by masking out any features that fall outside the provided geometry.
+
+ The strict flag determines whether features that partially fall outside the geometry are masked (True) or
+ not masked (False)
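+
+ Usage sketch (geometry can be any shapely geometry; names are illustrative):
+
+     from shapely.geometry import box
+     field.inside(box(15.0, -35.0, 35.0, -20.0))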
+ """
+
+ # We need a temporary mask
+ new_mask = numpy.zeros_like(self.feature_mask)
+
+ # First let's get the lat/lon bounds of the geometry
+ bounds = geometry.bounds
+ #print bounds
+
+ # First we crop to the geometry bounding box as this speeds up the later geometry search
+ # We produce masked latitude longitude arrays and update the dimension offsets and limits from these
+ # This only really works for Grid feature types but we can do it for Point feature types as well... results
+ # will vary depending on how the points are ordered in space, but won't cause any problems.
+ if self.featuretype() in ['Grid', 'GridSeries']:
+
+ # Just a simple numpy masking of the 1D/2D latitude and longitude arrays
+ longitudes_masked = numpy.ma.masked_outside(self.longitudes(extend=False), bounds[0], bounds[2])
+ latitudes_masked = numpy.ma.masked_outside(self.latitudes(extend=False), bounds[1], bounds[3])
+
+ # We'll need the latitude and longitude index maps
+ longitude_map = self.coordinates_mapping['longitude']['map']
+ latitude_map = self.coordinates_mapping['latitude']['map']
+
+ print "longitude_map = ", longitude_map, longitude_map.index(self.x_dimension)
+ print "latitude_map = ", latitude_map, latitude_map.index(self.y_dimension)
+
+ # Get the index ranges using argmin. argmin axis is determined by the x_dimension index within the map (complex but correct!)
+ xmin = longitudes_masked.argmin(axis=longitude_map.index(self.x_dimension)).min() - 1
+ xmax = longitudes_masked.argmax(axis=longitude_map.index(self.x_dimension)).max() + 1
+
+ ymin = latitudes_masked.argmin(axis=latitude_map.index(self.y_dimension)).min() - 1
+ ymax = latitudes_masked.argmax(axis=latitude_map.index(self.y_dimension)).max() + 1
+
+ #print xmin, xmax
+ #print ymin, ymax
+
+ self.dimension_offsets[self.x_dimension] = xmin
+ self.dimension_offsets[self.y_dimension] = ymin
+ self.dimension_limits[self.x_dimension] = xmax
+ self.dimension_limits[self.y_dimension] = ymax
+
+ # Now we can iterate through grid points and mask based on the geometry
+ if len(self.longitudes().shape) == 2:
+ for y in range(self.dimension_offsets[self.y_dimension], self.dimension_limits[self.y_dimension]):
+ for x in range(self.dimension_offsets[self.x_dimension], self.dimension_limits[self.x_dimension]):
+
+ # We need to order the lat/lon array indices correctly based on the x_dimension and y_dimension variables
+
+
+ # Setup empty slice lists
+ lat_slices = []
+ lon_slices = []
+
+ for i in latitude_map:
+ if i == self.x_dimension:
+ lat_slices.append(slice(x,x+1))
+ if i == self.y_dimension:
+ lat_slices.append(slice(y,y+1))
+
+ for i in longitude_map:
+ if i == self.x_dimension:
+ lon_slices.append(slice(x,x+1))
+ if i == self.y_dimension:
+ lon_slices.append(slice(y,y+1))
+
+ #print x,y,lon_slices, lat_slices
+
+ #print x, y, slices
+ lat = self.latitudes(extend=False)[lat_slices]
+ lon = self.longitudes(extend=False)[lon_slices]
+
+ #print lat, lon
+
+ point = shapely.geometry.Point(lon, lat)
+ if point.within(geometry):
+ new_mask[y, x] = 1
+
+ # For discrete grids (Point type) we just step through all lat/lon combinations
+ else:
+ for i in range(0, len(self.latitudes())):
+
+ lat = self.latitudes()[i]
+ lon = self.longitudes()[i]
+
+ point = shapely.geometry.Point(lon, lat)
+ if point.within(geometry):
+ new_mask[i] = 1
+
+ self.feature_mask = new_mask
+
+
+
+ def constrain(self, **kwargs):
+ """
+ The constrain method allows constraints to be defined for each dimension of the underlying Variable using constraints on
+ associated coordinate variables. The intended result is a new Field object with associated constraint attributes, which are
+ then used to modify the __getitem__ and mapping methods. (At the moment the method returns the translated minimum and
+ maximum index lists, like subset.)
+
+ Constraints are defined as ranges in world coordinates but can be translated into either ranges or masks in data coordinates. For
+ monotonically varying coordinate variables, constraints are translated into index ranges. For non-monotonic coordinate variables,
+ constraints are translated into coordinate variable masks.
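+
+ Call sketch (keyword names follow the coordinates_mapping keys; illustrative only):
+
+     field.constrain(latitude=(-20, 20), time=(0, 100))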
+ """
+
+ result = copy.copy(self)
+
+ # For inner = True, start off with all index ranges at their default maximum
+ dim_ranges = [(0,self.variable.shape[i]) for i in range(0,len(self.variable.shape))]
+
+ # The set of minimum constraints
+ min_args = {}
+ # The set of maximum constraints
+ max_args = {}
+
+ # Extract the constraints from the kwargs
+ for constraint in kwargs:
+
+ # Try and extract the constraint arguments
+ try:
+ min, max = (kwargs[constraint])
+ except:
+ raise CDMError("Error in constraint argument, expecting a tuple of length 2")
+
+ # Swap min and max if needed
+ if min > max:
+ min, max = max, min
+
+ # Add the constraints to min and max constraints dict
+ min_args[constraint] = min
+ max_args[constraint] = max
+
+ # Add in any missing constraints, set them to global max/min
+ for key in self.coordinates_mapping:
+ varname = self.coordinates_mapping[key]['variable']
+ variable = self.variable.group.variables[varname]
+
+ if key not in min_args:
+ min_args[varname] = variable[:].min()
+
+ if key not in max_args:
+ max_args[varname] = variable[:].max()
+
+
+ #print min_args
+ #print max_args
+
+
+ min_indices = self.reversemap(**min_args)
+ max_indices = self.reversemap(**max_args)
+
+ #print min_indices
+ #print max_indices
+
+ return min_indices, max_indices
+
+
+ def timeselect(self, **kwargs):
+
+ pass
+
+ def subset(self, **kwargs):
+ """
+ subset returns a subset of the field data based on coordinate space restrictions
+ specified as either single values or tuple ranges. Coordinate space restrictions are translated
+ into data index ranges.
+
+ For non-rectangular grid coordinate spaces it is possible that coordinate space constraints will
+ interact. For example, restricting latitude could also impose a constraint on longitudes. The keyword
+ inner specifies whether constraints relax to an outer bounding box or are constrained to an inner
+ bounding box.
+
+ >>> import handlers.netcdf4 as netcdf4
+
+ >>> ds = netcdf4.netCDF4Dataset('testdata/gfs.nc')
+ >>> tmax = ds.root.variables['TMAX_2maboveground']
+ >>> tmax_field = Field(tmax)
+ >>> print tmax_field.subset(latitude=(-20,20))
+ [(0, 1), (0, 880), (0, 1760)]
+ >>> ds = netcdf4.netCDF4Dataset('testdata/pr_AFR-44_CCCMA-CanESM2_historical_r1i1p1_SMHI-RCA4_v0_mon_200101_200512.nc')
+ >>> pr = ds.root.variables['pr']
+ >>> pr_field = Field(pr)
+ >>> print pr_field.subset(lat=(-20,20), lon=(-20,20))
+ [(0, 60), (0, 201), (0, 194)]
+ """
+
+ # For inner = True, start off with all index ranges at their default maximum
+ dim_ranges = [(0,self.variable.shape[i]) for i in range(0,len(self.variable.shape))]
+
+ # The set of minimum constraints
+ min_args = {}
+ # The set of maximum constraints
+ max_args = {}
+
+ # Extract the constraints from the kwargs
+ for constraint in kwargs:
+
+ # Try and extract the constraint arguments
+ try:
+ min, max = (kwargs[constraint])
+ except:
+ raise CDMError("Error in constraint argument, expecting a tuple of length 2")
+
+ # Swap min and max if needed
+ if min > max:
+ min, max = max, min
+
+ # Add the constraints to min and max constraints dict
+ min_args[constraint] = min
+ max_args[constraint] = max
+
+ # Add in any missing constraints, set them to global max/min
+ for key in self.coordinates_mapping:
+ varname = self.coordinates_mapping[key]['variable']
+ variable = self.variable.group.variables[varname]
+
+ if key not in min_args:
+ min_args[varname] = variable[:].min()
+
+ if key not in max_args:
+ max_args[varname] = variable[:].max()
+
+
+ #print min_args
+ #print max_args
+
+
+ min_indices = self.reversemap(**min_args)
+ max_indices = self.reversemap(**max_args)
+
+ #print min_indices
+ #print max_indices
+
+ return min_indices, max_indices
Added: incubator/climate/branches/csag/pycdm/group.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/group.py?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/group.py (added)
+++ incubator/climate/branches/csag/pycdm/group.py Fri Sep 13 13:14:04 2013
@@ -0,0 +1,47 @@
+from dataset import Dataset
+
+class Group(object):
+
+ def __init__(self, name, dataset, parent=None, dimensions=None, attributes=None, variables=None):
+ """
+ Creates a new group object with a given name and within the given dataset. An empty name
+ string marks the group as a root group within the dataset. Can also assign this group a
+ position in a group hierarchy by specifying its parent group, which must be another Group
+ instance assigned to the same dataset.
+ >>> ds = Dataset()
+ >>> group = Group('', ds)
+ >>> print group.dataset
+ <CDM Dataset: None>
+ >>> print ds.root
+ <CDM Group: [root]>
+ >>> child1 = Group('child1', ds, parent=group)
+ >>> child2 = Group('child2', ds, parent=group)
+ >>> print child1.parent
+ <CDM Group: [root]>
+ >>> print child2.parent
+ <CDM Group: [root]>
+ >>> print group.children
+ [<CDM Group: child1>, <CDM Group: child2>]
+ """
+
+ self.name = name
+ self.dataset = dataset
+ self.parent = parent
+ # Avoid the shared mutable default argument pitfall by creating fresh containers
+ self.variables = variables if variables is not None else {}
+ self.attributes = attributes if attributes is not None else {}
+ self.dimensions = dimensions if dimensions is not None else []
+ self.children = []
+
+ # If there is no parent then this must be a root group; register it on the
+ # dataset and give it the display name '[root]'
+ if (not parent):
+ dataset.root = self
+ self.name = '[root]'
+
+ # Add group into the dataset's group hierarchy
+ elif (type(parent) == Group and parent.dataset == dataset):
+ self.parent = parent
+ parent.children.append(self)
+
+ def __repr__(self):
+ return "<CDM %s: %s>" % (self.__class__.__name__, self.name)
Added: incubator/climate/branches/csag/pycdm/handlers/__init__.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/handlers/__init__.py?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/handlers/__init__.py (added)
+++ incubator/climate/branches/csag/pycdm/handlers/__init__.py Fri Sep 13 13:14:04 2013
@@ -0,0 +1 @@
+__all__=['netcdf4', 'csag2']
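+
+# __all__ controls what 'from handlers import *' (used in pycdm/__init__.py)
+# pulls in: importing the netcdf4 and csag2 modules is what registers their
+# Dataset subclasses for discovery by Dataset.open()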
Added: incubator/climate/branches/csag/pycdm/handlers/csag2.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/handlers/csag2.py?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/handlers/csag2.py (added)
+++ incubator/climate/branches/csag/pycdm/handlers/csag2.py Fri Sep 13 13:14:04 2013
@@ -0,0 +1,556 @@
+
+from pycdm.dataset import Dataset
+from pycdm.group import Group
+from pycdm.variable import Dimension, Variable
+from pycdm.field import Field
+from pycdm.error import CDMError
+
+
+import numpy
+import copy
+
+import os
+import pandas
+import datetime
+import calendar
+
+# Just for the useful CF date functions!
+import netCDF4
+
+
+class csag2Variable(Variable):
+ """
+ A subclass of the CDM Variable class that implements csag2 station data variable access
+ """
+
+ def __init__(self, name, group, **kwargs):
+ """
+ Creates a new csag2Variable instance by creating a basic variable instance
+ """
+ super(csag2Variable, self).__init__(name, group, **kwargs)
+
+ def __getitem__(self, params):
+ """
+ Implements the get item array slicing method. For csag2 datasets this requires a custom response for different
+ variables, as coordinate variables are actually "virtual" variables.
+ [Possibly look at subclassing csag2Variable for different coordinate variables?]
+
+ >>> ds = csag2Dataset('../../testdata/csag2/ghcnd.2.afr/ppt')
+ >>> print ds.root.variables.keys()
+ ['altitude', 'PPT', 'longitude', 'time', 'latitude', 'id']
+ >>> print ds.root.variables['time'][-10:]
+ [56268, 56269, 56270, 56271, 56272, 56273, 56274, 56275, 56276, 56277]
+ >>> print ds.root.variables['id'][:3]
+ ['WA008227940', 'BC005452830', 'SF004002030']
+ >>> print ds.root.variables['latitude'][:3]
+ [-21.73, -25.219999999999999, -27.399999999999999]
+ >>> print ds.root.variables['longitude'][:3]
+ [13.98, 25.670000000000002, 26.620000000000001]
+ >>> print ds.root.variables['altitude'][:3]
+ [5.0, 1192.0, 1280.0]
+ >>> data = ds.root.variables['PPT'][2,30000:30010]
+ """
+
+ # Force params into a tuple so that single indices and index tuples are
+ # handled uniformly
+ if type(params) != tuple:
+ params = (params,)
+
+ # Normalise the station (id) selector: promote a single integer index to a
+ # one element slice
+ if type(params[0]) == int:
+ id_slice = slice(params[0],params[0]+1,None)
+ else:
+ id_slice = params[0]
+
+ # Normalise the time selector in the same way, defaulting to the full axis
+ if (len(params) > 1):
+ if type(params[1]) == int:
+ time_slice = slice(params[1],params[1]+1,None)
+ else:
+ time_slice = params[1]
+ else:
+ time_slice = slice(None,None,None)
+
+ # Get the selected station ids and times
+ ids = self.group.variables['id'][id_slice]
+ times = self.group.variables['time'][time_slice]
+
+ # result holds the requested subset; tmp spans the full time axis so that each
+ # station's values can be inserted at its start-date offset before the time
+ # selector is applied. Both start filled with the missing value -999.
+ result = numpy.ones((len(ids), len(times))) * -999
+ tmp = numpy.ones((len(self.group.variables['time'][:]),)) * -999
+
+ # Open each station file in turn, based on the selected ids
+ id_index = 0
+ for id in ids:
+
+ filename = '%s/%s.txt' % (self.group.dataset.location, id)
+
+ # Try and open the source data file
+ try:
+ csag2data = csag2file(filename, metaonly=False)
+ except:
+ raise CDMError("Cannot open source csag2 file %s" % (filename))
+
+ # Calculate the station's start time offset in time units
+ d0 = TimeSeriesDate.fromlong(csag2data.startdate)
+ offset = int(netCDF4.date2num(netCDF4.netcdftime.datetime(d0.year, d0.month, d0.day, 12), self.group.variables['time'].getAttribute('units')))
+
+ # Reset the scratch array, then insert this station's values at its offset
+ tmp[:] = -999
+ tmp[offset:offset+len(csag2data.values)] = csag2data.values
+
+ # Now subset using the time selector
+ result[id_index] = tmp[time_slice]
+
+ id_index += 1
+
+ # Mask out the missing values
+ return numpy.ma.masked_values(result, -999)
+
+
+class csag2Dataset(Dataset):
+ """
+ A subclass of the CDM Dataset class that implements the csag2 station data storage format
+ """
+
+ def __init__(self, location):
+ """
+ Creates a new csag2Dataset instance by trying to open the file at location and building the CDM Dataset
+ structure
+
+ >>> ds = csag2Dataset('../../testdata/csag2/ghcnd.2.afr/ppt')
+ >>> print ds
+ <CDM csag2Dataset: ../../testdata/csag2/ghcnd.2.afr/ppt>
+ >>> print ds.root.variables['time'].attributes
+ {'units': 'days since 1850-1-1 12:00:00'}
+ >>> print ds.root.dimensions
+ [<CDM Dimension: id (1783)>, <CDM Dimension: time (56278)>]
+ """
+
+ # Call the super constructor
+ super(csag2Dataset, self).__init__(location)
+
+ # csag2 datasets are actually directories containing a data file per station location so first try to
+ # open the directory specified by location
+ try:
+ allfiles = os.listdir(location)
+ allfiles.sort() # Sort to keep station ids consistent; directory ordering is arbitrary and can vary between filesystems
+ except:
+ raise CDMError('Cannot list directory %s' % (location))
+
+ # We just want the .txt files
+ filenames = [location+'/'+name for name in allfiles if name[-4:] == '.txt']
+
+ # Open all the files and read meta-data only
+ allstationfiles = [csag2file(filename) for filename in filenames]
+
+ # Separate the stations by variable name (technically they should all be the
+ # same, but we check anyway) and, while we're at it, find the overall date range
+ varstations = {}
+ ids = []
+ times = []
+ latitudes = []
+ longitudes = []
+ altitudes = []
+
+ # Sentinel dates (yyyymmddhh longs): start impossibly late and end impossibly
+ # early so the first real station date always replaces them
+ startdate = 2999123112
+ enddate = 1000010112
+
+ for station in allstationfiles:
+ if station.varname != None:
+
+ if station.startdate < startdate:
+ startdate = station.startdate
+ if station.enddate > enddate:
+ enddate = station.enddate
+
+ if station.varname not in varstations:
+ varstations[station.varname] = [station]
+ else:
+ varstations[station.varname].append(station)
+
+ ids.append(station.station_id)
+ latitudes.append(station.latitude)
+ longitudes.append(station.longitude)
+ altitudes.append(station.altitude)
+
+ # The id dimension is sized by the variable with the most stations
+ max_count = max([len(varstations[key]) for key in varstations])
+
+ # Count the number of days between the global start and end dates (inclusive),
+ # building the time coordinate values as day offsets from the start date
+ d = d1 = TimeSeriesDate.fromlong(startdate)
+ d2 = TimeSeriesDate.fromlong(enddate)
+ count = 0
+ while (d <= d2):
+ times.append(count)
+ d = d.nextdate()
+ count += 1
+
+ # Create the dimensions list
+ dimensions = [Dimension('id', max_count), Dimension('time', count)]
+
+
+ # Create the root container group
+ self.root = Group(name='', dataset=self, attributes=None, dimensions=dimensions)
+
+ # Create the actual data variables; the CF style 'coordinates' attribute ties
+ # each data variable to its auxiliary latitude/longitude coordinate variables
+ variables = {}
+ for varname in varstations:
+ vardims = ['id', 'time']
+ varattrs = {'coordinates':'latitude longitude'}
+ variables[varname] = csag2Variable(varname, group=self.root, dimensions=vardims, attributes=varattrs)
+
+ # Create the coordinate variables
+ variables['id'] = Variable('id', group=self.root, dimensions=['id'], data=numpy.array(ids))
+ variables['time'] = Variable('time', group=self.root, dimensions=['time'], data=numpy.array(times), attributes={'units':'days since %d-%d-%d 12:00:00' % (d1.year, d1.month, d1.day)})
+ variables['latitude'] = Variable('latitude', group=self.root, dimensions=['id'], data=numpy.array(latitudes), attributes={'units': 'degrees_north', 'axis': 'Y'})
+ variables['longitude'] = Variable('longitude', group=self.root, dimensions=['id'], data=numpy.array(longitudes), attributes={'units': 'degrees_east', 'axis': 'X'})
+ variables['altitude'] = Variable('altitude', group=self.root, dimensions=['id'], data=numpy.array(altitudes), attributes={'units': 'm', 'positive': 'up', 'axis': 'Z'})
+
+
+ self.root.variables = variables
+
+
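+# A note on the on-disk station file format, reconstructed from the read()
+# parser in csag2file below. There is no separate format spec in this commit,
+# so treat this summary as indicative rather than authoritative:
+#
+# - lines beginning with '#' are comments
+# - header entries are whitespace-separated KEY <sep> VALUE triples, e.g.
+# VARIABLE : PPT, LATITUDE : -21.73, START_DATE : 18500101
+# - dates are yyyymmdd integers; the parser appends an hour of 12
+# - a header row whose first and third tokens are ID and SOUID marks the
+# start of the data section
+# - data rows are comma separated: ID, SOUID, date, value, quality, errors
+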
+class csag2file(object):
+
+ def __init__(self, path, metaonly=True):
+ """
+ """
+
+ self.path = path
+ self.format = 1.0
+ self.cleaning = 0
+ self.created = None
+ self.varname = None
+ self.country = None
+ self.station_id = None
+ self.station_name = None
+ self.latitude = None
+ self.longitude = None
+ self.altitude = None
+ self.startdate = None
+ self.enddate = None
+
+ self.dates = None
+ self.values = None
+
+ # If we have a path then try and read the file
+ if path:
+ self.read(metaonly=metaonly)
+
+ def __repr__(self):
+ """Generate a string representation of the dataset, just the path"""
+
+ return "csag2(%s)" % (self.path)
+
+ def read(self, metaonly=False):
+ """Read the data file, parsing the header and, unless metaonly is set, the data section"""
+
+ # Try and open the file
+ try:
+ fp = open(self.path, 'r')
+ except IOError:
+ raise CDMError("File does not exist or cannot be opened: %s" % (self.path))
+
+
+ dates = []
+ values = []
+
+ # First we are reading the header
+ header = True
+ for line in fp:
+
+ # First parse the header
+ if header:
+ # Ignore comment lines
+ if line[0] == "#":
+ continue
+
+
+ # Get the fields in each line
+ fields = line.split()
+
+ # Pick up the format
+ if len(fields) == 2:
+ if (fields[0] == 'FORMAT'):
+ self.format = fields[1]
+
+ # Get all the parameters
+ if len(fields) == 3:
+
+ if (fields[0] == 'CLEANING'):
+ self.cleaning = int(fields[2])
+ if (fields[0] == 'CREATED'):
+ self.created = int(fields[2]+"12")
+ if (fields[0] == 'VARIABLE'):
+ self.varname = fields[2]
+ if (fields[0] == 'COUNTRY'):
+ self.country = fields[2]
+ if (fields[0] == 'ID'):
+ self.station_id = fields[2]
+ if (fields[0] == 'NAME'):
+ self.station_name = fields[2].strip('_')
+ if (fields[0] == 'LATITUDE'):
+ self.latitude = float(fields[2])
+ if (fields[0] == 'LONGITUDE'):
+ self.longitude = float(fields[2])
+ if (fields[0] == 'ALTITUDE'):
+ self.altitude = float(fields[2])
+ if (fields[0] == 'START_DATE'):
+ self.startdate = int(fields[2]+"12")
+ if (fields[0] == 'END_DATE'):
+ self.enddate = int(fields[2]+"12")
+
+ if len(fields) > 3:
+
+ # Catch the start of the data section
+ if (fields[0] == "ID" and fields[2] == "SOUID"):
+ header = False
+ if (metaonly):
+ break
+
+ # Else we are reading the data section: comma separated rows of
+ # ID, SOUID, date (yyyymmdd), value, quality flag, error flag
+ else:
+ if not metaonly:
+ fields = [field.strip() for field in line.strip().split(",")]
+ id = fields[0]
+ souid = fields[1]
+ date = int(fields[2]+"12")
+ val = float(fields[3])
+ quality = fields[4]
+ errors = fields[5]
+
+ dates.append(date)
+ values.append(val)
+
+ fp.close()
+
+ if (values and dates):
+ self.dates = numpy.array(dates)
+ self.values = numpy.array(values)
+
+
+
+ def asdict(self):
+ return {'name':self.station_name, 'id':self.station_id, 'variable': self.varname, 'latitude': self.latitude,
+ 'longitude':self.longitude, 'altitude':self.altitude, 'country':self.country, 'created':self.created, 'cleaning':self.cleaning, 'format':self.format}
+
+
+
+class TimeSeriesError(Exception):
+ """Raised for invalid dates or date arithmetic errors in TimeSeriesDate"""
+ pass
+
+
+class TimeSeriesDate(object):
+ """A multi-calendar date class
+ TimeSeriesDate objects represent dates defined in different calendar systems:
+ Normal - Normal Gregorian Calendar with standard leap years
+ Noleap - Normal Gregorian Calendar but with no leap years
+ 360day - 30 day months, 360 day years"""
+
+
+ def __init__(self):
+ "Basic constructor, class methods fromparts or from long should be used to create new instances"
+
+ self.year = 2000
+ self.month = 1
+ self.day = 1
+ self.hour = 0
+ self.calendar = "julian"
+
+
+ @classmethod
+ def fromparts(cls, year, month, day, hour, cal="julian"):
+ """Class method to instantiate a new date with year, month, day and hour parts.
+ Valid years are from 0 to 9999 (representational limit)"""
+
+ date = cls()
+
+ if (year >= 0 and year <= 9999):
+ date.year = year
+ else:
+ raise TimeSeriesError, "Invalid year %d" % (year)
+
+ if (month >= 1 and month <= 12):
+ date.month = month
+ else:
+ raise TimeSeriesError, "Invalid month %d" % (month)
+
+ if (day >= 1 and day <= cls.daysinmonth(year, month, cal)):
+ date.day = day
+ else:
+ raise TimeSeriesError, "Invalid day %d" % (day)
+
+ if (hour >= 0 and hour <= 23):
+ date.hour = hour
+ else:
+ raise TimeSeriesError, "Invalid hour %d" % (hour)
+
+ return date
+
+ @classmethod
+ def fromlong(cls, long):
+
+ if (long > 9999999999):
+ raise TimeSeriesError, "Invalid date long %ld" % (long)
+
+ date = cls()
+ date.year = int(long/1000000)
+ date.month = int((long - date.year*1000000)/10000)
+ if (date.month < 1 or date.month > 12):
+ raise TimeSeriesError, "Invalid month part in %ld" % (long)
+
+ date.day = int((long - date.year*1000000 - date.month*10000)/100)
+ if (date.day < 1 or date.day > 31):
+ raise TimeSeriesError, "Invalid day part %ld" % (long)
+
+ date.hour = int(long - date.year*1000000 - date.month*10000 - date.day*100)
+ if (date.hour < 0 or date.hour > 23):
+ raise TimeSeriesError, "Invalid hour part %ld" % (long)
+
+ return date
+
+ # A bunch of comparators
+ def __lt__(self, other):
+ return (self.aslong() < other.aslong())
+
+ def __le__(self, other):
+ return (self.aslong() <= other.aslong())
+
+ def __gt__(self, other):
+ return (self.aslong() > other.aslong())
+
+ def __ge__(self, other):
+ return (self.aslong() >= other.aslong())
+
+ def __eq__(self, other):
+ return (self.aslong() == other.aslong())
+
+ def __repr__(self):
+ return "TimeSeriesDate: %04d%02d%02d%02d" % (self.year, self.month, self.day, self.hour)
+
+ def aslong(self):
+ return self.year*1000000 + self.month*10000 + self.day*100 + self.hour
+
+ @classmethod
+ def daysinmonth(cls, year, month, cal="julian"):
+ """Return the number of days in the specified month
+ Optionally specify a non-standar calendar system"""
+
+ # Standard gregorian month lengths (non leap year)
+ days = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
+
+ # Throw exception for non valid months
+ if (month < 1 or month > 12):
+ raise TimeSeriesError, "Invalid month (%d)" % (month)
+
+ # For 360 day calendar result is trivial, all months are 30 days
+ if (cal == "360day"):
+ return 30
+ else:
+ # Deal with February for normal and noleap calendars
+ if (month == 2):
+ if ((cal == "noleap") or (not calendar.isleap(year))):
+ return 28
+ else:
+ return 29
+ # Else return standard month lengths
+ else:
+ return days[month-1]
+
+
+ def nextdate(self, frequency="daily"):
+ """Calculate the next date in a sequence
+ frequency defaults to daily but can be specified as:
+ monthly (increment month, keeping day of month and time the same)
+ monthly_climate (same as monthly but the year never increments) NOTE: This needs better defining as it is a bounded sequence!
+ annual (increment year, keeping month, day and time the same)"""
+
+ # For daily sequences
+ if (frequency == "daily"):
+
+ day = self.day
+ month = self.month
+ year = self.year
+
+ monthlength = self.daysinmonth(year, month, cal=self.calendar)
+ day += 1
+
+ if (day > monthlength):
+ day = 1; month += 1
+ if (month > 12):
+ month = 1; year += 1
+
+ result = TimeSeriesDate.fromparts(year, month, day, self.hour, cal = self.calendar)
+
+ return result
+
+ # For monthly sequences
+ elif (frequency == "monthly" or self.frequency == "monthly_climate"):
+ month = self.month
+ year = self.year
+ month += 1
+
+ if (month > 12):
+ month = 1; year += 1
+
+ result = TimeSeriesDate.fromparts(year, month, self.day, self.hour, cal = self.calendar)
+
+ return result
+
+ # For annual sequences
+ elif (frequency == "annual"):
+ year = date.year
+ year += 1
+
+ if (year > 9999):
+ raise TimeSeriesError, "Invalid year %d > 9999" % (year)
+
+ result = TimeSeriesDate.fromparts(year, self.month, self.day, self.hour, cal = self.calendar)
+
+ return result
+
+ # Unknown frequencies are an error rather than silently returning None
+ else:
+ raise TimeSeriesError, "Unknown frequency %s" % (frequency)
+
+
+
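
Since TimeSeriesDate packs dates as yyyymmddhh longs, date arithmetic reduces to
integer packing and unpacking. A short worked sketch (the outputs follow
directly from the class definitions above):

    from pycdm.handlers.csag2 import TimeSeriesDate

    d = TimeSeriesDate.fromlong(1850010112)        # 1850-01-01 12:00
    print d                                        # TimeSeriesDate: 1850010112
    print d.aslong()                               # 1850010112
    print d.nextdate()                             # TimeSeriesDate: 1850010212
    print TimeSeriesDate.daysinmonth(1850, 2)      # 28 (1850 is not a leap year)
    print TimeSeriesDate.daysinmonth(1850, 2, cal="360day")  # 30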
Added: incubator/climate/branches/csag/pycdm/handlers/netcdf4.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/handlers/netcdf4.py?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/handlers/netcdf4.py (added)
+++ incubator/climate/branches/csag/pycdm/handlers/netcdf4.py Fri Sep 13 13:14:04 2013
@@ -0,0 +1,104 @@
+
+# Make the pycdm package importable when this module is run directly
+import sys
+sys.path.append('..')
+
+from pycdm.dataset import Dataset
+from pycdm.group import Group
+from pycdm.variable import Dimension, Variable
+from pycdm.field import Field
+
+import netCDF4
+import numpy
+import copy
+
+
+
+class netCDF4Variable(Variable):
+ """
+ A subclass of the CDM Variable class that implements netcdf4 variable access
+ """
+
+ def __init__(self, name, group, **kwargs):
+ """
+ Creates a new netCDF4Variable instance by creating a basic variable instance and adding in the netCDF4 varobj instance
+ """
+ super(netCDF4Variable, self).__init__(name, group, **kwargs)
+ self.data = group.dataset.ncfile.variables[name]
+
+ def __getitem__(self, index):
+ """
+ Implements the get item array slicing method
+
+ >>> ds = netCDF4Dataset('../testdata/gfs.nc')
+ >>> print ds.root.variables['latitude'][:2]
+ [-89.84351531 -89.64079823]
+ """
+
+ return self.data[index]
+
+
+class netCDF4Dataset(Dataset):
+ """
+ A subclass of the CDM Dataset class that implements the netCDF4 data storage format
+ """
+
+ def __init__(self, location):
+ """
+ Creates a new netCDF4Dataset instance by trying to open the file at location and building the CDM Dataset
+ structure
+
+ >>> ds = netCDF4Dataset('../testdata/gfs.nc')
+ >>> print ds.root.dimensions
+ [<CDM Dimension: latitude (880)>, <CDM Dimension: longitude (1760)>, <CDM Dimension: time (1)>]
+ >>> print ds.root.variables['latitude']
+ <CDM netCDF4Variable: latitude>
+ >>> print ds.root.variables['TMAX_2maboveground'].dimensions
+ [<CDM Dimension: time (1)>, <CDM Dimension: latitude (880)>, <CDM Dimension: longitude (1760)>]
+ >>> print ds.root.attributes
+ OrderedDict([(u'Conventions', u'COARDS'), (u'History', u'created by wgrib2'), (u'GRIB2_grid_template', 40)])
+ >>> print ds.root.variables['latitude'].attributes
+ OrderedDict([(u'units', u'degrees_north'), (u'long_name', u'latitude')])
+
+ We can also use the group method coordinateVariables to get a list of coordinate variables from a group
+
+ >>> print ds.root.coordinateVariables
+ [<CDM netCDF4Variable: latitude>, <CDM netCDF4Variable: longitude>, <CDM netCDF4Variable: time>]
+
+ >>> field = Field(ds.root.variables['TMAX_2maboveground'])
+ >>> print field
+ <CDM Field: [<CDM netCDF4Variable: time>, <CDM netCDF4Variable: latitude>, <CDM netCDF4Variable: longitude>]
+
+ >>> print field.coordinates((0,440,1000))
+ (1332892800.0, 0.10221463428702571, 204.5451961341671)
+
+ """
+
+ # Call the super constructor
+ super(netCDF4Dataset, self).__init__(location)
+
+ # Open the NetCDF4 file
+ self.ncfile = netCDF4.Dataset(self.location, 'r')
+
+ # Create the global attributes dictionary
+ attributes = copy.deepcopy(self.ncfile.__dict__)
+
+ # Create the dimensions list
+ dimensions = [Dimension(name, len(dimobj)) for name, dimobj in self.ncfile.dimensions.items()]
+
+ # Create the group
+ self.root = Group(name='', dataset=self, attributes=attributes, dimensions=dimensions)
+
+ # Create the variables
+ variables = {}
+ for varname, varobj in self.ncfile.variables.items():
+ # Create the dimensions list
+ vardims = [unicode(name) for name in varobj.dimensions]
+ varattrs = copy.copy(varobj.__dict__)
+ variables[varname] = netCDF4Variable(varname, group=self.root, dimensions=vardims, attributes=varattrs)
+
+ self.root.variables = variables
+
+
+
+
+
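
Both handlers follow the same pattern: subclass Variable and implement
__getitem__ for the storage format, then subclass Dataset and build the
Group/Dimension/Variable tree in the constructor. A skeleton for a hypothetical
new handler (all names here are illustrative, not part of this commit):

    from pycdm.dataset import Dataset
    from pycdm.group import Group
    from pycdm.variable import Dimension, Variable

    class myformatVariable(Variable):

        def __getitem__(self, index):
            # read and return the requested slice from the backing store
            raise NotImplementedError

    class myformatDataset(Dataset):

        def __init__(self, location):
            super(myformatDataset, self).__init__(location)

            # discover dimensions from the store, then build the root group
            dimensions = [Dimension('time', 0)]
            self.root = Group(name='', dataset=self, dimensions=dimensions)

            # populate with myformatVariable instances keyed by name
            self.root.variables = {}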
Added: incubator/climate/branches/csag/pycdm/styles.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/styles.py?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/styles.py (added)
+++ incubator/climate/branches/csag/pycdm/styles.py Fri Sep 13 13:14:04 2013
@@ -0,0 +1,28 @@
+
+def colormap(filename):
+ """Read a colormap file and return a list of (r, g, b) tuples"""
+
+ try:
+ lines = open(filename).readlines()
+ except IOError:
+ print "Can't open colormap file %s" % (filename)
+ return []
+
+ map = []
+
+ # Skip the final line of the file
+ for line in lines[:-1]:
+ fields = line.split()
+ r, g, b = int(fields[2]), int(fields[3]), int(fields[4])
+
+ map.append((r, g, b))
+
+ return map
+
+def mapcolor(value, min, max, map):
+ """Linearly map a value in the range [min, max] onto an entry of the colormap"""
+
+ if (value < min):
+ return map[0]
+
+ # >= avoids indexing one past the end of the map when value == max
+ if (value >= max):
+ return map[-1]
+
+ # float() guards against integer division under Python 2
+ return map[int(len(map) * float(value-min)/(max-min))]
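
A usage sketch for the two helpers. The colormap file layout, with r, g and b in
the third to fifth whitespace-separated columns, is inferred from colormap()
above, and the filename is hypothetical:

    from pycdm.styles import colormap, mapcolor

    cmap = colormap('rainbow.rgb')         # list of (r, g, b) tuples

    print mapcolor(15.0, 0.0, 30.0, cmap)  # entry near the middle of the map
    print mapcolor(-5.0, 0.0, 30.0, cmap)  # clamped to cmap[0]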
Added: incubator/climate/branches/csag/pycdm/testdata
URL: http://svn.apache.org/viewvc/incubator/climate/branches/csag/pycdm/testdata?rev=1522915&view=auto
==============================================================================
--- incubator/climate/branches/csag/pycdm/testdata (added)
+++ incubator/climate/branches/csag/pycdm/testdata Fri Sep 13 13:14:04 2013
@@ -0,0 +1 @@
+link ../testdata/
\ No newline at end of file
Propchange: incubator/climate/branches/csag/pycdm/testdata
------------------------------------------------------------------------------
svn:special = *