You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by bo...@apache.org on 2013/08/23 00:05:48 UTC

svn commit: r1516637 - in /incubator/climate/trunk/ocw: data_source/local.py tests/test_local.py

Author: boustani
Date: Thu Aug 22 22:05:48 2013
New Revision: 1516637

URL: http://svn.apache.org/r1516637
Log:
CLIMATE-268: Cam, Mike and I improved local.py to handle a 4D array of values by converting it to a 3D array; it also checks the dimensions of lat, lon and time to make sure they are 1D arrays. test_local.py has been updated and passes all tests.

Modified:
    incubator/climate/trunk/ocw/data_source/local.py
    incubator/climate/trunk/ocw/tests/test_local.py

Modified: incubator/climate/trunk/ocw/data_source/local.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/ocw/data_source/local.py?rev=1516637&r1=1516636&r2=1516637&view=diff
==============================================================================
--- incubator/climate/trunk/ocw/data_source/local.py (original)
+++ incubator/climate/trunk/ocw/data_source/local.py Thu Aug 22 22:05:48 2013
@@ -24,6 +24,9 @@ from datetime import timedelta ,datetime
 import calendar
 import string
 
+LAT_NAMES = ['x', 'rlat', 'rlats', 'lat', 'lats', 'latitude', 'latitudes']
+LON_NAMES = ['y', 'rlon', 'rlons', 'lon', 'lons', 'longitude', 'longitudes']
+TIME_NAMES = ['time', 'times', 'date', 'dates', 'julian']
 
 def _get_time_base(time_format, since_index):
     '''Calculate time base from time data.
@@ -58,6 +61,67 @@ def _get_time_base(time_format, since_in
 
     return time_base
 
+def _get_netcdf_variable_name(valid_var_names, netcdf, netcdf_var):
+    '''Return valid variable from given netCDF object.
+
+    Looks for an occurrence of a valid_var_name in the netcdf variable data.
+    If multiple possible matches are found a ValueError is raised. If no
+    matching variable names are found a ValueError is raised.
+
+    :param valid_var_names: The possible variable names to search for in 
+        the netCDF object.
+    :type valid_var_names: List of Strings
+    :param netcdf: The netCDF object in which to check for valid_var_names.
+    :type netcdf: netcdf4.Dataset
+    :param netcdf_var: The relevant variable name to search over in the 
+        netcdf object.
+
+    :returns: The variable from valid_var_names that it locates in 
+        the netCDF object.
+
+    :raises: ValueError
+    '''
+
+    # Check for valid variable names in netCDF value variable dimensions
+    dimensions = netcdf.variables[netcdf_var].dimensions
+    dims_lower = [dim.encode().lower() for dim in dimensions]
+
+    intersect = list(set(valid_var_names).intersection(dims_lower))
+
+    if len(intersect) == 1:
+        index = dims_lower.index(intersect[0])
+        dimension_name = dimensions[index].encode()
+
+        possible_vars = []
+        for var in netcdf.variables.keys():
+            var_dimensions = netcdf.variables[var].dimensions
+
+            if len(var_dimensions) != 1:
+                continue
+
+            if var_dimensions[0].encode() == dimension_name:
+                possible_vars.append(var)
+
+        if len(possible_vars) == 1:
+            return possible_vars[0]
+
+    # Check for valid variable names in netCDF variable names
+    variables = netcdf.variables.keys()
+    vars_lower = [var.encode().lower() for var in variables]
+
+    intersect = list(set(valid_var_names).intersection(vars_lower))
+
+    if len(intersect) == 1:
+        index = vars_lower.index(intersect[0])
+        return variables[index]
+
+    # If we couldn't find a single matching valid variable name, we're
+    # unable to load the file properly.
+    error = (
+        "Unable to locate a single matching variable name in NetCDF object. "
+    )
+    raise ValueError(error)
+
 
 def _get_time_step(netcdf, time_variable_name):
     '''Calculate time step from time data.
@@ -235,16 +299,18 @@ def _get_value_name(possible_value_name)
     return value_variable_name
 
 
-def load_file(file_path, variable_name=None):
+def load_file(file_path, variable_name):
     '''Load netCDF file, get all the variable names and get the data.
 
     :param file_path: NetCDF directory with file name
     :type file_path: String
-    :param variable_name[optional]: The given (by user) value variable name
+    :param variable_name: The given (by user) value variable name
     :type variable_name: String
 
     :returns: A dataset object from dataset.py
     :rtype: Object
+
+    :raises: ValueError
     '''
 
     try:
@@ -253,32 +319,49 @@ def load_file(file_path, variable_name=N
         err = "The given file cannot be loaded (Only netCDF file can be supported)."
         raise ValueError(err)
 
-    variable_names = [variable.encode() for variable in netcdf.variables.keys()]
-
-    lat_variable_name = _get_lat_name(variable_names)
-    lon_variable_name = _get_lon_name(variable_names)
-    time_variable_name = _get_time_name(variable_names)
-    level_variable_name = _get_level_name(variable_names)
+    lat_name = _get_netcdf_variable_name(LAT_NAMES, netcdf, variable_name)
+    lon_name = _get_netcdf_variable_name(LON_NAMES, netcdf, variable_name)
+    time_name = _get_netcdf_variable_name(TIME_NAMES, netcdf, variable_name)
+
+    #lat_variable_name = _get_lat_name(variable_names)
+    #lon_variable_name = _get_lon_name(variable_names)
+    #time_variable_name = _get_time_name(variable_names)
+    #level_variable_name = _get_level_name(variable_names)
+
+
+    # Check returned variable dimensions. lats, lons, and times should be 1D
+    #
+    # Check dimensions of the values
+    # if != 3
+    #   find the indices for lat, lon, time
+    #   strip out everything else by select 1st of possible options
+    #
+    # Check the order of the variables
+    # if not correct order (times, lats, lons)
+    #    reorder as appropriate
+    #
+    # Make new dataset object
 
+    '''
     if variable_name in variable_names:
         value_variable_name = variable_name
     else:
         possible_value_name = list(set(variable_names) - set([lat_variable_name, lon_variable_name, time_variable_name, level_variable_name]))
         value_variable_name = _get_value_name(possible_value_name)
-
-    lats = netcdf.variables[lat_variable_name][:]    
-    lons = netcdf.variables[lon_variable_name][:]
-    time_raw_values = netcdf.variables[time_variable_name][:]
-    times = _calculate_time(netcdf, time_raw_values, time_variable_name)
+    '''
+    lats = netcdf.variables[lat_name][:]    
+    lons = netcdf.variables[lon_name][:]
+    time_raw_values = netcdf.variables[time_name][:]
+    times = _calculate_time(netcdf, time_raw_values, time_name)
     times = numpy.array(times)
-    values = ma.array(netcdf.variables[value_variable_name][:])
+    values = ma.array(netcdf.variables[variable_name][:])
 
 
     if len(values.shape) == 4:
-        value_dimensions_names = list(netcdf.variables[value_variable_name].dimensions)
-        value_dimensions_names = [dim_name.encode() for dim_name in value_dimensions_names]
-        required_variable_names = [lat_variable_name, lon_variable_name, time_variable_name]
-        level_index = value_dimensions_names.index(list(set(value_dimensions_names) - set(required_variable_names))[0])
+        #value_dimensions_names = list(netcdf.variables[variable_name].dimensions)
+        value_dimensions_names = [dim_name.encode() for dim_name in netcdf.variables[variable_name].dimensions]
+        lat_lon_time_var_names = [lat_name, lon_name, time_name]
+        level_index = value_dimensions_names.index(list(set(value_dimensions_names) - set(lat_lon_time_var_names))[0])
         if level_index == 0:
             values = values [0,:,:,:]
         elif level_index == 1:
@@ -288,4 +371,4 @@ def load_file(file_path, variable_name=N
         else:
             values = values [:,:,:,0]
 
-    return Dataset(lats, lons, times, values, value_variable_name)
+    return Dataset(lats, lons, times, values, variable_name)

Modified: incubator/climate/trunk/ocw/tests/test_local.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/ocw/tests/test_local.py?rev=1516637&r1=1516636&r2=1516637&view=diff
==============================================================================
--- incubator/climate/trunk/ocw/tests/test_local.py (original)
+++ incubator/climate/trunk/ocw/tests/test_local.py Thu Aug 22 22:05:48 2013
@@ -26,74 +26,152 @@ import ocw.data_source.local as local
 
 class test_load_file(unittest.TestCase):
 
-
     def setUp(self):
-        #To create the temporary netCDF file
-        self.file_path = os.getcwd() + '/temporaryNetcdf.nc'
-        netCDF_file = netCDF4.Dataset(self.file_path, 'w',  format='NETCDF4')
-        #To create dimensions
-        netCDF_file.createDimension('lat_dim', 5)
-        netCDF_file.createDimension('lon_dim', 5)
-        netCDF_file.createDimension('time_dim', 3)
-        netCDF_file.createDimension('level_dim', 2)
-        #To create variables
-        latitudes = netCDF_file.createVariable('latitude', 'd', ('lat_dim',))
-        longitudes = netCDF_file.createVariable('longitude', 'd', ('lon_dim',))
-        times = netCDF_file.createVariable('time', 'd', ('time_dim',))
-        levels = netCDF_file.createVariable('level', 'd', ('level_dim',))
-        values = netCDF_file.createVariable('value', 'd', ('level_dim', 'time_dim', 'lat_dim', 'lon_dim'))
-        #To latitudes and longitudes for five values
-        self.latitudes = range(0,5)
-        self.longitudes = range(200,205)
-        #Three months of data
-        self.times = range(3)
-        #Two levels
-        self.levels = [100, 200]
-        #Create 150 values
-        self.values = numpy.array([i for i in range(150)])
-        #Reshape values to 4D array (level, time, lats, lons)
-        self.values = self.values.reshape(len(self.levels), len(self.times),len(self.latitudes),len(self.longitudes))
-        #Ingest values to netCDF file
-        latitudes[:] = self.latitudes
-        longitudes[:] = self.longitudes
-        times[:] = self.times
-        levels[:] = self.levels
-        values[:] = self.values
-        #Assigne time info to time variable
-        netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00' 
-        netCDF_file.close()
         #Read netCDF file
+        self.file_path = create_netcdf_object()
         self.netCDF_file = netCDF4.Dataset(self.file_path, 'r')
-        self.user_value_variable_name = 'value'
+        self.latitudes = self.netCDF_file.variables['latitude'][:]
+        self.longitudes = self.netCDF_file.variables['longitude'][:]
+        self.values = self.netCDF_file.variables['value'][:]
         self.variable_name_list = ['latitude', 'longitude', 'time', 'level', 'value']
         self.possible_value_name = ['latitude', 'longitude', 'time', 'level']
-
-
+        
     def tearDown(self):
-        '''To remove the temporary netCDF file'''
         os.remove(self.file_path)
-
+    
 
     def test_function_load_file_lats(self):
         '''To test load_file function for latitudes'''
-        self.assertItemsEqual(local.load_file(self.file_path, None).lats, self.latitudes)
+        self.assertItemsEqual(local.load_file(self.file_path, "value").lats, self.latitudes)
 
 
     def test_function_load_file_lons(self):
         '''To test load_file function for longitudes'''
-        self.assertItemsEqual(local.load_file(self.file_path, None).lons, self.longitudes)
+        self.assertItemsEqual(local.load_file(self.file_path, "value").lons, self.longitudes)
 
 
     def test_function_load_file_times(self):
         '''To test load_file function for times'''
         newTimes = datetime.datetime(2001,01,01), datetime.datetime(2001,02,01), datetime.datetime(2001,03,01)
-        self.assertItemsEqual(local.load_file(self.file_path, None).times, newTimes)
+        self.assertItemsEqual(local.load_file(self.file_path, "value").times, newTimes)
 
 
     def test_function_load_file_values(self):
         '''To test load_file function for values'''
         new_values = self.values[0,:,:,:]
-        self.assertTrue(numpy.allclose(local.load_file(self.file_path, None).values, new_values))
+        self.assertTrue(numpy.allclose(local.load_file(self.file_path, "value").values, new_values))
+
+class test_get_netcdf_variable_names(unittest.TestCase):
+    def setUp(self):
+        self.netcdf_path = os.path.abspath("../../examples/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc")
+        self.invalid_netcdf_path = create_invalid_dimensions_netcdf_object()
+        self.netcdf = netCDF4.Dataset(self.netcdf_path, mode='r')
+
+    def tearDown(self):
+        os.remove(self.invalid_netcdf_path)
+
+    def test_valid_latitude(self):
+        self.lat = local._get_netcdf_variable_name(
+                                        local.LAT_NAMES, 
+                                        self.netcdf, 
+                                        "tasmax")
+        self.assertEquals(self.lat, "rlat")
+
+    def test_invalid_dimension_latitude(self):
+        self.netcdf = netCDF4.Dataset(self.invalid_netcdf_path, mode='r')
+        self.lat = local._get_netcdf_variable_name(
+                                        local.LAT_NAMES,
+                                        self.netcdf,
+                                        "value")
+        self.assertEquals(self.lat, "latitude")
+
+    def test_dimension_variable_name_mismatch(self):
+        self.netcdf = netCDF4.Dataset(self.invalid_netcdf_path, mode='r')
+        self.lat = local._get_netcdf_variable_name(
+                                        ["lat_dim"] + local.LAT_NAMES,
+                                        self.netcdf,
+                                        "value")
+        self.assertEquals(self.lat, "latitude")
+
+    def test_no_match_latitude(self):
+        with self.assertRaises(ValueError):
+            self.lat = local._get_netcdf_variable_name(
+                                            ['notAVarName'],
+                                            self.netcdf, 
+                                            "tasmax")
+
+def create_netcdf_object():
+        #To create the temporary netCDF file
+        file_path = '/tmp/temporaryNetcdf.nc'
+        netCDF_file = netCDF4.Dataset(file_path, 'w',  format='NETCDF4')
+        #To create dimensions
+        netCDF_file.createDimension('lat_dim', 5)
+        netCDF_file.createDimension('lon_dim', 5)
+        netCDF_file.createDimension('time_dim', 3)
+        netCDF_file.createDimension('level_dim', 2)
+        #To create variables
+        latitudes = netCDF_file.createVariable('latitude', 'd', ('lat_dim',))
+        longitudes = netCDF_file.createVariable('longitude', 'd', ('lon_dim',))
+        times = netCDF_file.createVariable('time', 'd', ('time_dim',))
+        levels = netCDF_file.createVariable('level', 'd', ('level_dim',))
+        values = netCDF_file.createVariable('value', 'd', ('level_dim', 'time_dim', 'lat_dim', 'lon_dim'))
+        #To latitudes and longitudes for five values
+        latitudes = range(0,5)
+        longitudes = range(200,205)
+        #Three months of data
+        #Two levels
+        levels = [100, 200]
+        #Create 150 values
+        values = numpy.array([i for i in range(150)])
+        #Reshape values to 4D array (level, time, lats, lons)
+        values = values.reshape(len(levels), len(times),len(latitudes),len(longitudes))
+        #Ingest values to netCDF file
+        latitudes[:] = latitudes
+        longitudes[:] = longitudes
+        times[:] = numpy.array(range(3))
+        levels[:] = levels
+        values[:] = values
+        #Assign time info to time variable
+        netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00' 
+        netCDF_file.close()
+        return file_path
+
+def create_invalid_dimensions_netcdf_object():
+        #To create the temporary netCDF file
+        file_path = '/tmp/temporaryNetcdf.nc'
+        netCDF_file = netCDF4.Dataset(file_path, 'w', format='NETCDF4')
+        #To create dimensions
+        netCDF_file.createDimension('lat_dim', 5)
+        netCDF_file.createDimension('lon_dim', 5)
+        netCDF_file.createDimension('time_dim', 3)
+        netCDF_file.createDimension('level_dim', 2)
+        #To create variables
+        latitudes = netCDF_file.createVariable('latitude', 'd', ('lat_dim',))
+        longitudes = netCDF_file.createVariable('longitude', 'd', ('lon_dim',))
+        times = netCDF_file.createVariable('time', 'd', ('time_dim',))
+        levels = netCDF_file.createVariable('level', 'd', ('level_dim',))
+        values = netCDF_file.createVariable('value', 'd', ('level_dim', 'time_dim', 'lat_dim', 'lon_dim'))
+        #To latitudes and longitudes for five values
+        latitudes = range(0,5)
+        longitudes = range(200,205)
+        #Three months of data
+        times = range(3)
+        #Two levels
+        levels = [100, 200]
+        #Create 150 values
+        values = numpy.array([i for i in range(150)])
+        #Reshape values to 4D array (level, time, lats, lons)
+        values = values.reshape(len(levels), len(times),len(latitudes),len(longitudes))
+        #Ingest values to netCDF file
+        latitudes[:] = latitudes
+        longitudes[:] = longitudes
+        times[:] = times
+        levels[:] = levels
+        values[:] = values
+        #Assign time info to time variable
+        netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00' 
+        netCDF_file.close()
+        return file_path
 
 
 if __name__ == '__main__':