You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by bo...@apache.org on 2013/08/21 17:01:29 UTC

svn commit: r1516182 - /incubator/climate/trunk/ocw/data_source/local.py

Author: boustani
Date: Wed Aug 21 15:01:28 2013
New Revision: 1516182

URL: http://svn.apache.org/r1516182
Log:
Now local.py can guard against a 4D value array and convert it to a 3D array. It finds the 'level' index in the shape of the values and reshapes them to (time, lat, lon).

Modified:
    incubator/climate/trunk/ocw/data_source/local.py

Modified: incubator/climate/trunk/ocw/data_source/local.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/ocw/data_source/local.py?rev=1516182&r1=1516181&r2=1516182&view=diff
==============================================================================
--- incubator/climate/trunk/ocw/data_source/local.py (original)
+++ incubator/climate/trunk/ocw/data_source/local.py Wed Aug 21 15:01:28 2013
@@ -25,7 +25,7 @@ import calendar
 import string
 
 
-def get_time_base(time_format, since_index):
+def _get_time_base(time_format, since_index):
     '''Calculate time base from time data.
 
     :param time_format: Unit of time in netCDF
@@ -41,24 +41,25 @@ def get_time_base(time_format, since_ind
     time_base = time_base.split('.')[0] + '0' if "." in time_base else time_base
     TIME_FORMATS =[
                     '%Y:%m:%d %H:%M:%S', '%Y-%m-%d %H-%M-%S', '%Y/%m/%d %H/%M/%S','%Y-%m-%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y%m%d %H:%M:%S',
-                    '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S', '%Y/%m/%d/%H/%M/%S', '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S', '%Y/%m/%d%H:%M:%S',
-                    '%Y-%m-%d %H:%M','%Y/%m/%d %H:%M', '%Y:%m:%d %H:%M','%Y%m%d %H:%M',
+                    '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S', '%Y/%m/%d/%H/%M/%S', '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S', '%Y-%m-%d %H:%M:%S',
+                    '%Y/%m/%d%H:%M:%S', '%Y-%m-%d %H:%M','%Y/%m/%d %H:%M', '%Y:%m:%d %H:%M','%Y%m%d %H:%M',
                     '%Y-%m-%d', '%Y/%m/%d', '%Y:%m:%d', '%Y%m%d'
                     ]
     count = 0
-    for format in TIME_FORMATS:
+    for time_format in TIME_FORMATS:
             try:
-                time_base = datetime.strptime(time_base, format)
+                time_base = datetime.strptime(time_base, time_format)
                 break
             except:
                 count = count + 1
                 if count == len(TIME_FORMATS):
-                    raise Exception("The time format is not found. Base time is " + str(time_base) + " .")
+                    err = "The time format is not found. Base time is " + str(time_base) + " ."
+                    raise ValueError(err)
 
     return time_base
 
 
-def get_time_step(netcdf, time_variable_name):
+def _get_time_step(netcdf, time_variable_name):
     '''Calculate time step from time data.
 
     :param netcdf: NetCDF dataset object
@@ -75,7 +76,8 @@ def get_time_step(netcdf, time_variable_
         time_format = netcdf.variables[time_variable_name].units.encode()
         since_index = re.search('since', time_format).end()
     except AttributeError:
-        raise Exception('Time variable attributes cannot be decoded.')
+        err = 'Time variable attributes cannot be decoded.'
+        raise ValueError(err)
 
     time_step = None
     TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years')
@@ -87,7 +89,7 @@ def get_time_step(netcdf, time_variable_
     return (time_step, time_format, since_index)
 
 
-def calculate_time(netcdf, time_raw_values, time_variable_name):
+def _calculate_time(netcdf, time_raw_values, time_variable_name):
     '''Convert time data from integer to python datetime.
 
     :param netcdf: NetCDF dataset object
@@ -103,8 +105,8 @@ def calculate_time(netcdf, time_raw_valu
 
     time_values = []
 
-    time_step, time_format, since_index = get_time_step(netcdf, time_variable_name)
-    time_base = get_time_base(time_format, since_index)
+    time_step, time_format, since_index = _get_time_step(netcdf, time_variable_name)
+    time_base = _get_time_base(time_format, since_index)
     time_step = time_step.lower()
 
     if 'min' in time_step:
@@ -127,12 +129,13 @@ def calculate_time(netcdf, time_raw_valu
         for time in time_raw_values:
             time_values.append(time_base + timedelta(years=int(time)))
     else:
-        raise Exception("The time step cannot be defined.")
+        err = "The time step cannot be defined."
+        raise ValueError(err)
 
     return time_values
 
 
-def get_lat_name(variable_names):
+def _get_lat_name(variable_names):
     '''Find the latitude variable name
 
     :param variable_names: List of netCDF variables' name
@@ -144,14 +147,15 @@ def get_lat_name(variable_names):
 
     common_name = set(['lat', 'lats', 'latitude', 'latitudes']).intersection(variable_names)
     if len(common_name) !=1:
-        raise Exception("Unable to autodetect latitude variable name.")
+        err = "Unable to autodetect latitude variable name."
+        raise ValueError(err)
     else:
         lat_variable_name = common_name.pop()
 
     return lat_variable_name
 
 
-def get_lon_name(variable_names):
+def _get_lon_name(variable_names):
     '''Find the longitude variable name
 
     :param variable_names: List of netCDF variables' name
@@ -163,14 +167,15 @@ def get_lon_name(variable_names):
 
     common_name = set(['lon', 'lons', 'longitude', 'longitudes']).intersection(variable_names)
     if len(common_name) !=1:
-        raise Exception("Unable to autodetect longitude variable name.")
+        err = "Unable to autodetect longitude variable name."
+        raise ValueError(err)
     else:
         lon_variable_name = common_name.pop()
 
     return lon_variable_name
 
 
-def get_time_name(variable_names):
+def _get_time_name(variable_names):
     '''Find the time variable name.
 
     :param: variableNameList: List of netCDF variables' name
@@ -181,15 +186,17 @@ def get_time_name(variable_names):
     '''
 
     common_name = set(['time', 'times', 'date', 'dates', 'julian']).intersection(variable_names)
+
     if len(common_name) !=1:
-        raise Exception("Unable to autodetect time variable name.")
+        err = "Unable to autodetect time variable name. These option(s) found: {0} ".format([each for each in common_name])
+        raise ValueError(err)
     else:
         time_variable_name = common_name.pop()
 
     return time_variable_name
 
 
-def get_level_name(variable_names):
+def _get_level_name(variable_names):
     '''Find the level variable name.
 
     :param variable_names: List of netCDF variables' name
@@ -209,7 +216,7 @@ def get_level_name(variable_names):
     return level_variable_name
 
 
-def get_value_name(possible_value_name):
+def _get_value_name(possible_value_name):
     '''Find the value variable name.
 
     :param possible_value_name: List of all value variable names
@@ -222,7 +229,8 @@ def get_value_name(possible_value_name):
     if len(possible_value_name) == 1:
         value_variable_name = possible_value_name[0]
     else:
-        raise Exception("The given value variable name does not match with existing variables name.")
+        err = "The given value variable name does not match with existing variables name."
+        raise ValueError(err)
 
     return value_variable_name
 
@@ -242,27 +250,42 @@ def load_file(file_path, variable_name=N
     try:
         netcdf = netCDF4.Dataset(file_path, mode='r')
     except:
-        raise Exception("The given file cannot be loaded (Only netCDF file can be supported).")
+        err = "The given file cannot be loaded (Only netCDF file can be supported)."
+        raise ValueError(err)
 
     variable_names = [variable.encode() for variable in netcdf.variables.keys()]
-    variable_names = [variable.lower() for variable in variable_names]
 
-    lat_variable_name = get_lat_name(variable_names)
-    lon_variable_name = get_lon_name(variable_names)
-    time_variable_name = get_time_name(variable_names)
-    level_variable_name = get_level_name(variable_names)
+    lat_variable_name = _get_lat_name(variable_names)
+    lon_variable_name = _get_lon_name(variable_names)
+    time_variable_name = _get_time_name(variable_names)
+    level_variable_name = _get_level_name(variable_names)
 
     if variable_name in variable_names:
         value_variable_name = variable_name
     else:
         possible_value_name = list(set(variable_names) - set([lat_variable_name, lon_variable_name, time_variable_name, level_variable_name]))
-        value_variable_name = get_value_name(possible_value_name)
+        value_variable_name = _get_value_name(possible_value_name)
 
     lats = netcdf.variables[lat_variable_name][:]    
     lons = netcdf.variables[lon_variable_name][:]
     time_raw_values = netcdf.variables[time_variable_name][:]
-    times = calculate_time(netcdf, time_raw_values, time_variable_name)
+    times = _calculate_time(netcdf, time_raw_values, time_variable_name)
     times = numpy.array(times)
     values = ma.array(netcdf.variables[value_variable_name][:])
 
+
+    if len(values.shape) == 4:
+        value_dimensions_names = list(netcdf.variables[value_variable_name].dimensions)
+        value_dimensions_names = [dim_name.encode() for dim_name in value_dimensions_names]
+        required_variable_names = [lat_variable_name, lon_variable_name, time_variable_name]
+        level_index = value_dimensions_names.index(list(set(value_dimensions_names) - set(required_variable_names))[0])
+        if level_index == 0:
+            values = values [0,:,:,:]
+        elif level_index == 1:
+            values = values [:,0,:,:]
+        elif level_index == 2:
+            values = values [:,:,0,:]
+        else:
+            values = values [:,:,:,0]
+
     return Dataset(lats, lons, times, values, value_variable_name)