You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by bo...@apache.org on 2013/08/21 05:24:33 UTC

svn commit: r1516065 - /incubator/climate/branches/RefactorInput/ocw/data_source/local.py

Author: boustani
Date: Wed Aug 21 03:24:32 2013
New Revision: 1516065

URL: http://svn.apache.org/r1516065
Log:
now local.py will return values as 3D numpy array.

Modified:
    incubator/climate/branches/RefactorInput/ocw/data_source/local.py

Modified: incubator/climate/branches/RefactorInput/ocw/data_source/local.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/data_source/local.py?rev=1516065&r1=1516064&r2=1516065&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/data_source/local.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/data_source/local.py Wed Aug 21 03:24:32 2013
@@ -25,7 +25,7 @@ import calendar
 import string
 
 
-def get_time_base(time_format, since_index):
+def _get_time_base(time_format, since_index):
     '''Calculate time base from time data.
 
     :param time_format: Unit of time in netCDF
@@ -41,24 +41,25 @@ def get_time_base(time_format, since_ind
     time_base = time_base.split('.')[0] + '0' if "." in time_base else time_base
     TIME_FORMATS =[
                     '%Y:%m:%d %H:%M:%S', '%Y-%m-%d %H-%M-%S', '%Y/%m/%d %H/%M/%S','%Y-%m-%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y%m%d %H:%M:%S',
-                    '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S', '%Y/%m/%d/%H/%M/%S', '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S', '%Y/%m/%d%H:%M:%S',
-                    '%Y-%m-%d %H:%M','%Y/%m/%d %H:%M', '%Y:%m:%d %H:%M','%Y%m%d %H:%M',
+                    '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S', '%Y/%m/%d/%H/%M/%S', '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S', '%Y-%m-%d %H:%M:%S',
+                    '%Y/%m/%d%H:%M:%S', '%Y-%m-%d %H:%M','%Y/%m/%d %H:%M', '%Y:%m:%d %H:%M','%Y%m%d %H:%M',
                     '%Y-%m-%d', '%Y/%m/%d', '%Y:%m:%d', '%Y%m%d'
                     ]
     count = 0
-    for format in TIME_FORMATS:
+    for time_format in TIME_FORMATS:
             try:
-                time_base = datetime.strptime(time_base, format)
+                time_base = datetime.strptime(time_base, time_format)
                 break
             except:
                 count = count + 1
                 if count == len(TIME_FORMATS):
-                    raise Exception("The time format is not found. Base time is " + str(time_base) + " .")
+                    err = "The time format is not found. Base time is " + str(time_base) + " ."
+                    raise ValueError(err)
 
     return time_base
 
 
-def get_time_step(netcdf, time_variable_name):
+def _get_time_step(netcdf, time_variable_name):
     '''Calculate time step from time data.
 
     :param netcdf: NetCDF dataset object
@@ -75,7 +76,8 @@ def get_time_step(netcdf, time_variable_
         time_format = netcdf.variables[time_variable_name].units.encode()
         since_index = re.search('since', time_format).end()
     except AttributeError:
-        raise Exception('Time variable attributes cannot be decoded.')
+        err = 'Time variable attributes cannot be decoded.'
+        raise ValueError(err)
 
     time_step = None
     TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years')
@@ -87,7 +89,7 @@ def get_time_step(netcdf, time_variable_
     return (time_step, time_format, since_index)
 
 
-def calculate_time(netcdf, time_raw_values, time_variable_name):
+def _calculate_time(netcdf, time_raw_values, time_variable_name):
     '''Convert time data from integer to python datetime.
 
     :param netcdf: NetCDF dataset object
@@ -103,8 +105,8 @@ def calculate_time(netcdf, time_raw_valu
 
     time_values = []
 
-    time_step, time_format, since_index = get_time_step(netcdf, time_variable_name)
-    time_base = get_time_base(time_format, since_index)
+    time_step, time_format, since_index = _get_time_step(netcdf, time_variable_name)
+    time_base = _get_time_base(time_format, since_index)
     time_step = time_step.lower()
 
     if 'min' in time_step:
@@ -127,12 +129,13 @@ def calculate_time(netcdf, time_raw_valu
         for time in time_raw_values:
             time_values.append(time_base + timedelta(years=int(time)))
     else:
-        raise Exception("The time step cannot be defined.")
+        err = "The time step cannot be defined."
+        raise ValueError(err)
 
     return time_values
 
 
-def get_lat_name(variable_names):
+def _get_lat_name(variable_names):
     '''Find the latitude variable name
 
     :param variable_names: List of netCDF variables' name
@@ -144,14 +147,15 @@ def get_lat_name(variable_names):
 
     common_name = set(['lat', 'lats', 'latitude', 'latitudes']).intersection(variable_names)
     if len(common_name) !=1:
-        raise Exception("Unable to autodetect latitude variable name.")
+        err = "Unable to autodetect latitude variable name."
+        raise ValueError(err)
     else:
         lat_variable_name = common_name.pop()
 
     return lat_variable_name
 
 
-def get_lon_name(variable_names):
+def _get_lon_name(variable_names):
     '''Find the longitude variable name
 
     :param variable_names: List of netCDF variables' name
@@ -163,14 +167,15 @@ def get_lon_name(variable_names):
 
     common_name = set(['lon', 'lons', 'longitude', 'longitudes']).intersection(variable_names)
     if len(common_name) !=1:
-        raise Exception("Unable to autodetect longitude variable name.")
+        err = "Unable to autodetect longitude variable name."
+        raise ValueError(err)
     else:
         lon_variable_name = common_name.pop()
 
     return lon_variable_name
 
 
-def get_time_name(variable_names):
+def _get_time_name(variable_names):
     '''Find the time variable name.
 
     :param: variableNameList: List of netCDF variables' name
@@ -181,15 +186,17 @@ def get_time_name(variable_names):
     '''
 
     common_name = set(['time', 'times', 'date', 'dates', 'julian']).intersection(variable_names)
+
     if len(common_name) !=1:
-        raise Exception("Unable to autodetect time variable name.")
+        err = "Unable to autodetect time variable name. These option(s) found: {0} ".format([each for each in common_name])
+        raise ValueError(err)
     else:
         time_variable_name = common_name.pop()
 
     return time_variable_name
 
 
-def get_level_name(variable_names):
+def _get_level_name(variable_names):
     '''Find the level variable name.
 
     :param variable_names: List of netCDF variables' name
@@ -209,7 +216,7 @@ def get_level_name(variable_names):
     return level_variable_name
 
 
-def get_value_name(possible_value_name):
+def _get_value_name(possible_value_name):
     '''Find the value variable name.
 
     :param possible_value_name: List of all value variable names
@@ -222,7 +229,8 @@ def get_value_name(possible_value_name):
     if len(possible_value_name) == 1:
         value_variable_name = possible_value_name[0]
     else:
-        raise Exception("The given value variable name does not match with existing variables name.")
+        err = "The given value variable name does not match with existing variables name."
+        raise ValueError(err)
 
     return value_variable_name
 
@@ -242,27 +250,42 @@ def load_file(file_path, variable_name=N
     try:
         netcdf = netCDF4.Dataset(file_path, mode='r')
     except:
-        raise Exception("The given file cannot be loaded (Only netCDF file can be supported).")
+        err = "The given file cannot be loaded (Only netCDF file can be supported)."
+        raise ValueError(err)
 
     variable_names = [variable.encode() for variable in netcdf.variables.keys()]
-    variable_names = [variable.lower() for variable in variable_names]
 
-    lat_variable_name = get_lat_name(variable_names)
-    lon_variable_name = get_lon_name(variable_names)
-    time_variable_name = get_time_name(variable_names)
-    level_variable_name = get_level_name(variable_names)
+    lat_variable_name = _get_lat_name(variable_names)
+    lon_variable_name = _get_lon_name(variable_names)
+    time_variable_name = _get_time_name(variable_names)
+    level_variable_name = _get_level_name(variable_names)
 
     if variable_name in variable_names:
         value_variable_name = variable_name
     else:
         possible_value_name = list(set(variable_names) - set([lat_variable_name, lon_variable_name, time_variable_name, level_variable_name]))
-        value_variable_name = get_value_name(possible_value_name)
+        value_variable_name = _get_value_name(possible_value_name)
 
     lats = netcdf.variables[lat_variable_name][:]    
     lons = netcdf.variables[lon_variable_name][:]
     time_raw_values = netcdf.variables[time_variable_name][:]
-    times = calculate_time(netcdf, time_raw_values, time_variable_name)
+    times = _calculate_time(netcdf, time_raw_values, time_variable_name)
     times = numpy.array(times)
     values = ma.array(netcdf.variables[value_variable_name][:])
 
+
+    if len(values.shape) == 4:
+        value_dimensions_names = list(netcdf.variables[value_variable_name].dimensions)
+        value_dimensions_names = [each.encode() for each in value_dimensions_names]
+        required_variable_names = [lat_variable_name, lon_variable_name, time_variable_name]
+        index = value_dimensions_names.index(list(set(value_dimensions_names) - set(required_variable_names))[0])
+        if index == 0:
+            values = values [0,:,:,:]
+        elif index == 1:
+            values = values [:,0,:,:]
+        elif index == 2:
+            values = values [:,:,0,:]
+        else:
+            values = values [:,:,:,0]
+
     return Dataset(lats, lons, times, values, value_variable_name)



Re: svn commit: r1516065 - /incubator/climate/branches/RefactorInput/ocw/data_source/local.py

Posted by Cameron Goodale <si...@gmail.com>.
Mazi,

I just tried the new code and it is still failing.  I have made a recent
commit under https://issues.apache.org/jira/browse/CLIMATE-268.  It
includes the code I am using for end-to-end testing and the CORDEX NetCDF
Files I am using for the test.

Let's continue this conversation on that Issue.  I will document the steps
I take to run my test in the comments of that issue so you can pull down
the files and run the tests on your local machine to help you update
local.py.


Thanks,


Cameron



On Wed, Aug 21, 2013 at 8:04 AM, Boustani, Maziyar (398F) <
Maziyar.Boustani@jpl.nasa.gov> wrote:

> Hey Cam,
>
> Here is the improved local.py in trunk [1].
> Please use the code in your new rcmet.py and get back to me if there is
> a need for further improvement.
>
> [1]: http://svn.apache.org/r1516182
>
> Best regards,
> Mazi
>
> On Aug 21, 2013, at 6:27 AM, Cameron Goodale wrote:
>
> Mazi,
>
> Can you please merge these changes into the trunk?
>
> Mike J. merged all the refactored code into the trunk yesterday so our
> future development will be back in the trunk.
>
> Thanks man,
>
>
> Cam
>
>
> On Tue, Aug 20, 2013 at 8:24 PM, <boustani@apache.org> wrote:
>
> Author: boustani
> Date: Wed Aug 21 03:24:32 2013
> New Revision: 1516065
>
> URL: http://svn.apache.org/r1516065
> Log:
> now local.py will return values as 3D numpy array.
>
> Modified:
>    incubator/climate/branches/RefactorInput/ocw/data_source/local.py
>
> Modified: incubator/climate/branches/RefactorInput/ocw/data_source/local.py
> URL:
>
> http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/data_source/local.py?rev=1516065&r1=1516064&r2=1516065&view=diff
>
>
> ==============================================================================
> --- incubator/climate/branches/RefactorInput/ocw/data_source/local.py
> (original)
> +++ incubator/climate/branches/RefactorInput/ocw/data_source/local.py Wed
> Aug 21 03:24:32 2013
> @@ -25,7 +25,7 @@ import calendar
> import string
>
>
> -def get_time_base(time_format, since_index):
> +def _get_time_base(time_format, since_index):
>     '''Calculate time base from time data.
>
>     :param time_format: Unit of time in netCDF
> @@ -41,24 +41,25 @@ def get_time_base(time_format, since_ind
>     time_base = time_base.split('.')[0] + '0' if "." in time_base else
> time_base
>     TIME_FORMATS =[
>                     '%Y:%m:%d %H:%M:%S', '%Y-%m-%d %H-%M-%S', '%Y/%m/%d
> %H/%M/%S','%Y-%m-%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y%m%d %H:%M:%S',
> -                    '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S',
> '%Y/%m/%d/%H/%M/%S', '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S',
> '%Y/%m/%d%H:%M:%S',
> -                    '%Y-%m-%d %H:%M','%Y/%m/%d %H:%M', '%Y:%m:%d
> %H:%M','%Y%m%d %H:%M',
> +                    '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S',
> '%Y/%m/%d/%H/%M/%S', '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S', '%Y-%m-%d
> %H:%M:%S',
> +                    '%Y/%m/%d%H:%M:%S', '%Y-%m-%d %H:%M','%Y/%m/%d
> %H:%M', '%Y:%m:%d %H:%M','%Y%m%d %H:%M',
>                     '%Y-%m-%d', '%Y/%m/%d', '%Y:%m:%d', '%Y%m%d'
>                     ]
>     count = 0
> -    for format in TIME_FORMATS:
> +    for time_format in TIME_FORMATS:
>             try:
> -                time_base = datetime.strptime(time_base, format)
> +                time_base = datetime.strptime(time_base, time_format)
>                 break
>             except:
>                 count = count + 1
>                 if count == len(TIME_FORMATS):
> -                    raise Exception("The time format is not found. Base
> time is " + str(time_base) + " .")
> +                    err = "The time format is not found. Base time is " +
> str(time_base) + " ."
> +                    raise ValueError(err)
>
>     return time_base
>
>
> -def get_time_step(netcdf, time_variable_name):
> +def _get_time_step(netcdf, time_variable_name):
>     '''Calculate time step from time data.
>
>     :param netcdf: NetCDF dataset object
> @@ -75,7 +76,8 @@ def get_time_step(netcdf, time_variable_
>         time_format = netcdf.variables[time_variable_name].units.encode()
>         since_index = re.search('since', time_format).end()
>     except AttributeError:
> -        raise Exception('Time variable attributes cannot be decoded.')
> +        err = 'Time variable attributes cannot be decoded.'
> +        raise ValueError(err)
>
>     time_step = None
>     TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years')
> @@ -87,7 +89,7 @@ def get_time_step(netcdf, time_variable_
>     return (time_step, time_format, since_index)
>
>
> -def calculate_time(netcdf, time_raw_values, time_variable_name):
> +def _calculate_time(netcdf, time_raw_values, time_variable_name):
>     '''Convert time data from integer to python datetime.
>
>     :param netcdf: NetCDF dataset object
> @@ -103,8 +105,8 @@ def calculate_time(netcdf, time_raw_valu
>
>     time_values = []
>
> -    time_step, time_format, since_index = get_time_step(netcdf,
> time_variable_name)
> -    time_base = get_time_base(time_format, since_index)
> +    time_step, time_format, since_index = _get_time_step(netcdf,
> time_variable_name)
> +    time_base = _get_time_base(time_format, since_index)
>     time_step = time_step.lower()
>
>     if 'min' in time_step:
> @@ -127,12 +129,13 @@ def calculate_time(netcdf, time_raw_valu
>         for time in time_raw_values:
>             time_values.append(time_base + timedelta(years=int(time)))
>     else:
> -        raise Exception("The time step cannot be defined.")
> +        err = "The time step cannot be defined."
> +        raise ValueError(err)
>
>     return time_values
>
>
> -def get_lat_name(variable_names):
> +def _get_lat_name(variable_names):
>     '''Find the latitude variable name
>
>     :param variable_names: List of netCDF variables' name
> @@ -144,14 +147,15 @@ def get_lat_name(variable_names):
>
>     common_name = set(['lat', 'lats', 'latitude',
> 'latitudes']).intersection(variable_names)
>     if len(common_name) !=1:
> -        raise Exception("Unable to autodetect latitude variable name.")
> +        err = "Unable to autodetect latitude variable name."
> +        raise ValueError(err)
>     else:
>         lat_variable_name = common_name.pop()
>
>     return lat_variable_name
>
>
> -def get_lon_name(variable_names):
> +def _get_lon_name(variable_names):
>     '''Find the longitude variable name
>
>     :param variable_names: List of netCDF variables' name
> @@ -163,14 +167,15 @@ def get_lon_name(variable_names):
>
>     common_name = set(['lon', 'lons', 'longitude',
> 'longitudes']).intersection(variable_names)
>     if len(common_name) !=1:
> -        raise Exception("Unable to autodetect longitude variable name.")
> +        err = "Unable to autodetect longitude variable name."
> +        raise ValueError(err)
>     else:
>         lon_variable_name = common_name.pop()
>
>     return lon_variable_name
>
>
> -def get_time_name(variable_names):
> +def _get_time_name(variable_names):
>     '''Find the time variable name.
>
>     :param: variableNameList: List of netCDF variables' name
> @@ -181,15 +186,17 @@ def get_time_name(variable_names):
>     '''
>
>     common_name = set(['time', 'times', 'date', 'dates',
> 'julian']).intersection(variable_names)
> +
>     if len(common_name) !=1:
> -        raise Exception("Unable to autodetect time variable name.")
> +        err = "Unable to autodetect time variable name. These option(s)
> found: {0} ".format([each for each in common_name])
> +        raise ValueError(err)
>     else:
>         time_variable_name = common_name.pop()
>
>     return time_variable_name
>
>
> -def get_level_name(variable_names):
> +def _get_level_name(variable_names):
>     '''Find the level variable name.
>
>     :param variable_names: List of netCDF variables' name
> @@ -209,7 +216,7 @@ def get_level_name(variable_names):
>     return level_variable_name
>
>
> -def get_value_name(possible_value_name):
> +def _get_value_name(possible_value_name):
>     '''Find the value variable name.
>
>     :param possible_value_name: List of all value variable names
> @@ -222,7 +229,8 @@ def get_value_name(possible_value_name):
>     if len(possible_value_name) == 1:
>         value_variable_name = possible_value_name[0]
>     else:
> -        raise Exception("The given value variable name does not match
> with existing variables name.")
> +        err = "The given value variable name does not match with existing
> variables name."
> +        raise ValueError(err)
>
>     return value_variable_name
>
> @@ -242,27 +250,42 @@ def load_file(file_path, variable_name=N
>     try:
>         netcdf = netCDF4.Dataset(file_path, mode='r')
>     except:
> -        raise Exception("The given file cannot be loaded (Only netCDF
> file can be supported).")
> +        err = "The given file cannot be loaded (Only netCDF file can be
> supported)."
> +        raise ValueError(err)
>
>     variable_names = [variable.encode() for variable in
> netcdf.variables.keys()]
> -    variable_names = [variable.lower() for variable in variable_names]
>
> -    lat_variable_name = get_lat_name(variable_names)
> -    lon_variable_name = get_lon_name(variable_names)
> -    time_variable_name = get_time_name(variable_names)
> -    level_variable_name = get_level_name(variable_names)
> +    lat_variable_name = _get_lat_name(variable_names)
> +    lon_variable_name = _get_lon_name(variable_names)
> +    time_variable_name = _get_time_name(variable_names)
> +    level_variable_name = _get_level_name(variable_names)
>
>     if variable_name in variable_names:
>         value_variable_name = variable_name
>     else:
>         possible_value_name = list(set(variable_names) -
> set([lat_variable_name, lon_variable_name, time_variable_name,
> level_variable_name]))
> -        value_variable_name = get_value_name(possible_value_name)
> +        value_variable_name = _get_value_name(possible_value_name)
>
>     lats = netcdf.variables[lat_variable_name][:]
>     lons = netcdf.variables[lon_variable_name][:]
>     time_raw_values = netcdf.variables[time_variable_name][:]
> -    times = calculate_time(netcdf, time_raw_values, time_variable_name)
> +    times = _calculate_time(netcdf, time_raw_values, time_variable_name)
>     times = numpy.array(times)
>     values = ma.array(netcdf.variables[value_variable_name][:])
>
> +
> +    if len(values.shape) == 4:
> +        value_dimensions_names =
> list(netcdf.variables[value_variable_name].dimensions)
> +        value_dimensions_names = [each.encode() for each in
> value_dimensions_names]
> +        required_variable_names = [lat_variable_name, lon_variable_name,
> time_variable_name]
> +        index =
> value_dimensions_names.index(list(set(value_dimensions_names) -
> set(required_variable_names))[0])
> +        if index == 0:
> +            values = values [0,:,:,:]
> +        elif index == 1:
> +            values = values [:,0,:,:]
> +        elif index == 2:
> +            values = values [:,:,0,:]
> +        else:
> +            values = values [:,:,:,0]
> +
>     return Dataset(lats, lons, times, values, value_variable_name)
>
>
>
>
>


-- 

Sent from a Tin Can attached to a String

Re: svn commit: r1516065 - /incubator/climate/branches/RefactorInput/ocw/data_source/local.py

Posted by "Boustani, Maziyar (398F)" <Ma...@jpl.nasa.gov>.
Hey Cam,

Here is the improved local.py in trunk [1].
Please use the code in your new rcmet.py and get back to me if there is a need for further improvement.

[1]: http://svn.apache.org/r1516182

Best regards,
Mazi

On Aug 21, 2013, at 6:27 AM, Cameron Goodale wrote:

Mazi,

Can you please merge these changes into the trunk?

Mike J. merged all the refactored code into the trunk yesterday so our
future development will be back in the trunk.

Thanks man,


Cam


On Tue, Aug 20, 2013 at 8:24 PM, <bo...@apache.org> wrote:

Author: boustani
Date: Wed Aug 21 03:24:32 2013
New Revision: 1516065

URL: http://svn.apache.org/r1516065
Log:
now local.py will return values as 3D numpy array.

Modified:
   incubator/climate/branches/RefactorInput/ocw/data_source/local.py

Modified: incubator/climate/branches/RefactorInput/ocw/data_source/local.py
URL:
http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/data_source/local.py?rev=1516065&r1=1516064&r2=1516065&view=diff

==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/data_source/local.py
(original)
+++ incubator/climate/branches/RefactorInput/ocw/data_source/local.py Wed
Aug 21 03:24:32 2013
@@ -25,7 +25,7 @@ import calendar
import string


-def get_time_base(time_format, since_index):
+def _get_time_base(time_format, since_index):
    '''Calculate time base from time data.

    :param time_format: Unit of time in netCDF
@@ -41,24 +41,25 @@ def get_time_base(time_format, since_ind
    time_base = time_base.split('.')[0] + '0' if "." in time_base else
time_base
    TIME_FORMATS =[
                    '%Y:%m:%d %H:%M:%S', '%Y-%m-%d %H-%M-%S', '%Y/%m/%d
%H/%M/%S','%Y-%m-%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y%m%d %H:%M:%S',
-                    '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S',
'%Y/%m/%d/%H/%M/%S', '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S',
'%Y/%m/%d%H:%M:%S',
-                    '%Y-%m-%d %H:%M','%Y/%m/%d %H:%M', '%Y:%m:%d
%H:%M','%Y%m%d %H:%M',
+                    '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S',
'%Y/%m/%d/%H/%M/%S', '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S', '%Y-%m-%d
%H:%M:%S',
+                    '%Y/%m/%d%H:%M:%S', '%Y-%m-%d %H:%M','%Y/%m/%d
%H:%M', '%Y:%m:%d %H:%M','%Y%m%d %H:%M',
                    '%Y-%m-%d', '%Y/%m/%d', '%Y:%m:%d', '%Y%m%d'
                    ]
    count = 0
-    for format in TIME_FORMATS:
+    for time_format in TIME_FORMATS:
            try:
-                time_base = datetime.strptime(time_base, format)
+                time_base = datetime.strptime(time_base, time_format)
                break
            except:
                count = count + 1
                if count == len(TIME_FORMATS):
-                    raise Exception("The time format is not found. Base
time is " + str(time_base) + " .")
+                    err = "The time format is not found. Base time is " +
str(time_base) + " ."
+                    raise ValueError(err)

    return time_base


-def get_time_step(netcdf, time_variable_name):
+def _get_time_step(netcdf, time_variable_name):
    '''Calculate time step from time data.

    :param netcdf: NetCDF dataset object
@@ -75,7 +76,8 @@ def get_time_step(netcdf, time_variable_
        time_format = netcdf.variables[time_variable_name].units.encode()
        since_index = re.search('since', time_format).end()
    except AttributeError:
-        raise Exception('Time variable attributes cannot be decoded.')
+        err = 'Time variable attributes cannot be decoded.'
+        raise ValueError(err)

    time_step = None
    TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years')
@@ -87,7 +89,7 @@ def get_time_step(netcdf, time_variable_
    return (time_step, time_format, since_index)


-def calculate_time(netcdf, time_raw_values, time_variable_name):
+def _calculate_time(netcdf, time_raw_values, time_variable_name):
    '''Convert time data from integer to python datetime.

    :param netcdf: NetCDF dataset object
@@ -103,8 +105,8 @@ def calculate_time(netcdf, time_raw_valu

    time_values = []

-    time_step, time_format, since_index = get_time_step(netcdf,
time_variable_name)
-    time_base = get_time_base(time_format, since_index)
+    time_step, time_format, since_index = _get_time_step(netcdf,
time_variable_name)
+    time_base = _get_time_base(time_format, since_index)
    time_step = time_step.lower()

    if 'min' in time_step:
@@ -127,12 +129,13 @@ def calculate_time(netcdf, time_raw_valu
        for time in time_raw_values:
            time_values.append(time_base + timedelta(years=int(time)))
    else:
-        raise Exception("The time step cannot be defined.")
+        err = "The time step cannot be defined."
+        raise ValueError(err)

    return time_values


-def get_lat_name(variable_names):
+def _get_lat_name(variable_names):
    '''Find the latitude variable name

    :param variable_names: List of netCDF variables' name
@@ -144,14 +147,15 @@ def get_lat_name(variable_names):

    common_name = set(['lat', 'lats', 'latitude',
'latitudes']).intersection(variable_names)
    if len(common_name) !=1:
-        raise Exception("Unable to autodetect latitude variable name.")
+        err = "Unable to autodetect latitude variable name."
+        raise ValueError(err)
    else:
        lat_variable_name = common_name.pop()

    return lat_variable_name


-def get_lon_name(variable_names):
+def _get_lon_name(variable_names):
    '''Find the longitude variable name

    :param variable_names: List of netCDF variables' name
@@ -163,14 +167,15 @@ def get_lon_name(variable_names):

    common_name = set(['lon', 'lons', 'longitude',
'longitudes']).intersection(variable_names)
    if len(common_name) !=1:
-        raise Exception("Unable to autodetect longitude variable name.")
+        err = "Unable to autodetect longitude variable name."
+        raise ValueError(err)
    else:
        lon_variable_name = common_name.pop()

    return lon_variable_name


-def get_time_name(variable_names):
+def _get_time_name(variable_names):
    '''Find the time variable name.

    :param: variableNameList: List of netCDF variables' name
@@ -181,15 +186,17 @@ def get_time_name(variable_names):
    '''

    common_name = set(['time', 'times', 'date', 'dates',
'julian']).intersection(variable_names)
+
    if len(common_name) !=1:
-        raise Exception("Unable to autodetect time variable name.")
+        err = "Unable to autodetect time variable name. These option(s)
found: {0} ".format([each for each in common_name])
+        raise ValueError(err)
    else:
        time_variable_name = common_name.pop()

    return time_variable_name


-def get_level_name(variable_names):
+def _get_level_name(variable_names):
    '''Find the level variable name.

    :param variable_names: List of netCDF variables' name
@@ -209,7 +216,7 @@ def get_level_name(variable_names):
    return level_variable_name


-def get_value_name(possible_value_name):
+def _get_value_name(possible_value_name):
    '''Find the value variable name.

    :param possible_value_name: List of all value variable names
@@ -222,7 +229,8 @@ def get_value_name(possible_value_name):
    if len(possible_value_name) == 1:
        value_variable_name = possible_value_name[0]
    else:
-        raise Exception("The given value variable name does not match
with existing variables name.")
+        err = "The given value variable name does not match with existing
variables name."
+        raise ValueError(err)

    return value_variable_name

@@ -242,27 +250,42 @@ def load_file(file_path, variable_name=N
    try:
        netcdf = netCDF4.Dataset(file_path, mode='r')
    except:
-        raise Exception("The given file cannot be loaded (Only netCDF file can be supported).")
+        err = "The given file cannot be loaded (Only netCDF file can be supported)."
+        raise ValueError(err)

    variable_names = [variable.encode() for variable in netcdf.variables.keys()]
-    variable_names = [variable.lower() for variable in variable_names]

-    lat_variable_name = get_lat_name(variable_names)
-    lon_variable_name = get_lon_name(variable_names)
-    time_variable_name = get_time_name(variable_names)
-    level_variable_name = get_level_name(variable_names)
+    lat_variable_name = _get_lat_name(variable_names)
+    lon_variable_name = _get_lon_name(variable_names)
+    time_variable_name = _get_time_name(variable_names)
+    level_variable_name = _get_level_name(variable_names)

    if variable_name in variable_names:
        value_variable_name = variable_name
    else:
        possible_value_name = list(set(variable_names) - set([lat_variable_name, lon_variable_name, time_variable_name, level_variable_name]))
-        value_variable_name = get_value_name(possible_value_name)
+        value_variable_name = _get_value_name(possible_value_name)

    lats = netcdf.variables[lat_variable_name][:]
    lons = netcdf.variables[lon_variable_name][:]
    time_raw_values = netcdf.variables[time_variable_name][:]
-    times = calculate_time(netcdf, time_raw_values, time_variable_name)
+    times = _calculate_time(netcdf, time_raw_values, time_variable_name)
    times = numpy.array(times)
    values = ma.array(netcdf.variables[value_variable_name][:])

+
+    if len(values.shape) == 4:
+        value_dimensions_names = list(netcdf.variables[value_variable_name].dimensions)
+        value_dimensions_names = [each.encode() for each in value_dimensions_names]
+        required_variable_names = [lat_variable_name, lon_variable_name, time_variable_name]
+        index = value_dimensions_names.index(list(set(value_dimensions_names) - set(required_variable_names))[0])
+        if index == 0:
+            values = values [0,:,:,:]
+        elif index == 1:
+            values = values [:,0,:,:]
+        elif index == 2:
+            values = values [:,:,0,:]
+        else:
+            values = values [:,:,:,0]
+
    return Dataset(lats, lons, times, values, value_variable_name)





Re: svn commit: r1516065 - /incubator/climate/branches/RefactorInput/ocw/data_source/local.py

Posted by Cameron Goodale <go...@apache.org>.
Mazi,

Can you please merge these changes into the trunk?

Mike J. merged all the refactored code into the trunk yesterday so our
future development will be back in the trunk.

Thanks man,


Cam


On Tue, Aug 20, 2013 at 8:24 PM, <bo...@apache.org> wrote:

> Author: boustani
> Date: Wed Aug 21 03:24:32 2013
> New Revision: 1516065
>
> URL: http://svn.apache.org/r1516065
> Log:
> now local.py will return values as 3D numpy array.
>
> Modified:
>     incubator/climate/branches/RefactorInput/ocw/data_source/local.py
>
> Modified: incubator/climate/branches/RefactorInput/ocw/data_source/local.py
> URL:
> http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/data_source/local.py?rev=1516065&r1=1516064&r2=1516065&view=diff
>
> ==============================================================================
> --- incubator/climate/branches/RefactorInput/ocw/data_source/local.py
> (original)
> +++ incubator/climate/branches/RefactorInput/ocw/data_source/local.py Wed
> Aug 21 03:24:32 2013
> @@ -25,7 +25,7 @@ import calendar
>  import string
>
>
> -def get_time_base(time_format, since_index):
> +def _get_time_base(time_format, since_index):
>      '''Calculate time base from time data.
>
>      :param time_format: Unit of time in netCDF
> @@ -41,24 +41,25 @@ def get_time_base(time_format, since_ind
>      time_base = time_base.split('.')[0] + '0' if "." in time_base else
> time_base
>      TIME_FORMATS =[
>                      '%Y:%m:%d %H:%M:%S', '%Y-%m-%d %H-%M-%S', '%Y/%m/%d
> %H/%M/%S','%Y-%m-%d %H:%M:%S', '%Y/%m/%d %H:%M:%S', '%Y%m%d %H:%M:%S',
> -                    '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S',
> '%Y/%m/%d/%H/%M/%S', '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S',
> '%Y/%m/%d%H:%M:%S',
> -                    '%Y-%m-%d %H:%M','%Y/%m/%d %H:%M', '%Y:%m:%d
> %H:%M','%Y%m%d %H:%M',
> +                    '%Y%m%d%H%M%S', '%Y-%m-%d-%H-%M-%S',
> '%Y/%m/%d/%H/%M/%S', '%Y:%m:%d:%H:%M:%S', '%Y-%m-%d-%H:%M:%S', '%Y-%m-%d
> %H:%M:%S',
> +                    '%Y/%m/%d%H:%M:%S', '%Y-%m-%d %H:%M','%Y/%m/%d
> %H:%M', '%Y:%m:%d %H:%M','%Y%m%d %H:%M',
>                      '%Y-%m-%d', '%Y/%m/%d', '%Y:%m:%d', '%Y%m%d'
>                      ]
>      count = 0
> -    for format in TIME_FORMATS:
> +    for time_format in TIME_FORMATS:
>              try:
> -                time_base = datetime.strptime(time_base, format)
> +                time_base = datetime.strptime(time_base, time_format)
>                  break
>              except:
>                  count = count + 1
>                  if count == len(TIME_FORMATS):
> -                    raise Exception("The time format is not found. Base
> time is " + str(time_base) + " .")
> +                    err = "The time format is not found. Base time is " +
> str(time_base) + " ."
> +                    raise ValueError(err)
>
>      return time_base
>
>
> -def get_time_step(netcdf, time_variable_name):
> +def _get_time_step(netcdf, time_variable_name):
>      '''Calculate time step from time data.
>
>      :param netcdf: NetCDF dataset object
> @@ -75,7 +76,8 @@ def get_time_step(netcdf, time_variable_
>          time_format = netcdf.variables[time_variable_name].units.encode()
>          since_index = re.search('since', time_format).end()
>      except AttributeError:
> -        raise Exception('Time variable attributes cannot be decoded.')
> +        err = 'Time variable attributes cannot be decoded.'
> +        raise ValueError(err)
>
>      time_step = None
>      TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years')
> @@ -87,7 +89,7 @@ def get_time_step(netcdf, time_variable_
>      return (time_step, time_format, since_index)
>
>
> -def calculate_time(netcdf, time_raw_values, time_variable_name):
> +def _calculate_time(netcdf, time_raw_values, time_variable_name):
>      '''Convert time data from integer to python datetime.
>
>      :param netcdf: NetCDF dataset object
> @@ -103,8 +105,8 @@ def calculate_time(netcdf, time_raw_valu
>
>      time_values = []
>
> -    time_step, time_format, since_index = get_time_step(netcdf,
> time_variable_name)
> -    time_base = get_time_base(time_format, since_index)
> +    time_step, time_format, since_index = _get_time_step(netcdf,
> time_variable_name)
> +    time_base = _get_time_base(time_format, since_index)
>      time_step = time_step.lower()
>
>      if 'min' in time_step:
> @@ -127,12 +129,13 @@ def calculate_time(netcdf, time_raw_valu
>          for time in time_raw_values:
>              time_values.append(time_base + timedelta(years=int(time)))
>      else:
> -        raise Exception("The time step cannot be defined.")
> +        err = "The time step cannot be defined."
> +        raise ValueError(err)
>
>      return time_values
>
>
> -def get_lat_name(variable_names):
> +def _get_lat_name(variable_names):
>      '''Find the latitude variable name
>
>      :param variable_names: List of netCDF variables' name
> @@ -144,14 +147,15 @@ def get_lat_name(variable_names):
>
>      common_name = set(['lat', 'lats', 'latitude',
> 'latitudes']).intersection(variable_names)
>      if len(common_name) !=1:
> -        raise Exception("Unable to autodetect latitude variable name.")
> +        err = "Unable to autodetect latitude variable name."
> +        raise ValueError(err)
>      else:
>          lat_variable_name = common_name.pop()
>
>      return lat_variable_name
>
>
> -def get_lon_name(variable_names):
> +def _get_lon_name(variable_names):
>      '''Find the longitude variable name
>
>      :param variable_names: List of netCDF variables' name
> @@ -163,14 +167,15 @@ def get_lon_name(variable_names):
>
>      common_name = set(['lon', 'lons', 'longitude',
> 'longitudes']).intersection(variable_names)
>      if len(common_name) !=1:
> -        raise Exception("Unable to autodetect longitude variable name.")
> +        err = "Unable to autodetect longitude variable name."
> +        raise ValueError(err)
>      else:
>          lon_variable_name = common_name.pop()
>
>      return lon_variable_name
>
>
> -def get_time_name(variable_names):
> +def _get_time_name(variable_names):
>      '''Find the time variable name.
>
>      :param: variableNameList: List of netCDF variables' name
> @@ -181,15 +186,17 @@ def get_time_name(variable_names):
>      '''
>
>      common_name = set(['time', 'times', 'date', 'dates',
> 'julian']).intersection(variable_names)
> +
>      if len(common_name) !=1:
> -        raise Exception("Unable to autodetect time variable name.")
> +        err = "Unable to autodetect time variable name. These option(s)
> found: {0} ".format([each for each in common_name])
> +        raise ValueError(err)
>      else:
>          time_variable_name = common_name.pop()
>
>      return time_variable_name
>
>
> -def get_level_name(variable_names):
> +def _get_level_name(variable_names):
>      '''Find the level variable name.
>
>      :param variable_names: List of netCDF variables' name
> @@ -209,7 +216,7 @@ def get_level_name(variable_names):
>      return level_variable_name
>
>
> -def get_value_name(possible_value_name):
> +def _get_value_name(possible_value_name):
>      '''Find the value variable name.
>
>      :param possible_value_name: List of all value variable names
> @@ -222,7 +229,8 @@ def get_value_name(possible_value_name):
>      if len(possible_value_name) == 1:
>          value_variable_name = possible_value_name[0]
>      else:
> -        raise Exception("The given value variable name does not match
> with existing variables name.")
> +        err = "The given value variable name does not match with existing
> variables name."
> +        raise ValueError(err)
>
>      return value_variable_name
>
> @@ -242,27 +250,42 @@ def load_file(file_path, variable_name=N
>      try:
>          netcdf = netCDF4.Dataset(file_path, mode='r')
>      except:
> -        raise Exception("The given file cannot be loaded (Only netCDF
> file can be supported).")
> +        err = "The given file cannot be loaded (Only netCDF file can be
> supported)."
> +        raise ValueError(err)
>
>      variable_names = [variable.encode() for variable in
> netcdf.variables.keys()]
> -    variable_names = [variable.lower() for variable in variable_names]
>
> -    lat_variable_name = get_lat_name(variable_names)
> -    lon_variable_name = get_lon_name(variable_names)
> -    time_variable_name = get_time_name(variable_names)
> -    level_variable_name = get_level_name(variable_names)
> +    lat_variable_name = _get_lat_name(variable_names)
> +    lon_variable_name = _get_lon_name(variable_names)
> +    time_variable_name = _get_time_name(variable_names)
> +    level_variable_name = _get_level_name(variable_names)
>
>      if variable_name in variable_names:
>          value_variable_name = variable_name
>      else:
>          possible_value_name = list(set(variable_names) -
> set([lat_variable_name, lon_variable_name, time_variable_name,
> level_variable_name]))
> -        value_variable_name = get_value_name(possible_value_name)
> +        value_variable_name = _get_value_name(possible_value_name)
>
>      lats = netcdf.variables[lat_variable_name][:]
>      lons = netcdf.variables[lon_variable_name][:]
>      time_raw_values = netcdf.variables[time_variable_name][:]
> -    times = calculate_time(netcdf, time_raw_values, time_variable_name)
> +    times = _calculate_time(netcdf, time_raw_values, time_variable_name)
>      times = numpy.array(times)
>      values = ma.array(netcdf.variables[value_variable_name][:])
>
> +
> +    if len(values.shape) == 4:
> +        value_dimensions_names =
> list(netcdf.variables[value_variable_name].dimensions)
> +        value_dimensions_names = [each.encode() for each in
> value_dimensions_names]
> +        required_variable_names = [lat_variable_name, lon_variable_name,
> time_variable_name]
> +        index =
> value_dimensions_names.index(list(set(value_dimensions_names) -
> set(required_variable_names))[0])
> +        if index == 0:
> +            values = values [0,:,:,:]
> +        elif index == 1:
> +            values = values [:,0,:,:]
> +        elif index == 2:
> +            values = values [:,:,0,:]
> +        else:
> +            values = values [:,:,:,0]
> +
>      return Dataset(lats, lons, times, values, value_variable_name)
>
>
>