You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by jo...@apache.org on 2015/03/12 17:22:30 UTC

[1/4] climate git commit: CLIMATE-589 - Add units instance attribute to class Dataset and updated functions accordingly

Repository: climate
Updated Branches:
  refs/heads/master 8e69e625f -> 2cb39791e


CLIMATE-589 - Add units instance attribute to class Dataset and updated functions accordingly

- As part of adding unit conversion functionality (CLIMATE-587), the following was done:
- Added units attribute to Dataset to allow for units conversion functionality
- Updated ocw/tests/test_dataset_processor.py to reflect the new Dataset units attribute


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/c43235b7
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/c43235b7
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/c43235b7

Branch: refs/heads/master
Commit: c43235b7d147a4d553d19370a3d17c733b4771b7
Parents: 8e69e62
Author: Kim Whitehall <k_...@yahoo.com>
Authored: Wed Feb 25 17:37:02 2015 -0800
Committer: Michael Joyce <jo...@apache.org>
Committed: Thu Mar 12 09:00:56 2015 -0700

----------------------------------------------------------------------
 ocw/dataset.py                      | 12 +++++++++---
 ocw/tests/test_dataset_processor.py |  9 +++++----
 2 files changed, 14 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/c43235b7/ocw/dataset.py
----------------------------------------------------------------------
diff --git a/ocw/dataset.py b/ocw/dataset.py
index 3d2cc48..ce604e3 100644
--- a/ocw/dataset.py
+++ b/ocw/dataset.py
@@ -35,8 +35,8 @@ logger = logging.getLogger(__name__)
 class Dataset:
     '''Container for a dataset's attributes and data.'''
 
-    def __init__(self, lats, lons, times, values, variable=None, name="",
-                 origin=None):
+    def __init__(self, lats, lons, times, values, variable=None, units=None,
+                 origin=None, name=""):
         '''Default Dataset constructor
 
         :param lats: One dimensional numpy array of unique latitude values.
@@ -56,6 +56,9 @@ class Dataset:
         :param variable: Name of the value variable.
         :type variable: :mod:`string`
 
+        :param units: Name of the value units
+        :type units: :mod:`string`
+
         :param name: An optional string name for the Dataset.
         :type name: :mod:`string`
 
@@ -73,6 +76,7 @@ class Dataset:
         self.times = times
         self.values = values
         self.variable = variable
+        self.units = units
         self.name = name
         self.origin = origin
 
@@ -204,6 +208,7 @@ Expected shape (%s, %s, %s) but received (%s, %s, %s)""" % (time_count,
             "lon-range: {}, "
             "time_range: {}, "
             "var: {}>"
+            "units: {}>"
         )
 
         return formatted_repr.format(
@@ -211,7 +216,8 @@ Expected shape (%s, %s, %s) but received (%s, %s, %s)""" % (time_count,
             lat_range,
             lon_range,
             time_range,
-            self.variable
+            self.variable,
+            self.units
         )
 
 

http://git-wip-us.apache.org/repos/asf/climate/blob/c43235b7/ocw/tests/test_dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_dataset_processor.py b/ocw/tests/test_dataset_processor.py
index 5e62aa8..88641bc 100644
--- a/ocw/tests/test_dataset_processor.py
+++ b/ocw/tests/test_dataset_processor.py
@@ -236,6 +236,7 @@ class TestSafeSubset(unittest.TestCase):
                                          times,
                                          values,
                                          variable="test variable name",
+                                         units='test variable units',
                                          name='foo')
 
         self.spatial_out_of_bounds = ds.Bounds(
@@ -364,7 +365,7 @@ def ten_year_monthly_dataset():
     # Ten Years of monthly data
     times = np.array([datetime.datetime(year, month, 1) for year in range(2000, 2010) for month in range(1, 13)])
     values = np.ones([len(times), len(lats), len(lons)])
-    input_dataset = ds.Dataset(lats, lons, times, values, variable="test variable name", name='foo')
+    input_dataset = ds.Dataset(lats, lons, times, values, variable="test variable name", units='test variable units', name='foo')
     return input_dataset
 
 def ten_year_monthly_15th_dataset():
@@ -373,7 +374,7 @@ def ten_year_monthly_15th_dataset():
     # Ten Years of monthly data
     times = np.array([datetime.datetime(year, month, 15) for year in range(2000, 2010) for month in range(1, 13)])
     values = np.ones([len(times), len(lats), len(lons)])
-    input_dataset = ds.Dataset(lats, lons, times, values, variable="test variable name")
+    input_dataset = ds.Dataset(lats, lons, times, values, variable="test variable name", units='test variable units')
     return input_dataset
 
 def two_year_daily_dataset():
@@ -381,7 +382,7 @@ def two_year_daily_dataset():
     lons = np.array(range(-179, 180, 2))
     times = np.array([datetime.datetime(2001, 1, 1) + datetime.timedelta(days=d) for d in range(730)])
     values = np.ones([len(times), len(lats), len(lons)])
-    dataset = ds.Dataset(lats, lons, times, values, variable='random data')
+    dataset = ds.Dataset(lats, lons, times, values, variable='random data',units='test variable units')
     return dataset    
 
 def two_year_daily_2hr_dataset():
@@ -389,7 +390,7 @@ def two_year_daily_2hr_dataset():
     lons = np.array(range(-179, 180, 2))
     times = np.array([datetime.datetime(2001, 1, 1) + datetime.timedelta(days=d, hours=2) for d in range(730)])
     values = np.ones([len(times), len(lats), len(lons)])
-    dataset = ds.Dataset(lats, lons, times, values, variable='random data')
+    dataset = ds.Dataset(lats, lons, times, values, variable='random data', units='test variable units')
     return dataset    
 
 def build_ten_cube_dataset(value):


[2/4] climate git commit: CLIMATE-592 - update dataset_processor.py to accommodate units in Dataset object

Posted by jo...@apache.org.
CLIMATE-592 - update dataset_processor.py to accommodate units in Dataset object


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/d4cefc54
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/d4cefc54
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/d4cefc54

Branch: refs/heads/master
Commit: d4cefc545178b44417f39942d84148815fc11172
Parents: c43235b
Author: Kim Whitehall <k_...@yahoo.com>
Authored: Wed Feb 25 17:54:00 2015 -0800
Committer: Michael Joyce <jo...@apache.org>
Committed: Thu Mar 12 09:04:46 2015 -0700

----------------------------------------------------------------------
 ocw/data_source/local.py |  9 +++++++--
 ocw/data_source/rcmed.py |  5 +++--
 ocw/dataset.py           |  2 +-
 ocw/dataset_processor.py | 38 ++++++++++++++++++++++++++++++++++++++
 ocw/tests/test_local.py  |  1 +
 5 files changed, 50 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/data_source/local.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index 2b56d69..f2f0388 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -111,6 +111,7 @@ def _get_netcdf_variable_name(valid_var_names, netcdf, netcdf_var):
 
 def load_file(file_path,
               variable_name,
+              variable_unit = None,
               elevation_index=0,
               name='',
               lat_name=None,
@@ -124,6 +125,9 @@ def load_file(file_path,
     :param variable_name: The variable name to load from the NetCDF file.
     :type variable_name: :mod:`string`
 
+    :param variable_unit: (Optional) The variable unit to load from the NetCDF file.
+    :type variable_unit: :mod:`string`
+
     :param elevation_index: (Optional) The elevation index for which data should
         be returned. Climate data is often times 4 dimensional data. Some
         datasets will have readins at different height/elevation levels. OCW
@@ -182,6 +186,7 @@ def load_file(file_path,
     times = utils.decode_time_values(netcdf, time_name)
     times = numpy.array(times)
     values = ma.array(netcdf.variables[variable_name][:])
+    variable_unit = netcdf.variables[variable_name].units
 
     # If the values are 4D then we need to strip out the elevation index
     if len(values.shape) == 4:
@@ -214,5 +219,5 @@ def load_file(file_path,
     }
     if elevation_index != 0: origin['elevation_index'] = elevation_index
 
-    return Dataset(lats, lons, times, values, variable_name,
-                   name=name, origin=origin)
+    return Dataset(lats, lons, times, values, variable=variable_name,
+                   units=variable_unit, name=name, origin=origin)

http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/data_source/rcmed.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/rcmed.py b/ocw/data_source/rcmed.py
index 4733a45..ef0dc78 100644
--- a/ocw/data_source/rcmed.py
+++ b/ocw/data_source/rcmed.py
@@ -347,7 +347,7 @@ def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_l
     '''
     
     parameters_metadata = get_parameters_metadata()
-    parameter_name, time_step, _, _, _, _, _= _get_parameter_info(parameters_metadata, parameter_id)
+    parameter_name, time_step, _, _, _, _, parameter_units = _get_parameter_info(parameters_metadata, parameter_id)
     url = _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step)
     lats, lons, times, values = _get_data(url)
 
@@ -365,6 +365,7 @@ def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_l
                    unique_lats_lons_times[1],
                    unique_times,
                    values,
-                   parameter_name,
+                   variable=parameter_name,
+                   units=parameter_units,
                    name=name,
                    origin=origin)

http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/dataset.py
----------------------------------------------------------------------
diff --git a/ocw/dataset.py b/ocw/dataset.py
index ce604e3..ee86532 100644
--- a/ocw/dataset.py
+++ b/ocw/dataset.py
@@ -207,7 +207,7 @@ Expected shape (%s, %s, %s) but received (%s, %s, %s)""" % (time_count,
             "lat-range: {}, "
             "lon-range: {}, "
             "time_range: {}, "
-            "var: {}>"
+            "var: {}, "
             "units: {}>"
         )
 

http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 37296f2..1f9edf0 100644
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -62,6 +62,7 @@ def temporal_rebin(target_dataset, temporal_resolution):
                              binned_dates, 
                              binned_values,
                              target_dataset.variable,
+                             target_dataset.units,
                              target_dataset.name)
     
     return new_dataset
@@ -117,12 +118,16 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes):
                                    target_dataset.times, 
                                    new_values,
                                    target_dataset.variable,
+                                   target_dataset.units,
                                    target_dataset.name)
     return regridded_dataset
 
 def ensemble(datasets):
     """
     Generate a single dataset which is the mean of the input datasets
+
+    An ensemble datasets combines input datasets assuming the all have
+    similar shape, dimensions, and units. 
     
     :param datasets: Datasets to be used to compose the ensemble dataset from.
         All Datasets must be the same shape.
@@ -140,6 +145,7 @@ def ensemble(datasets):
                                   datasets[0].lons, 
                                   datasets[0].times,
                                   ensemble_values,
+                                  datasets[0].units,
                                   name="Dataset Ensemble")
     
     return ensemble_dataset
@@ -182,6 +188,7 @@ def subset(subregion, target_dataset):
             dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
             dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1],
         target_dataset.variable,
+        target_dataset.units,
         target_dataset.name
     )
 
@@ -248,6 +255,7 @@ def normalize_dataset_datetimes(dataset, timestep):
         np.array(new_times),
         dataset.values,
         dataset.variable,
+        dataset.units,
         dataset.name
     )
 
@@ -295,9 +303,39 @@ def write_netcdf(dataset, path, compress=True):
     lons[:] = dataset.lons
     times[:] = netCDF4.date2num(dataset.times, times.units)
     values[:] = dataset.values
+    values.units = dataset.units
 
     out_file.close()
 
+def water_flux_unit_conversion(dataset):
+    ''' Convert water flux variables units as necessary
+
+    Convert full SI units water flux units to more common units.
+
+    :param dataset: The dataset to convert.
+    :type dataset: :class:`dataset.Dataset`
+
+    :returns: A Dataset with values converted to new units.
+    :rtype: :class:`dataset.Dataset`
+    '''
+    waterFluxVariables = ['pr', 'evspsbl', 'mrro', 'swe']
+    variable = dataset.variable.lower()
+
+    if any(subString in variable for subString in waterFluxVariables):
+        dataset_units = dataset.units.lower()
+        if variable in 'swe':
+            if any(unit in dataset_units for unit in ['m', 'meter']):
+                dataset.values = 1.e3 * dataset.values
+                dataset.units = 'km'
+        else:
+            if any(unit in dataset_units 
+                for unit in ['kg m-2 s-1', 'mm s-1', 'mm/sec']):
+                dataset.values = 86400. * dataset.values
+                dataset.units = 'mm/day'
+
+    return dataset
+
+
 def _rcmes_normalize_datetimes(datetimes, timestep):
     """ Normalize Dataset datetime values.
 

http://git-wip-us.apache.org/repos/asf/climate/blob/d4cefc54/ocw/tests/test_local.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_local.py b/ocw/tests/test_local.py
index 6d67896..254c7f7 100644
--- a/ocw/tests/test_local.py
+++ b/ocw/tests/test_local.py
@@ -152,6 +152,7 @@ def create_netcdf_object():
         values[:] = values
         #Assign time info to time variable
         netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00' 
+        netCDF_file.variables['value'].units = 'foo_units'
         netCDF_file.close()
         return file_path
 


[3/4] climate git commit: CLIMATE-596 - Fix dataset processor parameter propagation

Posted by jo...@apache.org.
CLIMATE-596 - Fix dataset processor parameter propagation

- Fix dataset parameter propagation issues that were discovered with the
  fixes in CLIMATE-592. All kwargs are now passed as such instead of
  defaulting to positional arguments.
- Update a broken dataset.py test that didn't properly assign kwargs due
  to changes in previous commits.


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/01f3272b
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/01f3272b
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/01f3272b

Branch: refs/heads/master
Commit: 01f3272bad9324ab3583aafb7a0ac62e0e7750ef
Parents: d4cefc5
Author: Michael Joyce <jo...@apache.org>
Authored: Thu Mar 12 09:16:07 2015 -0700
Committer: Michael Joyce <jo...@apache.org>
Committed: Thu Mar 12 09:16:07 2015 -0700

----------------------------------------------------------------------
 ocw/dataset_processor.py  | 30 +++++++++++++++++-------------
 ocw/tests/test_dataset.py | 10 +++++++---
 2 files changed, 24 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/01f3272b/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 1f9edf0..f00ab36 100644
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -61,9 +61,10 @@ def temporal_rebin(target_dataset, temporal_resolution):
                              target_dataset.lons, 
                              binned_dates, 
                              binned_values,
-                             target_dataset.variable,
-                             target_dataset.units,
-                             target_dataset.name)
+                             variable=target_dataset.variable,
+                             units=target_dataset.units,
+                             name=target_dataset.name,
+                             origin=target_dataset.origin)
     
     return new_dataset
 
@@ -117,9 +118,10 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes):
                                    new_longitudes, 
                                    target_dataset.times, 
                                    new_values,
-                                   target_dataset.variable,
-                                   target_dataset.units,
-                                   target_dataset.name)
+                                   variable=target_dataset.variable,
+                                   units=target_dataset.units,
+                                   name=target_dataset.name,
+                                   origin=target_dataset.origin)
     return regridded_dataset
 
 def ensemble(datasets):
@@ -145,7 +147,7 @@ def ensemble(datasets):
                                   datasets[0].lons, 
                                   datasets[0].times,
                                   ensemble_values,
-                                  datasets[0].units,
+                                  units=datasets[0].units,
                                   name="Dataset Ensemble")
     
     return ensemble_dataset
@@ -187,9 +189,10 @@ def subset(subregion, target_dataset):
             dataset_slices["time_start"]:dataset_slices["time_end"] + 1,
             dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
             dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1],
-        target_dataset.variable,
-        target_dataset.units,
-        target_dataset.name
+        variable=target_dataset.variable,
+        units=target_dataset.units,
+        name=target_dataset.name,
+        origin=target_dataset.origin
     )
 
 def safe_subset(subregion, target_dataset):
@@ -254,9 +257,10 @@ def normalize_dataset_datetimes(dataset, timestep):
         dataset.lons,
         np.array(new_times),
         dataset.values,
-        dataset.variable,
-        dataset.units,
-        dataset.name
+        variable=dataset.variable,
+        units=dataset.units,
+        name=dataset.name,
+        origin=dataset.origin
     )
 
 def write_netcdf(dataset, path, compress=True):

http://git-wip-us.apache.org/repos/asf/climate/blob/01f3272b/ocw/tests/test_dataset.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_dataset.py b/ocw/tests/test_dataset.py
index 3edbe83..bd9dbd6 100644
--- a/ocw/tests/test_dataset.py
+++ b/ocw/tests/test_dataset.py
@@ -32,9 +32,13 @@ class TestDatasetAttributes(unittest.TestCase):
         self.variable = 'prec'
         self.name = 'foo'
         self.origin = {'path': '/a/fake/file/path'}
-        self.test_dataset = Dataset(self.lat, self.lon, self.time, 
-                                    self.value, self.variable,
-                                    self.name, self.origin)
+        self.test_dataset = Dataset(self.lat,
+                                    self.lon,
+                                    self.time,
+                                    self.value,
+                                    variable=self.variable,
+                                    name=self.name,
+                                    origin=self.origin)
 
     def test_lats(self):
         self.assertItemsEqual(self.test_dataset.lats, self.lat)


[4/4] climate git commit: Resolve CLIMATE-589, CLIMATE-590, CLIMATE-591, CLIMATE-592, CLIMATE-596.

Posted by jo...@apache.org.
Resolve CLIMATE-589, CLIMATE-590, CLIMATE-591, CLIMATE-592, CLIMATE-596.

Merge #164, Merge #165, Merge #166, Merge #167


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/2cb39791
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/2cb39791
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/2cb39791

Branch: refs/heads/master
Commit: 2cb39791e18fd6a0d9dbba40b74d6cabfeb1bacd
Parents: 8e69e62 01f3272
Author: Michael Joyce <jo...@apache.org>
Authored: Thu Mar 12 09:18:15 2015 -0700
Committer: Michael Joyce <jo...@apache.org>
Committed: Thu Mar 12 09:20:34 2015 -0700

----------------------------------------------------------------------
 ocw/data_source/local.py            |  9 +++--
 ocw/data_source/rcmed.py            |  5 +--
 ocw/dataset.py                      | 14 +++++---
 ocw/dataset_processor.py            | 58 +++++++++++++++++++++++++++-----
 ocw/tests/test_dataset.py           | 10 ++++--
 ocw/tests/test_dataset_processor.py |  9 ++---
 ocw/tests/test_local.py             |  1 +
 7 files changed, 83 insertions(+), 23 deletions(-)
----------------------------------------------------------------------