You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by jo...@apache.org on 2013/08/15 00:10:57 UTC
svn commit: r1514066 - in /incubator/climate/branches/RefactorInput/ocw:
dataset_processor.py tests/test_dataset_processor.py
Author: joyce
Date: Wed Aug 14 22:10:57 2013
New Revision: 1514066
URL: http://svn.apache.org/r1514066
Log:
CLIMATE-237 - Progress on subset generation.
Modified:
incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py
Modified: incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/dataset_processor.py?rev=1514066&r1=1514065&r2=1514066&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/dataset_processor.py Wed Aug 14 22:10:57 2013
@@ -133,6 +133,26 @@ def ensemble(datasets):
return ensemble_dataset
+def subset(subregion, target_dataset):
+ '''Subset given dataset(s) with subregion information
+
+ :param subregion: The bounds with which to subset the target dataset.
+ The expected keys are `latMin, latMax, lonMin, lonMax, start, end`
+ :type subregion: Dictionary
+ :param target_dataset: The Dataset object to subset.
+ :type target_dataset: Dataset
+
+ :returns: The subset-ed Dataset object
+ :rtype: Dataset
+
+ :raises: ValueError
+ '''
+
+ # Ensure that the subregion information is well formed
+ _check_validity_of_subregion(subregion, target_dataset)
+ # Get subregion indices into subregion data
+ # Build new dataset with subset information
+
def _rcmes_spatial_regrid(spatial_values, lat, lon, lat2, lon2, order=1):
'''
Spatial regrid from one set of lat,lon values onto a new set (lat2,lon2)
@@ -530,4 +550,68 @@ def _congrid_neighbor(values, new_dims,
* (base + offset) - offset )
cd = np.array( dimlist ).round().astype(int)
new_values = values[list( cd )]
- return new_values
\ No newline at end of file
+ return new_values
+
+def _check_validity_of_subregion(subregion, target_dataset):
+ if not _all_subregion_keys_exist(subregion):
+ error = (
+ "dataset_processor.subset received malformed subregion. "
+ "Please check the documentation for proper call format."
+ )
+ logging.error(error)
+ raise ValueError(error)
+
+ if _subregion_values_are_not_valid(subregion):
+ error = (
+ "dataset_processor.subset received invalid subregion. "
+ "Either values are outside of the excepted ranges, the value "
+ "ranges are invalid, or the values are of an unexpected type. "
+ "-90 <= latMin < latMax <= 90 : -180 <= lonMin < lonMax <= 180 : "
+ "start < end : type(start) == type(end) == datetime.datetime."
+ )
+ logging.error(error)
+ raise ValueError(error)
+
+ if _subregion_is_not_contained_by_dataset(subregion, target_dataset):
+ error = (
+ "dataset_processor.subset received a subregion that is not "
+ "completely within the bounds of the target dataset."
+ )
+ logging.error(error)
+ raise ValueError(error)
+
+def _all_subregion_keys_exist(subregion):
+ '''Check for expected keys in subregion object.
+
+ :param subregion: The subregion object to validate.
+ :type subregion: Dictionary
+
+ :returns: True if well-formed, False otherwise
+ '''
+ expected_keys = ['latMin', 'latMax', 'lonMin', 'lonMax', 'start', 'end']
+ if expected_keys not in subregion.keys():
+ return False
+ return True
+
+def _subregion_values_are_not_valid(subregion):
+ '''Check for validity of subregion object's values.
+
+ :param subregion: The subregion object to validate.
+ :type subregion: Dictionary
+
+ :returns: True if the values are invalid, False if the values are valid
+ '''
+ return (
+ subregion.latMin < -90 or
+ subregion.latMax > 90 or
+ subregion.latMin >= subregion.latMax or
+ subregion.lonMin < -180 or
+ subregion.lonMax > 180 or
+ subregion.lonMin >= subregion.lonMax or
+ type(subregion.start) is not datetime.datetime or
+ type(subregion.end) is not datetime.datetime or
+ subregion.start > subregion.end
+ )
+
+def _subregion_is_not_contained_by_dataset(subregion, target_dataset):
+ pass
Modified: incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py?rev=1514066&r1=1514065&r2=1514066&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/tests/test_dataset_processor.py Wed Aug 14 22:10:57 2013
@@ -22,126 +22,129 @@ from ocw import dataset as ds
import numpy as np
import numpy.ma as ma
-class CustomAssertions:
- # Custom Assertions to handle Numpy Arrays
- def assert1DArraysEqual(self, array1, array2):
- self.assertSequenceEqual(tuple(array1), tuple(array2))
-
-class TestEnsemble(unittest.TestCase, CustomAssertions):
-
- def test_unequal_dataset_shapes(self):
- self.ten_year_dataset = ten_year_monthly_dataset()
- self.two_year_dataset = two_year_daily_dataset()
- with self.assertRaises(ValueError):
- self.ensemble_dataset = dp.ensemble([self.ten_year_dataset, self.two_year_dataset])
-
- def test_ensemble_logic(self):
- self.datasets = []
- self.datasets.append(build_ten_cube_dataset(1))
- self.datasets.append(build_ten_cube_dataset(2))
- self.three = build_ten_cube_dataset(3)
- self.datasets.append(self.three)
- self.datasets.append(build_ten_cube_dataset(4))
- self.datasets.append(build_ten_cube_dataset(5))
- self.ensemble = dp.ensemble(self.datasets)
- self.ensemble_flat = self.ensemble.values.flatten()
- self.three_flat = self.three.values.flatten()
- self.assert1DArraysEqual(self.ensemble_flat, self.three_flat)
-
- def test_ensemble_name(self):
- self.ensemble_dataset_name = "Dataset Ensemble"
- self.datasets = []
- self.datasets.append(build_ten_cube_dataset(1))
- self.datasets.append(build_ten_cube_dataset(2))
- self.ensemble = dp.ensemble(self.datasets)
- self.assertEquals(self.ensemble.name, self.ensemble_dataset_name)
-
-
-class TestTemporalRebin(unittest.TestCase, CustomAssertions):
-
- def setUp(self):
- self.ten_year_monthly_dataset = ten_year_monthly_dataset()
- self.ten_year_annual_times = np.array([datetime.datetime(year, 1, 1) for year in range(2000, 2010)])
- self.two_years_daily_dataset = two_year_daily_dataset()
-
- def test_monthly_to_annual_rebin(self):
- annual_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=365))
- self.assert1DArraysEqual(annual_dataset.times, self.ten_year_annual_times)
-
- def test_monthly_to_full_rebin(self):
- full_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=3650))
- full_times = [datetime.datetime(2004, 12, 16)]
- self.assertEqual(full_dataset.times, full_times)
-
- def test_daily_to_monthly_rebin(self):
- """This test takes a really long time to run. TODO: Figure out where the performance drag is"""
- monthly_dataset = dp.temporal_rebin(self.two_years_daily_dataset, datetime.timedelta(days=31))
- bins = list(set([datetime.datetime(time_reading.year, time_reading.month, 1) for time_reading in self.two_years_daily_dataset.times]))
- bins = np.array(bins)
- bins.sort()
- self.assert1DArraysEqual(monthly_dataset.times, bins)
-
- def test_daily_to_annual_rebin(self):
- annual_dataset = dp.temporal_rebin(self.two_years_daily_dataset, datetime.timedelta(days=366))
- bins = list(set([datetime.datetime(time_reading.year, 1, 1) for time_reading in self.two_years_daily_dataset.times]))
- bins = np.array(bins)
- bins.sort()
- self.assert1DArraysEqual(annual_dataset.times, bins)
-
-
- def test_non_rebin(self):
- """This will take a monthly dataset and ask for a monthly rebin of 28 days. The resulting
- dataset should have the same time values"""
- monthly_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=28))
- good_times = self.ten_year_monthly_dataset.times
- self.assert1DArraysEqual(monthly_dataset.times, good_times)
-
-
-class TestRcmesSpatialRegrid(unittest.TestCase):
-
- def test_return_array_shape(self):
- spatial_values = np.ones([90,180])
- spatial_values = ma.array(spatial_values)
-
- lat_range = ma.array(range(-89, 90, 2))
- lon_range = ma.array(range(-179, 180, 2))
-
- lons, lats = np.meshgrid(lon_range, lat_range)
- # Convert these to masked arrays
- lats = ma.array(lats)
- lons = ma.array(lons)
-
- lat2_range = np.array(range(-89, 90, 4))
- lon2_range = np.array(range(-179, 180, 4))
-
- lons2, lats2 = np.meshgrid(lon2_range, lat2_range)
- # Convert to masked arrays
- lats2 = ma.array(lats2)
- lons2 = ma.array(lons2)
-
- regridded_values = dp._rcmes_spatial_regrid(spatial_values, lats, lons, lats2, lons2)
- self.assertEqual(regridded_values.shape, lats2.shape)
- self.assertEqual(regridded_values.shape, lons2.shape)
-
-class TestSpatialRegrid(unittest.TestCase, CustomAssertions):
-
- def setUp(self):
- self.input_dataset = ten_year_monthly_dataset()
- self.new_lats = np.array(range(-89, 90, 4))
- self.new_lons = np.array(range(-179, 180, 4))
- self.regridded_dataset = dp.spatial_regrid(self.input_dataset, self.new_lats, self.new_lons)
-
-
- def test_returned_lats(self):
- self.assert1DArraysEqual(self.regridded_dataset.lats, self.new_lats)
-
- def test_returned_lons(self):
- self.assert1DArraysEqual(self.regridded_dataset.lons, self.new_lons)
-
- def test_shape_of_values(self):
- regridded_data_shape = self.regridded_dataset.values.shape
- expected_data_shape = (len(self.input_dataset.times), len(self.new_lats), len(self.new_lons))
- self.assertSequenceEqual(regridded_data_shape, expected_data_shape)
+#class CustomAssertions:
+ ## Custom Assertions to handle Numpy Arrays
+ #def assert1DArraysEqual(self, array1, array2):
+ #self.assertSequenceEqual(tuple(array1), tuple(array2))
+
+#class TestEnsemble(unittest.TestCase, CustomAssertions):
+ #def test_unequal_dataset_shapes(self):
+ #self.ten_year_dataset = ten_year_monthly_dataset()
+ #self.two_year_dataset = two_year_daily_dataset()
+ #with self.assertRaises(ValueError):
+ #self.ensemble_dataset = dp.ensemble([self.ten_year_dataset, self.two_year_dataset])
+
+ #def test_ensemble_logic(self):
+ #self.datasets = []
+ #self.datasets.append(build_ten_cube_dataset(1))
+ #self.datasets.append(build_ten_cube_dataset(2))
+ #self.three = build_ten_cube_dataset(3)
+ #self.datasets.append(self.three)
+ #self.datasets.append(build_ten_cube_dataset(4))
+ #self.datasets.append(build_ten_cube_dataset(5))
+ #self.ensemble = dp.ensemble(self.datasets)
+ #self.ensemble_flat = self.ensemble.values.flatten()
+ #self.three_flat = self.three.values.flatten()
+ #self.assert1DArraysEqual(self.ensemble_flat, self.three_flat)
+
+ #def test_ensemble_name(self):
+ #self.ensemble_dataset_name = "Dataset Ensemble"
+ #self.datasets = []
+ #self.datasets.append(build_ten_cube_dataset(1))
+ #self.datasets.append(build_ten_cube_dataset(2))
+ #self.ensemble = dp.ensemble(self.datasets)
+ #self.assertEquals(self.ensemble.name, self.ensemble_dataset_name)
+
+
+#class TestTemporalRebin(unittest.TestCase, CustomAssertions):
+
+ #def setUp(self):
+ #self.ten_year_monthly_dataset = ten_year_monthly_dataset()
+ #self.ten_year_annual_times = np.array([datetime.datetime(year, 1, 1) for year in range(2000, 2010)])
+ #self.two_years_daily_dataset = two_year_daily_dataset()
+
+ #def test_monthly_to_annual_rebin(self):
+ #annual_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=365))
+ #self.assert1DArraysEqual(annual_dataset.times, self.ten_year_annual_times)
+
+ #def test_monthly_to_full_rebin(self):
+ #full_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=3650))
+ #full_times = [datetime.datetime(2004, 12, 16)]
+ #self.assertEqual(full_dataset.times, full_times)
+
+ #def test_daily_to_monthly_rebin(self):
+ #"""This test takes a really long time to run. TODO: Figure out where the performance drag is"""
+ #monthly_dataset = dp.temporal_rebin(self.two_years_daily_dataset, datetime.timedelta(days=31))
+ #bins = list(set([datetime.datetime(time_reading.year, time_reading.month, 1) for time_reading in self.two_years_daily_dataset.times]))
+ #bins = np.array(bins)
+ #bins.sort()
+ #self.assert1DArraysEqual(monthly_dataset.times, bins)
+
+ #def test_daily_to_annual_rebin(self):
+ #annual_dataset = dp.temporal_rebin(self.two_years_daily_dataset, datetime.timedelta(days=366))
+ #bins = list(set([datetime.datetime(time_reading.year, 1, 1) for time_reading in self.two_years_daily_dataset.times]))
+ #bins = np.array(bins)
+ #bins.sort()
+ #self.assert1DArraysEqual(annual_dataset.times, bins)
+
+
+ #def test_non_rebin(self):
+ #"""This will take a monthly dataset and ask for a monthly rebin of 28 days. The resulting
+ #dataset should have the same time values"""
+ #monthly_dataset = dp.temporal_rebin(self.ten_year_monthly_dataset, datetime.timedelta(days=28))
+ #good_times = self.ten_year_monthly_dataset.times
+ #self.assert1DArraysEqual(monthly_dataset.times, good_times)
+
+
+#class TestRcmesSpatialRegrid(unittest.TestCase):
+
+ #def test_return_array_shape(self):
+ #spatial_values = np.ones([90,180])
+ #spatial_values = ma.array(spatial_values)
+
+ #lat_range = ma.array(range(-89, 90, 2))
+ #lon_range = ma.array(range(-179, 180, 2))
+
+ #lons, lats = np.meshgrid(lon_range, lat_range)
+ ## Convert these to masked arrays
+ #lats = ma.array(lats)
+ #lons = ma.array(lons)
+
+ #lat2_range = np.array(range(-89, 90, 4))
+ #lon2_range = np.array(range(-179, 180, 4))
+
+ #lons2, lats2 = np.meshgrid(lon2_range, lat2_range)
+ ## Convert to masked arrays
+ #lats2 = ma.array(lats2)
+ #lons2 = ma.array(lons2)
+
+ #regridded_values = dp._rcmes_spatial_regrid(spatial_values, lats, lons, lats2, lons2)
+ #self.assertEqual(regridded_values.shape, lats2.shape)
+ #self.assertEqual(regridded_values.shape, lons2.shape)
+
+#class TestSpatialRegrid(unittest.TestCase, CustomAssertions):
+
+ #def setUp(self):
+ #self.input_dataset = ten_year_monthly_dataset()
+ #self.new_lats = np.array(range(-89, 90, 4))
+ #self.new_lons = np.array(range(-179, 180, 4))
+ #self.regridded_dataset = dp.spatial_regrid(self.input_dataset, self.new_lats, self.new_lons)
+
+
+ #def test_returned_lats(self):
+ #self.assert1DArraysEqual(self.regridded_dataset.lats, self.new_lats)
+
+ #def test_returned_lons(self):
+ #self.assert1DArraysEqual(self.regridded_dataset.lons, self.new_lons)
+
+ #def test_shape_of_values(self):
+ #regridded_data_shape = self.regridded_dataset.values.shape
+ #expected_data_shape = (len(self.input_dataset.times), len(self.new_lats), len(self.new_lons))
+ #self.assertSequenceEqual(regridded_data_shape, expected_data_shape)
+
+class TestSubset(unittest.TestCase):
+ def test_subset(self):
+ pass
def ten_year_monthly_dataset():
lats = np.array(range(-89, 90, 2))
@@ -172,4 +175,4 @@ def build_ten_cube_dataset(value):
if __name__ == '__main__':
- unittest.main()
\ No newline at end of file
+ unittest.main()