You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by jo...@apache.org on 2014/04/05 07:05:24 UTC

[04/16] git commit: CLIMATE-393 - Add dataset_processor.safe_subset

CLIMATE-393 - Add dataset_processor.safe_subset

- Add safe_subset for gracefully handling subsetting when not all of the
  bounding parameters are fully contained in the target dataset. If any
  of the bounding values fall outside of the dataset's bounds they are
  defaulted to the dataset's maximum/minimum.


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/58eb9e78
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/58eb9e78
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/58eb9e78

Branch: refs/heads/master
Commit: 58eb9e786cc9452bdacb6118ef6ba89281c1e48c
Parents: 0e65170
Author: Michael Joyce <jo...@apache.org>
Authored: Fri Apr 4 19:54:40 2014 -0700
Committer: Michael Joyce <jo...@apache.org>
Committed: Fri Apr 4 19:54:40 2014 -0700

----------------------------------------------------------------------
 ocw/dataset_processor.py            | 39 ++++++++++++++++++
 ocw/tests/test_dataset_processor.py | 69 ++++++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/58eb9e78/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 72c0172..0d08699 100644
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -187,6 +187,45 @@ def subset(subregion, target_dataset):
         target_dataset.name
     )
 
+def safe_subset(subregion, target_dataset):
+    '''Safely subset given dataset with subregion information
+
+    A standard subset requires that the provided subregion be entirely contained
+    within the dataset's bounds. `safe_subset` first replaces every bound that
+    falls outside of the dataset with the dataset's own minimum/maximum and
+    then subsets with the adjusted bounds. The adjustment is made on a shallow
+    copy, so the `subregion` object passed in by the caller is not modified.
+
+    :param subregion: The Bounds with which to subset the target Dataset.
+    :type subregion: ocw.dataset.Bounds
+    :param target_dataset: The Dataset object to subset.
+    :type target_dataset: ocw.dataset.Dataset
+
+    :returns: The subset-ed Dataset object
+    :rtype: Dataset
+    '''
+    import copy
+
+    lat_min, lat_max, lon_min, lon_max = target_dataset.spatial_boundaries()
+    start, end = target_dataset.time_range()
+
+    # Work on a copy: callers may reuse the same Bounds for several datasets,
+    # and clamping it in place would silently shrink it for later calls.
+    clamped = copy.copy(subregion)
+
+    # Lower bounds can only move up to the dataset minimum, upper bounds can
+    # only move down to the dataset maximum.
+    clamped.lat_min = max(subregion.lat_min, lat_min)
+    clamped.lat_max = min(subregion.lat_max, lat_max)
+    clamped.lon_min = max(subregion.lon_min, lon_min)
+    clamped.lon_max = min(subregion.lon_max, lon_max)
+    clamped.start = max(subregion.start, start)
+    clamped.end = min(subregion.end, end)
+
+    return subset(clamped, target_dataset)
+
 def normalize_dataset_datetimes(dataset, timestep):
     ''' Normalize Dataset datetime values.
 

http://git-wip-us.apache.org/repos/asf/climate/blob/58eb9e78/ocw/tests/test_dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_dataset_processor.py b/ocw/tests/test_dataset_processor.py
index f883ad9..86f3327 100644
--- a/ocw/tests/test_dataset_processor.py
+++ b/ocw/tests/test_dataset_processor.py
@@ -228,6 +228,75 @@ class TestSubset(unittest.TestCase):
                                 "time_end"   : 49}
         self.assertDictEqual(index_slices, control_index_slices)
 
+class TestSafeSubset(unittest.TestCase):
+    '''Exercise safe_subset with Bounds that extend past the target dataset'''
+
+    def setUp(self):
+        # Target dataset: integer latitudes -60..60, integer longitudes
+        # -170..170, and one timestamp on the first of every month from
+        # January 2000 through December 2009.
+        lats = np.array(range(-60, 61, 1))
+        lons = np.array(range(-170, 171, 1))
+        times = np.array([datetime.datetime(year, month, 1)
+                          for year in range(2000, 2010)
+                          for month in range(1, 13)])
+        values = np.ones([len(times), len(lats), len(lons)])
+        self.target_dataset = ds.Dataset(lats,
+                                         lons,
+                                         times,
+                                         values,
+                                         variable="test variable name",
+                                         name='foo')
+
+        # Spatial limits exceed the dataset; the time range lies inside it.
+        self.spatial_out_of_bounds = ds.Bounds(
+            -165, 165,
+            -180, 180,
+            datetime.datetime(2001, 1, 1),
+            datetime.datetime(2004, 1, 1)
+        )
+
+        # Spatial limits lie inside the dataset; the time range exceeds it
+        # on both ends.
+        self.temporal_out_of_bounds = ds.Bounds(
+            -40, 40,
+            -160.25, 160.5,
+            datetime.datetime(1999, 1, 15),
+            datetime.datetime(2222, 2, 15)
+        )
+
+        # Both the spatial limits and the time range exceed the dataset.
+        self.everything_out_of_bounds = ds.Bounds(
+            -165, 165,
+            -180, 180,
+            datetime.datetime(1999, 1, 15),
+            datetime.datetime(2222, 2, 15)
+        )
+
+    def test_partial_spatial_overlap(self):
+        '''Ensure that safe_subset can handle out of bounds spatial values'''
+        # Named `subset` rather than `ds` so the dataset module alias used in
+        # setUp is not shadowed.
+        subset = dp.safe_subset(self.spatial_out_of_bounds,
+                                self.target_dataset)
+        spatial_bounds = subset.spatial_boundaries()
+        self.assertEqual(spatial_bounds[0], -60)
+        self.assertEqual(spatial_bounds[1], 60)
+        self.assertEqual(spatial_bounds[2], -170)
+        self.assertEqual(spatial_bounds[3], 170)
+
+    def test_partial_temporal_overlap(self):
+        '''Ensure that safe_subset can handle out of bounds temporal values'''
+        subset = dp.safe_subset(self.temporal_out_of_bounds,
+                                self.target_dataset)
+        temporal_bounds = subset.time_range()
+        start = datetime.datetime(2000, 1, 1)
+        end = datetime.datetime(2009, 12, 1)
+
+        self.assertEqual(temporal_bounds[0], start)
+        self.assertEqual(temporal_bounds[1], end)
+
+    def test_entire_bounds_overlap(self):
+        '''Ensure that safe_subset can handle every bound being out of range'''
+        subset = dp.safe_subset(self.everything_out_of_bounds,
+                                self.target_dataset)
+        spatial_bounds = subset.spatial_boundaries()
+        temporal_bounds = subset.time_range()
+        start = datetime.datetime(2000, 1, 1)
+        end = datetime.datetime(2009, 12, 1)
+
+        self.assertEqual(spatial_bounds[0], -60)
+        self.assertEqual(spatial_bounds[1], 60)
+        self.assertEqual(spatial_bounds[2], -170)
+        self.assertEqual(spatial_bounds[3], 170)
+        self.assertEqual(temporal_bounds[0], start)
+        self.assertEqual(temporal_bounds[1], end)
+
 class TestFailingSubset(unittest.TestCase):
     def setUp(self):
         self.target_dataset = ten_year_monthly_dataset()