You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by jo...@apache.org on 2013/08/27 18:10:33 UTC

svn commit: r1517860 - /incubator/climate/trunk/ocw/dataset_processor.py

Author: joyce
Date: Tue Aug 27 16:10:32 2013
New Revision: 1517860

URL: http://svn.apache.org/r1517860
Log:
CLIMATE-276 - Move process functionality into dataset_processor

- Move process._create_mask_using_threshold to
  _rcmes_create_mask_using_threshold. As a result, we no longer need to
  import old toolkit code into ocw.dataset_processor.
- Remove toolkit.process import.

Modified:
    incubator/climate/trunk/ocw/dataset_processor.py

Modified: incubator/climate/trunk/ocw/dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/ocw/dataset_processor.py?rev=1517860&r1=1517859&r2=1517860&view=diff
==============================================================================
--- incubator/climate/trunk/ocw/dataset_processor.py (original)
+++ incubator/climate/trunk/ocw/dataset_processor.py Tue Aug 27 16:10:32 2013
@@ -16,7 +16,6 @@
 #
 
 from ocw import dataset as ds
-from toolkit import process
 
 import datetime
 import numpy as np
@@ -283,6 +282,41 @@ def _rcmes_spatial_regrid(spatial_values
 
     return regridded_values
 
+def _rcmes_create_mask_using_threshold(masked_array, threshold=0.5):
+    '''Mask an array if percent of values missing data is above a threshold.
+
+    For each value along an axis, if the proportion of steps that are missing
+    data is above ``threshold`` then the value is marked as missing data.
+
+    ..note: The 0th axis is currently always used.
+
+    :param masked_array: Masked array of data
+    :type masked_array: Numpy Masked Array
+    :param threshold: (optional) Threshold proportion above which a value is
+        marked as missing data.
+    :type threshold: Float
+
+    :returns: A Numpy array describing the mask for masked_array.
+    '''
+
+    # try, except used as some model files don't have a full mask, but a single bool
+    #  the except catches this situation and deals with it appropriately.
+    try:
+        nT = masked_array.mask.shape[0]
+
+        # For each pixel, count how many times are masked.
+        nMasked = masked_array.mask[:, :, :].sum(axis=0)
+
+        # Define new mask as when a pixel has over a defined threshold ratio of masked data
+        #   e.g. if the threshold is 75%, and there are 10 times,
+        #        then a pixel will be masked if more than 5 times are masked.
+        mymask = nMasked > (nT * threshold)
+
+    except:
+        mymask = np.zeros_like(masked_array.data[0, :, :])
+
+    return mymask
+
 
 def _rcmes_calc_average_on_new_time_unit_K(data, dates, unit):
     """ Rebin 3d array and list of dates using the provided unit parameter
@@ -377,7 +411,7 @@ def _rcmes_calc_average_on_new_time_unit
                 mask[timeunits!=myunit,:,:] = 1.0
                 # Calculate missing data mask within each time unit...
                 datamask_at_this_timeunit = np.zeros_like(data)
-                datamask_at_this_timeunit[:]= process.create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
+                datamask_at_this_timeunit[:]= _rcmes_create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
                 # Store results for masking later
                 datamask_store.append(datamask_at_this_timeunit[0])
                 # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).