You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@climate.apache.org by hu...@apache.org on 2017/04/17 18:44:29 UTC

[1/3] climate git commit: CLIMATE-905 - Box whisker plot to evaluate simulated trends

Repository: climate
Updated Branches:
  refs/heads/master 6598b9f1a -> 0ed13a19c


CLIMATE-905 - Box whisker plot to evaluate simulated trends

- example/temperature/trends_over_CONUS.py has been updated
- data_source.local.load_multiple_files has been updated to load the CMIP5 files without file concatenation.
- dataset_processor has been updated.
- plotter has a new module (draw_plot_to_compare_trends) to generate a plot to compare observed trends with simulated ones.
- test_local.py has been updated to reflect the changes in
- utils.calculate_temporal_trend_of_time_series has been debugged.
- utils.calculate_ensemble_temporal_trends has been updated.


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/6f6cde83
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/6f6cde83
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/6f6cde83

Branch: refs/heads/master
Commit: 6f6cde83ed23e5d121244896c97638070e280a74
Parents: 6598b9f
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Sun Apr 16 23:01:39 2017 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Sun Apr 16 23:01:39 2017 -0700

----------------------------------------------------------------------
 examples/temperature_trends_over_CONUS.py | 100 ++++++++++++++++++-------
 ocw/data_source/local.py                  |  78 ++++++++++---------
 ocw/dataset_processor.py                  |  13 ++--
 ocw/plotter.py                            |  53 ++++++++++++-
 ocw/tests/test_local.py                   |   5 +-
 ocw/utils.py                              |  19 ++---
 6 files changed, 188 insertions(+), 80 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/examples/temperature_trends_over_CONUS.py
----------------------------------------------------------------------
diff --git a/examples/temperature_trends_over_CONUS.py b/examples/temperature_trends_over_CONUS.py
index 0467291..54f5843 100644
--- a/examples/temperature_trends_over_CONUS.py
+++ b/examples/temperature_trends_over_CONUS.py
@@ -25,17 +25,23 @@ import ocw.dataset_processor as dsp
 import ocw.metrics as metrics
 import ocw.plotter as plotter
 import ocw.utils as utils
-import ssl
-if hasattr(ssl, '_create_unverified_context'):
-    ssl._create_default_https_context = ssl._create_unverified_context
 
 # nClimDiv observation file
 file_obs = 'nClimDiv/nClimDiv_tave_1895-2005.nc'
 
+# CMIP5 simulations
+model_file_path = 'CMIP5_historical'
+dataset_name = ['BNU-ESM','CanESM2','CNRM-CM5','CSIRO-Mk3',
+                'GISS-E2-H','HadCM3','inmcm4','MIROC-ESM',
+                'MPI-ESM-LR','NorESM1-M']
+nmodel = len(dataset_name) # number of CMIP5 simulations
+
 # temporal boundary
 start_date = datetime.datetime(1979, 12, 1)
 end_date = datetime.datetime(2005, 8, 31)
 
+nyear = 26                             
+
 month_start = 6 # June
 month_end = 8   # August
 
@@ -51,6 +57,8 @@ regions.append(['KY', 'VA', 'AR','AL', 'LA','MS', 'FL', 'GA','NC','SC', 'DC','TN
 plotter.fill_US_states_with_color(regions, 'NCA_seven_regions', colors=True,
                                 region_names=['NW','SW','NGP','SGP','MW','NE','SE'])
 
+n_region = 7 # number of regions
+
 # CONUS regional boundaries
 NW_bounds = Bounds(boundary_type='us_states', 
                    us_states=regions[0])
@@ -67,36 +75,78 @@ NE_bounds = Bounds(boundary_type='us_states',
 SE_bounds = Bounds(boundary_type='us_states', 
                    us_states=regions[6])
 
+regional_bounds = [NW_bounds, SW_bounds, NGP_bounds,
+                   SGP_bounds, MW_bounds, NE_bounds, SE_bounds]
 
 """ Load nClimDiv file into OCW Dataset """
 obs_dataset = local.load_file(file_obs, variable_name='tave') 
 
+""" Load CMIP5 simulations into a list of OCW Datasets"""
+model_dataset = local.load_multiple_files(file_path=model_file_path, variable_name='tas',
+                                          dataset_name=dataset_name, variable_unit='K') 
+
 """ Temporal subset of obs_dataset """
 obs_dataset_subset = dsp.temporal_slice(obs_dataset, 
                   start_time=start_date, end_time=end_date)
 obs_dataset_season = dsp.temporal_subset(obs_dataset_subset, month_start, month_end,
                       average_each_year=True)
+
+""" Temporal subset of model_dataset """
+model_dataset_subset = [dsp.temporal_slice(dataset,start_time=start_date, end_time=end_date) 
+                        for dataset in model_dataset]
+model_dataset_season = [dsp.temporal_subset(dataset, month_start, month_end,
+                      average_each_year=True) for dataset in model_dataset_subset]
+
+
 """ Spatial subset of obs_dataset and generate time series """
-NW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NW_bounds)) 
-SW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SW_bounds)) 
-NGP_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NGP_bounds)) 
-SGP_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SGP_bounds)) 
-MW_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, MW_bounds)) 
-NE_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, NE_bounds)) 
-SE_timeseries = utils.calc_time_series(dsp.subset(obs_dataset_season, SE_bounds)) 
-
-year = np.arange(len(NW_timeseries))
-
-regional_trends=[]
-regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NW_timeseries)[0])
-regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SW_timeseries)[0])
-regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NGP_timeseries)[0])
-regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SGP_timeseries)[0])
-regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, MW_timeseries)[0])
-regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, NE_timeseries)[0])
-regional_trends.append(utils.calculate_temporal_trend_of_time_series(year, SE_timeseries)[0])
-
-plotter.fill_US_states_with_color(regions, 'NCA_tave_trends_JJA_1980-2005', 
-                                  values=regional_trends,
-                                  region_names=['%.3f' %(10*i) for i in regional_trends])
+obs_timeseries = np.zeros([nyear, n_region])   # region index 0-6: NW, SW, NGP, SGP, MW, NE, SE
+model_timeseries = np.zeros([nmodel, nyear, n_region])
+
+for iregion in np.arange(n_region):
+    obs_timeseries[:, iregion] = utils.calc_time_series(
+                         dsp.subset(obs_dataset_season, regional_bounds[iregion])) 
+    for imodel in np.arange(nmodel):
+        model_timeseries[imodel, :, iregion] = utils.calc_time_series(
+                         dsp.subset(model_dataset_season[imodel], regional_bounds[iregion]))
+
+year = np.arange(nyear)
+
+regional_trends_obs = np.zeros(n_region)
+regional_trends_obs_error = np.zeros(n_region)
+regional_trends_model = np.zeros([nmodel, n_region])
+regional_trends_model_error = np.zeros([nmodel, n_region])
+regional_trends_ens = np.zeros(n_region)
+regional_trends_ens_error = np.zeros(n_region)
+
+for iregion in np.arange(n_region):
+    regional_trends_obs[iregion], regional_trends_obs_error[iregion] = utils.calculate_temporal_trend_of_time_series(
+                                                                       year, obs_timeseries[:,iregion])
+    for imodel in np.arange(nmodel):
+        regional_trends_model[imodel, iregion], regional_trends_model_error[iregion] = utils.calculate_temporal_trend_of_time_series(
+                                                                                       year, model_timeseries[imodel, :, iregion])
+    regional_trends_ens[iregion], regional_trends_ens_error[iregion] = utils.calculate_ensemble_temporal_trends(
+                                                                       model_timeseries[:, :, iregion]) 
+
+""" Generate plots """
+
+plotter.fill_US_states_with_color(regions, 'nClimDiv_tave_trends_JJA_1980-2005', 
+                                  values=regional_trends_obs,
+                                  region_names=['%.3f' %(10*i) for i in regional_trends_obs])
+
+plotter.fill_US_states_with_color(regions, 'CMIP5_ENS_tave_trends_JJA_1980-2005', 
+                                  values=regional_trends_ens,
+                                  region_names=['%.3f' %(10*i) for i in regional_trends_ens])
+
+bias_ens = regional_trends_ens - regional_trends_obs
+plotter.fill_US_states_with_color(regions, 'CMIP5_ENS_tave_trends_bias_from_nClimDiv_JJA_1980-2005', 
+                                  values=bias_ens,
+                                  region_names=['%.3f' %(10*i) for i in bias_ens])
+
+obs_data = np.vstack([regional_trends_obs, regional_trends_obs_error])
+ens_data = np.vstack([regional_trends_ens, regional_trends_ens_error])
+
+plotter.draw_plot_to_compare_trends(obs_data, ens_data, regional_trends_model,
+                                    fname='Trends_comparison_btn_CMIP5_and_nClimDiv',
+                                    data_labels=['NW','SW','NGP','SGP','MW','NE','SE'],
+                                    xlabel='NCA regions', ylabel='tas trend [K/year]')
 

http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/ocw/data_source/local.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index 0c29588..78cf3cc 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -302,27 +302,24 @@ def load_file(file_path,
 
 def load_multiple_files(file_path,
                         variable_name,
-                        dataset_name='data',
+                        generic_dataset_name=False,
+                        dataset_name=None,
                         variable_unit=None,
                         lat_name=None,
                         lon_name=None,
                         time_name=None):
     ''' load multiple netcdf files with common filename pattern and return an array of OCW datasets
 
-    :param file_path: directory name and common file name patterns where the NetCDF files to load are stored.
+    :param file_path: directory name and (optionally common file name patterns) where the NetCDF files to load are stored.
     :type file_path: :mod:`string`
-    :param dataset_name: a name of dataset when reading a single file
-    :type dataset_name: :mod:'string'
+    :param dataset_name: a list of dataset names. Data filenames must include the elements of dataset_name list.
+    :type dataset_name: :mod:'list'
+    :param generic_dataset_name: If false, data filenames must include the elements of dataset_name list. 
+    :type generic_dataset_name: :mod:'bool'
     :param variable_name: The variable name to load from the NetCDF file.
     :type variable_name: :mod:`string`
     :param variable_unit: (Optional) The variable unit to load from the NetCDF file.
     :type variable_unit: :mod:`string`
-    :param elevation_index: (Optional) The elevation index for which data should
-        be returned. Climate data is often times 4 dimensional data. Some
-        datasets will have readins at different height/elevation levels. OCW
-        expects 3D data so a single layer needs to be stripped out when loading.
-        By default, the first elevation layer is used. If desired you may
-        specify the elevation value to use.
     :param lat_name: (Optional) The latitude variable name to extract from the
         dataset.
     :type lat_name: :mod:`string`
@@ -335,33 +332,42 @@ def load_multiple_files(file_path,
     :returns: An array of OCW Dataset objects
     :rtype: :class:`list`
     '''
+    datasets = []
 
-    data_filenames = []
-    data_filenames.extend(glob(file_path))
-    data_filenames.sort()
-
-    # number of files
-    ndata = len(data_filenames)
-    if type(dataset_name) is str and ndata == 1:
-        data_name = [dataset_name]
-    elif type(dataset_name).__name__ == 'list':
-        if len(dataset_name) == ndata:
-            data_name = [name for name in dataset_name]
-    else:
+    if not dataset_name:
+        data_filename = glob(file_path)
+        data_filename.sort()
+        ndata = len(data_filename)
         data_name = []
-        data_filenames_reversed = []
-        for element in data_filenames:
-            data_filenames_reversed.append(element[::-1])
-        prefix = os.path.commonprefix(data_filenames)
-        postfix = os.path.commonprefix(data_filenames_reversed)[::-1]
-        for element in data_filenames:
-            data_name.append(element.replace(prefix, '').replace(postfix, ''))
+        data_filename_reversed = [i[::-1] for i in data_filename]
+        prefix = os.path.commonprefix(data_filename)
+        postfix = os.path.commonprefix(data_filename_reversed)[::-1]
+        data_name = [i.replace(prefix,'').replace(postfix,'') for i in data_filename]
 
-    datasets = []
-    for ifile, filename in enumerate(data_filenames):
-        datasets.append(load_file(filename, variable_name, variable_unit, name=data_name[ifile],
+        for ifile, filename in enumerate(data_filename):
+            datasets.append(load_file(filename, variable_name, variable_unit, name=data_name[ifile],
+                                  lat_name=lat_name, lon_name=lon_name, time_name=time_name))
+    elif generic_dataset_name:
+        data_name = [i for i in dataset_name]
+        data_filename = glob(file_path)
+        data_filename.sort()
+        for ifile, filename in enumerate(data_filename):
+            datasets.append(load_file(filename, variable_name, variable_unit, name=data_name[ifile],
                                   lat_name=lat_name, lon_name=lon_name, time_name=time_name))
 
+    else:
+        data_name = [i for i in dataset_name]    
+        pattern = [['*'+i+'*'] for i in dataset_name]
+        if file_path[-1] != '/':
+            file_path = file_path+'/'
+        
+        ndata = len(dataset_name)
+        for idata in numpy.arange(ndata):
+            datasets.append(load_dataset_from_multiple_netcdf_files(variable_name,
+                                variable_unit=variable_unit, name=data_name[idata],
+                                lat_name=lat_name, lon_name=lon_name, time_name=time_name,
+                                file_path=file_path, filename_pattern=pattern[idata]))
+
     return datasets
 
 
@@ -443,7 +449,7 @@ def load_WRF_2d_files_RAIN(file_path=None,
     return Dataset(lats, lons, times2, values, variable_name, units=variable_unit, name=name)
 
 
-def load_dataset_from_multiple_netcdf_files(variable_name,
+def load_dataset_from_multiple_netcdf_files(variable_name, variable_unit=None,
                                             lat_name=None, lon_name=None, time_name=None,
                                             name='', file_list=None, file_path=None, filename_pattern=None,
                                             mask_file=None, mask_variable=None, mask_value=0):
@@ -457,6 +463,9 @@ def load_dataset_from_multiple_netcdf_files(variable_name,
     :param variable_name: The variable name to load from the NetCDF file.
     :type variable_name: :mod:`string`
 
+    :param variable_name: The variable's unit to load from the NetCDF file.
+    :type variable_name: :mod:`string`
+
     :param lat_name: (Optional) The latitude variable name to extract from the \
         dataset.
     :type lat_name: :mod:`string`
@@ -527,7 +536,8 @@ def load_dataset_from_multiple_netcdf_files(variable_name,
         else:
             data_values = numpy.concatenate((data_values, values0))
     times = numpy.array(times)
-    return Dataset(lats, lons, times, data_values, variable_name, name=name)
+    return Dataset(lats, lons, times, data_values, 
+                   variable_name, units=variable_unit,name=name)
 
 
 def load_NLDAS_forcingA_files(file_path=None,

http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 10f41e3..18058ca 100755
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -391,8 +391,11 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma
     '''
 
     if not subregion.start:
-        subregion.start = target_dataset.times[0]
-        subregion.end = target_dataset.times[-1]
+        time_start = target_dataset.times[0]
+        time_end = target_dataset.times[-1]
+    else:
+        time_start = subregion.start
+        time_end = subregion.end
 
     if not subregion_name:
         subregion_name = target_dataset.name
@@ -402,7 +405,7 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma
 
         if target_dataset.lats.ndim == 2 and target_dataset.lons.ndim == 2:
             temporal_subset = temporal_slice(
-                target_dataset, subregion.start, subregion.end)
+                target_dataset, time_start, time_end)
             nt = temporal_subset.values.shape[0]
             y_index, x_index = np.where(
                 (target_dataset.lats >= subregion.lat_max) | (
@@ -471,7 +474,7 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma
 
     if subregion.boundary_type == 'us_states' or subregion.boundary_type == 'countries':
         temporal_subset = temporal_slice(
-            target_dataset, subregion.start, subregion.end)
+            target_dataset, time_start, time_end)
         spatial_mask = utils.mask_using_shapefile_info(target_dataset.lons, target_dataset.lats,
                                                        subregion.masked_regions, extract=extract)
         subset_values = utils.propagate_spatial_mask_over_time(
@@ -488,7 +491,7 @@ def subset(target_dataset, subregion, subregion_name=None, extract=True, user_ma
 
     if subregion.boundary_type == 'user':
         temporal_subset = temporal_slice(
-            target_dataset, subregion.start, subregion.end)
+            target_dataset, time_start, time_end)
         spatial_mask = utils.regrid_spatial_mask(target_dataset.lons, target_dataset.lats,
                                                  subregion.mask_longitude, subregion.mask_latitude, subregion.mask_variable,
                                                  user_mask_values, extract=extract)

http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/ocw/plotter.py
----------------------------------------------------------------------
diff --git a/ocw/plotter.py b/ocw/plotter.py
index c0a6858..55302a9 100755
--- a/ocw/plotter.py
+++ b/ocw/plotter.py
@@ -1117,7 +1117,7 @@ def fill_US_states_with_color(regions, fname, fmt='png', ptitle='',
     nregion = len(regions)
     if colors:
         cmap = plt.cm.rainbow
-    if values:
+    if not (values is None):
         cmap = plt.cm.seismic
         max_abs = np.abs(values).max()
 
@@ -1143,7 +1143,7 @@ def fill_US_states_with_color(regions, fname, fmt='png', ptitle='',
             lats = np.append(lats, shape[:,1])
         if colors:
             color_to_fill=cmap((iregion+0.5)/nregion)
-        if values:
+        if not (values is None):
             value = values[iregion]
             color_to_fill = cmap(0.5+np.sign(value)*abs(value)/max_abs*0.45)
         ax.add_collection(PatchCollection(patches, facecolor=color_to_fill))
@@ -1157,3 +1157,52 @@ def fill_US_states_with_color(regions, fname, fmt='png', ptitle='',
     # Save the figure
     fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight', dpi=fig.dpi)
     fig.clf()
+
+def draw_plot_to_compare_trends(obs_data, ens_data, model_data,
+                          fname, fmt='png', ptitle='', data_labels=None,
+                          xlabel='', ylabel=''):
+
+    ''' Fill the States over the contiguous US with colors 
+   
+    :param obs_data: An array of observed trend and standard errors for regions
+    :type obs_data: :class:'numpy.ndarray'
+    :param ens_data: An array of trend and standard errors from a multi-model ensemble for regions 
+    :type ens_data: : class:'numpy.ndarray'
+    :param model_data: An array of trends from models for regions 
+    :type model_data: : class:'numpy.ndarray'
+    :param fname: The filename of the plot.
+    :type fname: :mod:`string`
+    :param fmt: (Optional) filetype for the output.
+    :type fmt: :mod:`string`
+    :param ptitle: (Optional) plot title.
+    :type ptitle: :mod:`string`
+    :param data_labels: (Optional) names of the regions
+    :type data_labels: :mod:`list`
+    :param xlabel: (Optional) a label for x-axis
+    :type xlabel: :mod:`string`
+    :param ylabel: (Optional) a label for y-axis  
+    :type ylabel: :mod:`string`
+    '''
+    nregions = obs_data.shape[1] 
+
+    # Set up the figure
+    fig = plt.figure()
+    fig.set_size_inches((8.5, 11.))
+    fig.dpi = 300
+    ax = fig.add_subplot(111)
+  
+    b_plot = ax.boxplot(model_data, widths=np.repeat(0.2, nregions), positions=np.arange(nregions)+1.3) 
+    plt.setp(b_plot['medians'], color='black')
+    plt.setp(b_plot['whiskers'], color='black')
+    plt.setp(b_plot['boxes'], color='black')
+    plt.setp(b_plot['fliers'], color='black')
+    ax.errorbar(np.arange(nregions)+0.8, obs_data[0,:], yerr=obs_data[1,:], 
+                fmt='o', color='r', ecolor='r')    
+    ax.errorbar(np.arange(nregions)+1., ens_data[0,:], yerr=ens_data[1,:], 
+                fmt='o', color='b', ecolor='b')    
+    ax.set_xticks(np.arange(nregions)+1)
+    ax.set_xlim([0, nregions+1])
+    
+    if data_labels:
+        ax.set_xticklabels(data_labels)
+    fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight') 

http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/ocw/tests/test_local.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_local.py b/ocw/tests/test_local.py
index bff2dd1..0927f0f 100644
--- a/ocw/tests/test_local.py
+++ b/ocw/tests/test_local.py
@@ -125,7 +125,7 @@ class TestLoadMultipleFiles(unittest.TestCase):
 
     def test_function_load_multiple_files_data_name(self):
         dataset = local.load_multiple_files(self.file_path, "value")
-        self.assertEqual([dataset[0].name], ['data'])
+        self.assertEqual([dataset[0].name], [''])
 
     def test_function_load_multiple_files_lons(self):
         """To test load_multiple_file function for longitudes"""
@@ -151,7 +151,8 @@ class TestLoadMultipleFiles(unittest.TestCase):
         """Test adding a custom name to a dataset"""
         dataset = local.load_multiple_files(self.file_path,
                                             "value",
-                                            dataset_name='foo')
+                                            generic_dataset_name=True,
+                                            dataset_name=['foo'])
         self.assertEqual(dataset[0].name, 'foo')
 
     def test_dataset_origin(self):

http://git-wip-us.apache.org/repos/asf/climate/blob/6f6cde83/ocw/utils.py
----------------------------------------------------------------------
diff --git a/ocw/utils.py b/ocw/utils.py
index ccc4ea9..0d9eb4e 100755
--- a/ocw/utils.py
+++ b/ocw/utils.py
@@ -649,8 +649,8 @@ def calculate_temporal_trends(dataset):
 
 def calculate_ensemble_temporal_trends(timeseries_array, number_of_samples=1000):
     ''' Calculate temporal trends in an ensemble of time series  
-    :param timeseries_array: A list whose elements are one-dimensional numpy arrays 
-    :type timeseries_array: :class:`list'
+    :param timeseries_array: Two dimensional array. 1st index: model, 2nd index: time. 
+    :type timeseries_array: :class:`numpy.ndarray'
 
     :param sampling: A list whose elements are one-dimensional numpy arrays 
     :type timeseries_array: :class:`list'
@@ -659,22 +659,18 @@ def calculate_ensemble_temporal_trends(timeseries_array, number_of_samples=1000)
     :rtype: :float:`float','float'
     '''
    
-    nmodels = len(timeseries_array)
-    nt = len(timeseries_array[0])
+    nmodels, nt = timeseries_array.shape
     x = np.arange(nt)
-    merged_array = np.zeros([nmodels, nt])
     sampled_trend = np.zeros(number_of_samples)
-    for imodel,timeseries in enumerate(timeseries_array):
-        merged_array[imodel,:] = timeseries[:]
     ensemble_trend, _ = calculate_temporal_trend_of_time_series(
-                        x, np.mean(merged_array, axis=0))
+                        x, np.mean(timeseries_array, axis=0))
     
     for isample in np.arange(number_of_samples):
         index = np.random.choice(nmodels, size=nmodels, replace=True)
-        random_ensemble = np.mean(merged_array[index, :], axis=0)
+        random_ensemble = np.mean(timeseries_array[index, :], axis=0)
         sampled_trend[isample], _ = calculate_temporal_trend_of_time_series(
                                     x, random_ensemble)
-    return ensemble_trend, np.std(sample_trend, ddof=1)
+    return ensemble_trend, np.std(sampled_trend, ddof=1)
      
 
 def calculate_temporal_trend_of_time_series(x,y):
@@ -689,5 +685,4 @@ def calculate_temporal_trend_of_time_series(x,y):
     :rtype: :float:`float','float'
     '''
     slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
-    n = len(x)
-    return slope, std_err/np.std(x)/n**0.5 
+    return slope, std_err

[2/3] climate git commit: debugging dataset_processor for subregion objects without start and end attributes

Posted by hu...@apache.org.

debugging dataset_processor for subregion objects without start and end attributes


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/c4291784
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/c4291784
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/c4291784

Branch: refs/heads/master
Commit: c42917847e4c8a7fb5badf35961431aab2394514
Parents: 6f6cde8
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Mon Apr 17 11:41:30 2017 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Mon Apr 17 11:41:30 2017 -0700

----------------------------------------------------------------------
 ocw/dataset_processor.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/c4291784/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 18058ca..a99cba9 100755
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -1466,6 +1466,7 @@ def _are_bounds_contained_by_dataset(dataset, bounds):
     '''
     lat_min, lat_max, lon_min, lon_max = dataset.spatial_boundaries()
     start, end = dataset.temporal_boundaries()
+    
     errors = []
 
     # TODO:  THIS IS TERRIBLY inefficent and we need to use a geometry
@@ -1479,16 +1480,17 @@ def _are_bounds_contained_by_dataset(dataset, bounds):
         error = ("bounds.lon_max: %s is not between lon_min: %s and"
                  " lon_max: %s" % (bounds.lon_max, lon_min, lon_max))
         errors.append(error)
-
-    if not start <= bounds.start <= end:
-        error = ("bounds.start: %s is not between start: %s and end: %s" %
-                 (bounds.start, start, end))
-        errors.append(error)
-
-    if not start <= bounds.end <= end:
-        error = ("bounds.end: %s is not between start: %s and end: %s" %
-                 (bounds.end, start, end))
-        errors.append(error)
+    if not (bounds.start is None):
+        if not start <= bounds.start <= end:
+            error = ("bounds.start: %s is not between start: %s and end: %s" %
+                     (bounds.start, start, end))
+            errors.append(error)
+
+    if not (bounds.end is None):
+        if not start <= bounds.end <= end:
+            error = ("bounds.end: %s is not between start: %s and end: %s" %
+                     (bounds.end, start, end))
+            errors.append(error)
 
     if len(errors) == 0:
         return True
@@ -1514,8 +1516,12 @@ def _get_subregion_slice_indices(target_dataset, subregion):
     lonStart = min(np.nonzero(target_dataset.lons >= subregion.lon_min)[0])
     lonEnd = max(np.nonzero(target_dataset.lons <= subregion.lon_max)[0])
 
-    timeStart = min(np.nonzero(target_dataset.times >= subregion.start)[0])
-    timeEnd = max(np.nonzero(target_dataset.times <= subregion.end)[0])
+    if not (subregion.start is None):
+        timeStart = min(np.nonzero(target_dataset.times >= subregion.start)[0])
+        timeEnd = max(np.nonzero(target_dataset.times <= subregion.end)[0])
+    else:
+        timeStart = 0
+        timeEnd = len(target_dataset.times) -1
 
     return {
         "lat_start": latStart,

[3/3] climate git commit: Merge branch 'CLIMATE-905'

Posted by hu...@apache.org.

Merge branch 'CLIMATE-905'


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/0ed13a19
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/0ed13a19
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/0ed13a19

Branch: refs/heads/master
Commit: 0ed13a19cd3be00625336fe2b825d64bf91a73ef
Parents: 6598b9f c429178
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Mon Apr 17 11:43:52 2017 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Mon Apr 17 11:43:52 2017 -0700

----------------------------------------------------------------------
 examples/temperature_trends_over_CONUS.py | 100 ++++++++++++++++++-------
 ocw/data_source/local.py                  |  78 ++++++++++---------
 ocw/dataset_processor.py                  |  43 ++++++-----
 ocw/plotter.py                            |  53 ++++++++++++-
 ocw/tests/test_local.py                   |   5 +-
 ocw/utils.py                              |  19 ++---
 6 files changed, 206 insertions(+), 92 deletions(-)
----------------------------------------------------------------------