You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@climate.apache.org by hu...@apache.org on 2015/08/19 02:30:47 UTC

[1/3] climate git commit: CLIMATE-564 - Managing multiple netcdf files stored on a local machine

Repository: climate
Updated Branches:
  refs/heads/master 3d0c32116 -> b08d37940


CLIMATE-564 - Managing multiple netcdf files stored on a local machine

- A new file loader load_multiple_files can read netcdf files with common file name patterns.


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/579c1f13
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/579c1f13
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/579c1f13

Branch: refs/heads/master
Commit: 579c1f1384583abf83b4cb9c2b8e4eeb6c26bd19
Parents: d4eeb03
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Wed Aug 12 01:34:44 2015 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Wed Aug 12 01:34:44 2015 -0700

----------------------------------------------------------------------
 ocw/data_source/local.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/579c1f13/ocw/data_source/local.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index 474dffb..be33998 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -268,3 +268,38 @@ def load_file(file_path,
 
     return Dataset(lats, lons, times, values, variable=variable_name,
                    units=variable_unit, name=name, origin=origin)
+
+def load_multiple_files(data_info):
+    ''' load files from multiple datasets and return an array of OCW datasets
+   
+    :param data_path: ['datasets']['targets'] in a configuration yaml file.
+    :type data_path: :class:`list`
+
+    :returns: An array of OCW Dataset objects, an array of dataset names
+    :rtype: :class:`list`
+    '''
+
+    data_filenames = glob(data_info['path'])
+    data_filenames.sort()
+    # number of files
+    ndata = len(data_filenames)
+    if ndata == 1:
+        try:
+            data_name = [data_info['data_name']]
+        except:
+            data_name =['ref']
+    else:
+        data_name = []
+        data_filenames_reversed = []
+        for element in data_filenames:
+            data_filenames_reversed.append(element[::-1])
+        prefix = os.path.commonprefix(data_filenames)
+        postfix = os.path.commonprefix(data_filenames_reversed)[::-1]
+        for element in data_filenames:
+            data_name.append(element.replace(prefix,'').replace(postfix,''))
+
+    datasets = []
+    for filename in data_filenames:
+        datasets.append(load_file(filename, data_info['variable'])) 
+    
+    return datasets, data_name

[3/3] climate git commit: CLIMATE-564 - Managing multiple netcdf files stored on a local machine

Posted by hu...@apache.org.

CLIMATE-564 - Managing multiple netcdf files stored on a local machine


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/b08d3794
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/b08d3794
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/b08d3794

Branch: refs/heads/master
Commit: b08d37940c018ba121744ff6200605fb1af351d5
Parents: 3d0c321 20c952b
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Tue Aug 18 17:30:16 2015 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Tue Aug 18 17:30:16 2015 -0700

----------------------------------------------------------------------
 ocw/data_source/local.py | 66 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
----------------------------------------------------------------------

[2/3] climate git commit: Now the multiple file loader has an interface that does not depend on the config file

Posted by hu...@apache.org.

Now the multiple file loader has an interface that does not depend on the config file


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/20c952b0
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/20c952b0
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/20c952b0

Branch: refs/heads/master
Commit: 20c952b0ef7712262cee1f3e82a53850fcee0728
Parents: 579c1f1
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Fri Aug 14 11:18:49 2015 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Fri Aug 14 11:18:49 2015 -0700

----------------------------------------------------------------------
 ocw/data_source/local.py | 59 +++++++++++++++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/20c952b0/ocw/data_source/local.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index be33998..c0d4b07 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -21,6 +21,7 @@ from time import strptime
 from glob import glob
 import re
 import string
+import os
 
 from ocw.dataset import Dataset
 import ocw.utils as utils
@@ -269,25 +270,54 @@ def load_file(file_path,
     return Dataset(lats, lons, times, values, variable=variable_name,
                    units=variable_unit, name=name, origin=origin)
 
-def load_multiple_files(data_info):
-    ''' load files from multiple datasets and return an array of OCW datasets
-   
-    :param data_path: ['datasets']['targets'] in a configuration yaml file.
-    :type data_path: :class:`list`
-
+def load_multiple_files(file_path,
+                        filename_pattern,
+                        variable_name,
+                        dataset_name='ref',
+                        variable_unit=None,
+                        lat_name=None,
+                        lon_name=None,
+                        time_name=None):
+    ''' load multiple netcdf files with common filename pattern and return an array of OCW datasets
+
+    :param file_path: directory name where the NetCDF files to load are stored.
+    :type file_path: :mod:`string`
+    :param filename_pattern: list of file name patterns (glob syntax); each is appended to file_path to find the files to load.
+    :type filename_pattern: :class:`list` of :mod:`string`
+    :param dataset_name: (Optional) name given to the dataset when only a single file is loaded.
+    :type dataset_name: :mod:`string`
+    :param variable_name: The variable name to load from the NetCDF file.
+    :type variable_name: :mod:`string`
+    :param variable_unit: (Optional) The variable unit to load from the NetCDF file.
+    :type variable_unit: :mod:`string`
+    :param elevation_index: (Optional) The elevation index for which data should
+        be returned. Climate data is oftentimes 4-dimensional data. Some
+        datasets will have readings at different height/elevation levels. OCW
+        expects 3D data so a single layer needs to be stripped out when loading.
+        By default, the first elevation layer is used. If desired you may
+        specify the elevation value to use.
+    :param lat_name: (Optional) The latitude variable name to extract from the
+        dataset.
+    :type lat_name: :mod:`string`
+    :param lon_name: (Optional) The longitude variable name to extract from the
+        dataset.
+    :type lon_name: :mod:`string`
+    :param time_name: (Optional) The time variable name to extract from the
+        dataset.
+    :type time_name: :mod:`string`
     :returns: An array of OCW Dataset objects, an array of dataset names
     :rtype: :class:`list`
     '''
 
-    data_filenames = glob(data_info['path'])
+    data_filenames = []
+    for pattern in filename_pattern:
+        data_filenames.extend(glob(file_path + pattern))
     data_filenames.sort()
+
     # number of files
     ndata = len(data_filenames)
     if ndata == 1:
-        try:
-            data_name = [data_info['data_name']]
-        except:
-            data_name =['ref']
+        data_name = [dataset_name]
     else:
         data_name = []
         data_filenames_reversed = []
@@ -299,7 +329,8 @@ def load_multiple_files(data_info):
             data_name.append(element.replace(prefix,'').replace(postfix,''))
 
     datasets = []
-    for filename in data_filenames:
-        datasets.append(load_file(filename, data_info['variable'])) 
+    for ifile,filename in enumerate(data_filenames):
+        datasets.append(load_file(filename, variable_name, variable_unit, name=data_name[ifile],
+                        lat_name=lat_name, lon_name=lon_name, time_name=time_name))
     
-    return datasets, data_name
+    return datasets