You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by hu...@apache.org on 2015/10/13 23:21:16 UTC

[1/2] climate git commit: CLIMATE-683 - A new loader to read multiple netCDF files with a file list and spatial mask

Repository: climate
Updated Branches:
  refs/heads/master d49c5677d -> 4b37b125c


CLIMATE-683 - A new loader to read multiple netCDF files with a file list and spatial mask

- A new loader, ocw.data_source.local.load_dataset_from_multiple_netcdf_files, is added


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/76b0914a
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/76b0914a
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/76b0914a

Branch: refs/heads/master
Commit: 76b0914a14fde97c1e4bd95185c415e2e69e4a88
Parents: ece260d
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Mon Oct 12 17:26:48 2015 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Mon Oct 12 17:26:48 2015 -0700

----------------------------------------------------------------------
 ocw/data_source/local.py | 74 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/76b0914a/ocw/data_source/local.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index 60fcb50..c6114c0 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -330,3 +330,77 @@ def load_multiple_files(file_path,
                         lat_name=lat_name, lon_name=lon_name, time_name=time_name))
     
     return datasets, data_name
+
+def load_dataset_from_multiple_netcdf_files(file_list, variable_name,
+                                            lat_name=None, lon_name=None, time_name=None,
+                                            name='', file_path=None, filename_pattern=None,
+                                            mask_file=None, mask_variable=None, mask_value=0):
+    ''' Load multiple netCDF files from the same source (an observation or a model) into a Dataset.
+    The dataset can be spatially subset with a mask file.
+    :param file_list: A text file listing one NetCDF filename per line. If empty, file_path and filename_pattern are used instead.
+    :type file_list: :mod:`string`
+    :param variable_name: The variable name to load from the NetCDF file.
+    :type variable_name: :mod:`string`
+    :param lat_name: (Optional) The latitude variable name to extract from the
+        dataset.
+    :type lat_name: :mod:`string`
+
+    :param lon_name: (Optional) The longitude variable name to extract from the
+        dataset.
+    :type lon_name: :mod:`string`
+
+    :param time_name: (Optional) The time variable name to extract from the
+        dataset.
+    :type time_name: :mod:`string`
+    :param name: (Optional) A name for the loaded dataset.
+    :type name: :mod:`string`
+    :param file_path: (Optional) Prefix prepended to each entry of filename_pattern before globbing.
+    :type file_path: :mod:`string`
+    :param filename_pattern: (Optional) Glob patterns searched when file_list is empty.
+    :type filename_pattern: :list:`string`
+    :param mask_file: (Optional) A netcdf file with two-dimensional mask indices
+    :type mask_file: :mod:`string`
+    :param mask_variable: (Optional) The variable name to load from the mask_file.
+    :type mask_variable: :mod:`string`
+    :param mask_value: (Optional) Grid points where the mask equals this value are kept.
+    :type mask_value: :class:`int`
+    :returns: An OCW Dataset object with the requested variable's data
+        concatenated from all of the NetCDF files.
+    :rtype: :class:`dataset.Dataset`
+    :raises ValueError:
+    '''
+    nc_files = []
+    if not file_list:
+        for pattern in filename_pattern:
+            nc_files.extend(glob(file_path + pattern))  # plain string concatenation: file_path must carry its own trailing separator
+    else:
+        nc_files = [line.rstrip('\n') for line in open(file_list)]  # NOTE(review): the file handle is never explicitly closed
+
+    nc_files.sort()  # sorted filename order is the order in which values are concatenated below
+
+    dataset0 = load_file(nc_files[0], variable_name=variable_name, lat_name=lat_name, lon_name=lon_name, time_name=time_name)  # first file supplies the lat/lon grid
+    if dataset0.lons.ndim == 1 and dataset0.lats.ndim ==1:
+        lons, lats = numpy.meshgrid(dataset0.lons, dataset0.lats)  # expand 1-D axes into 2-D coordinate arrays
+    elif dataset0.lons.ndim == 2 and dataset0.lats.ndim ==2:  # NOTE(review): no else branch; lats/lons stay undefined for any other ndim combination
+        lons = dataset0.lons
+        lats = dataset0.lats
+
+    if mask_file: 
+        mask_dataset = load_file(mask_file, mask_variable)
+        y_index, x_index = numpy.where(mask_dataset.values == mask_value)  # unpacking two index arrays assumes the mask values are 2-D
+
+    times = []
+    nfile = len(nc_files)
+    for ifile, file in enumerate(nc_files):
+        print 'NC file '+str(ifile+1)+'/'+str(nfile), file
+        file_object0= load_file(file, variable_name)  # NOTE(review): lat_name/lon_name/time_name are not forwarded here, unlike the first load above
+        values0= file_object0.values
+        times.extend(file_object0.times)
+        if mask_file:
+            values0 = values0[:,y_index, x_index]  # paired index arrays replace the two spatial axes with one axis of selected points
+        if ifile == 0:
+            data_values = values0
+        else:
+            data_values= numpy.concatenate((data_values, values0))  # joins along the leading axis (numpy default axis=0)
+    times = numpy.array(times)
+    return Dataset(lats, lons, times, data_values, variable_name, name=name)  # NOTE(review): lats/lons are passed unmasked even when values were subset by the mask


[2/2] climate git commit: fix conflict has been addressed

Posted by hu...@apache.org.
fix conflict has been addressed


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/4b37b125
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/4b37b125
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/4b37b125

Branch: refs/heads/master
Commit: 4b37b125ca8b7e488d1e54ae3cab82cc3ef57f3b
Parents: d49c567 76b0914
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Tue Oct 13 14:19:57 2015 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Tue Oct 13 14:19:57 2015 -0700

----------------------------------------------------------------------
 ocw/data_source/local.py | 73 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/4b37b125/ocw/data_source/local.py
----------------------------------------------------------------------
diff --cc ocw/data_source/local.py
index ad1f900,c6114c0..ff99aab
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@@ -341,66 -331,76 +341,139 @@@ def load_multiple_files(file_path
      
      return datasets, data_name
  
 +def load_WRF_2d_files_RAIN(file_path=None,
 +                      filename_pattern=None,
 +                      filelist=None,
 +                      name=''):
 +    ''' Load multiple WRF (or nuWRF) original output files and build a precipitation Dataset from the RAINC and RAINNC fields.
 +    filelist, when given, is a text file with one filename per line; otherwise files are found by globbing.
 +    :param file_path: Prefix prepended to each filename_pattern entry before globbing (used only when filelist is empty).
 +    :type file_path: :mod:`string`
 +    :param filename_pattern: Glob patterns searched when filelist is empty.
 +    :type filename_pattern: :list:`string`
 +    :param name: (Optional) A name for the loaded dataset.
 +    :type name: :mod:`string`
 +    :returns: An OCW Dataset of variable 'PREC' (units 'mm/hr') holding step-to-step differences
 +        of RAINC + RAINNC, with the first 24 steps of each cycle dropped.
 +    :rtype: :class:`dataset.Dataset`
 +    :raises ValueError:
 +    '''
 +
 +    if not filelist:
 +        WRF_files = []
 +        for pattern in filename_pattern:
 +            WRF_files.extend(glob(file_path + pattern))  # plain string concatenation: file_path must carry its own trailing separator
 +        WRF_files.sort()
 +    else:
 +        WRF_files=[line.rstrip('\n') for line in open(filelist)]  # NOTE(review): not sorted in this branch, and the file handle is never explicitly closed
 +
 +    file_object_first = netCDF4.Dataset(WRF_files[0])  # NOTE(review): this handle is never closed
 +    lats = file_object_first.variables['XLAT'][0,:]  # first entry along the leading axis (presumably time -- confirm)
 +    lons = file_object_first.variables['XLONG'][0,:]
 +
 +    times = []
 +    nfile = len(WRF_files)
 +    for ifile, file in enumerate(WRF_files):
 +        print 'Reading file '+str(ifile+1)+'/'+str(nfile), file
 +        file_object = netCDF4.Dataset(file)
 +        time_struct_parsed = strptime(file[-19:],"%Y-%m-%d_%H:%M:%S")  # the last 19 characters of the path must be the start timestamp
 +        for ihour in range(24):  # always 24 hourly timestamps per file, regardless of the file's actual length
 +            times.append(datetime(*time_struct_parsed[:6]) + timedelta(hours=ihour))
 +        if ifile == 0:
 +            values0= file_object.variables['RAINC'][:]+file_object.variables['RAINNC'][:]
 +        else:
 +            values0= numpy.concatenate((values0, file_object.variables['RAINC'][:]+file_object.variables['RAINNC'][:]))
 +        file_object.close()
 +    times= numpy.array(times)
 +    years = numpy.array([d.year for d in times])
 +    ncycle = numpy.unique(years).size  # one cycle per distinct calendar year found in the timestamps
 +    print 'ncycle=',ncycle
 +    nt, ny, nx = values0.shape
 +    values = numpy.zeros([nt-ncycle*24, ny, nx])  # 24 steps are discarded from every cycle
 +    times2 = []
 +    nt2 = nt/ncycle  # steps per cycle; assumes every cycle has the same length (integer division under Python 2)
 +    # remove the first day in each year
 +    nt3 = nt2-24
 +    t_index = 0
 +    for icycle in numpy.arange(ncycle):
 +        for it in numpy.arange(nt3)+24:  # offsets 24 .. nt2-1 within the cycle
 +            values[t_index,:] = values0[icycle*nt2+it,:]-values0[icycle*nt2+it-1,:]  # difference from the previous step
 +            times2.append(times[icycle*nt2+it])
 +            t_index = t_index +1
 +    variable_name = 'PREC'
 +    variable_unit= 'mm/hr'
 +    times2 = numpy.array(times2)
 +    return Dataset(lats, lons, times2, values, variable_name, units=variable_unit, name=name)
++
+ def load_dataset_from_multiple_netcdf_files(file_list, variable_name,
+                                             lat_name=None, lon_name=None, time_name=None,
+                                             name='', file_path=None, filename_pattern=None,
+                                             mask_file=None, mask_variable=None, mask_value=0):
+     ''' Load multiple netCDF files from the same source (an observation or a model) into a Dataset.
+     The dataset can be spatially subset with a mask file.
+     :param file_list: A text file listing one NetCDF filename per line. If empty, file_path and filename_pattern are used instead.
+     :type file_list: :mod:`string`
+     :param variable_name: The variable name to load from the NetCDF file.
+     :type variable_name: :mod:`string`
+     :param lat_name: (Optional) The latitude variable name to extract from the
+         dataset.
+     :type lat_name: :mod:`string`
 -
+     :param lon_name: (Optional) The longitude variable name to extract from the
+         dataset.
+     :type lon_name: :mod:`string`
 -
+     :param time_name: (Optional) The time variable name to extract from the
+         dataset.
+     :type time_name: :mod:`string`
+     :param name: (Optional) A name for the loaded dataset.
+     :type name: :mod:`string`
+     :param file_path: (Optional) Prefix prepended to each entry of filename_pattern before globbing.
+     :type file_path: :mod:`string`
+     :param filename_pattern: (Optional) Glob patterns searched when file_list is empty.
+     :type filename_pattern: :list:`string`
+     :param mask_file: (Optional) A netcdf file with two-dimensional mask indices
+     :type mask_file: :mod:`string`
+     :param mask_variable: (Optional) The variable name to load from the mask_file.
+     :type mask_variable: :mod:`string`
+     :param mask_value: (Optional) Grid points where the mask equals this value are kept.
+     :type mask_value: :class:`int`
+     :returns: An OCW Dataset object with the requested variable's data
+         concatenated from all of the NetCDF files.
+     :rtype: :class:`dataset.Dataset`
+     :raises ValueError:
+     '''
+     nc_files = []
+     if not file_list:
+         for pattern in filename_pattern:
+             nc_files.extend(glob(file_path + pattern))  # plain string concatenation: file_path must carry its own trailing separator
+     else:
+         nc_files = [line.rstrip('\n') for line in open(file_list)]  # NOTE(review): the file handle is never explicitly closed
+ 
+     nc_files.sort()  # sorted filename order is the order in which values are concatenated below
+ 
+     dataset0 = load_file(nc_files[0], variable_name=variable_name, lat_name=lat_name, lon_name=lon_name, time_name=time_name)  # first file supplies the lat/lon grid
+     if dataset0.lons.ndim == 1 and dataset0.lats.ndim ==1:
+         lons, lats = numpy.meshgrid(dataset0.lons, dataset0.lats)  # expand 1-D axes into 2-D coordinate arrays
+     elif dataset0.lons.ndim == 2 and dataset0.lats.ndim ==2:  # NOTE(review): no else branch; lats/lons stay undefined for any other ndim combination
+         lons = dataset0.lons
+         lats = dataset0.lats
+ 
+     if mask_file: 
+         mask_dataset = load_file(mask_file, mask_variable)
+         y_index, x_index = numpy.where(mask_dataset.values == mask_value)  # unpacking two index arrays assumes the mask values are 2-D
+ 
+     times = []
+     nfile = len(nc_files)
+     for ifile, file in enumerate(nc_files):
+         print 'NC file '+str(ifile+1)+'/'+str(nfile), file
+         file_object0= load_file(file, variable_name)  # NOTE(review): lat_name/lon_name/time_name are not forwarded here, unlike the first load above
+         values0= file_object0.values
+         times.extend(file_object0.times)
+         if mask_file:
+             values0 = values0[:,y_index, x_index]  # paired index arrays replace the two spatial axes with one axis of selected points
+         if ifile == 0:
+             data_values = values0
+         else:
+             data_values= numpy.concatenate((data_values, values0))  # joins along the leading axis (numpy default axis=0)
+     times = numpy.array(times)
+     return Dataset(lats, lons, times, data_values, variable_name, name=name)  # NOTE(review): lats/lons are passed unmasked even when values were subset by the mask
++