You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by hu...@apache.org on 2015/10/13 23:21:16 UTC
[1/2] climate git commit: CLIMATE-683 - A new loader to read multiple
netCDF files with a file list and spatial mask
Repository: climate
Updated Branches:
refs/heads/master d49c5677d -> 4b37b125c
CLIMATE-683 - A new loader to read multiple netCDF files with a file list and spatial mask
- A new loader, ocw.data_source.local.load_dataset_from_multiple_netcdf_files, is added
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/76b0914a
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/76b0914a
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/76b0914a
Branch: refs/heads/master
Commit: 76b0914a14fde97c1e4bd95185c415e2e69e4a88
Parents: ece260d
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Mon Oct 12 17:26:48 2015 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Mon Oct 12 17:26:48 2015 -0700
----------------------------------------------------------------------
ocw/data_source/local.py | 74 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/climate/blob/76b0914a/ocw/data_source/local.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index 60fcb50..c6114c0 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -330,3 +330,77 @@ def load_multiple_files(file_path,
lat_name=lat_name, lon_name=lon_name, time_name=time_name))
return datasets, data_name
+
+def load_dataset_from_multiple_netcdf_files(file_list, variable_name,
+ lat_name=None, lon_name=None, time_name=None,
+ name='', file_path=None, filename_pattern=None,
+ mask_file=None, mask_variable=None, mask_value=0):
+ ''' Load multiple netCDF files from the same source (an observation or a model) into a Dataset.
+ The dataset can be spatially subset.
+ :param file_list: A text file listing the netCDF filenames to load, one per line.
+ :type file_list: :mod:`string`
+ :param variable_name: The variable name to load from the NetCDF file.
+ :type variable_name: :mod:`string`
+ :param lat_name: (Optional) The latitude variable name to extract from the
+ dataset.
+ :type lat_name: :mod:`string`
+
+ :param lon_name: (Optional) The longitude variable name to extract from the
+ dataset.
+ :type lon_name: :mod:`string`
+
+ :param time_name: (Optional) The time variable name to extract from the
+ dataset.
+ :type time_name: :mod:`string`
+ :param name: (Optional) A name for the loaded dataset.
+ :type name: :mod:`string`
+ :param file_path: Directory prefix prepended to each entry of filename_pattern; used only when no file_list is given.
+ :type file_path: :mod:`string`
+ :param filename_pattern: Glob patterns appended to file_path to find the NetCDF files to load.
+ :type filename_pattern: :class:`list` of :mod:`string`
+ :param mask_file: A netcdf file with two-dimensional mask indices
+ :type mask_file: :mod:`string`
+ :param mask_variable: The variable name to load from the mask_file.
+ :type mask_variable: :mod:`string`
+ :param mask_value: an index for spatial subsetting a dataset
+ :type mask_value: :class:`int`
+ :returns: An OCW Dataset object with the requested variable's data from
+ the NetCDF file.
+ :rtype: :class:`dataset.Dataset`
+ :raises ValueError:
+ '''
+ nc_files = []
+ if not file_list:
+ for pattern in filename_pattern:
+ nc_files.extend(glob(file_path + pattern))
+ else:
+ nc_files = [line.rstrip('\n') for line in open(file_list)]
+
+ nc_files.sort()
+
+ dataset0 = load_file(nc_files[0], variable_name=variable_name, lat_name=lat_name, lon_name=lon_name, time_name=time_name)
+ if dataset0.lons.ndim == 1 and dataset0.lats.ndim ==1:
+ lons, lats = numpy.meshgrid(dataset0.lons, dataset0.lats)
+ elif dataset0.lons.ndim == 2 and dataset0.lats.ndim ==2:
+ lons = dataset0.lons
+ lats = dataset0.lats
+
+ if mask_file:
+ mask_dataset = load_file(mask_file, mask_variable)
+ y_index, x_index = numpy.where(mask_dataset.values == mask_value)
+
+ times = []
+ nfile = len(nc_files)
+ for ifile, file in enumerate(nc_files):
+ print 'NC file '+str(ifile+1)+'/'+str(nfile), file
+ file_object0= load_file(file, variable_name)
+ values0= file_object0.values
+ times.extend(file_object0.times)
+ if mask_file:
+ values0 = values0[:,y_index, x_index]
+ if ifile == 0:
+ data_values = values0
+ else:
+ data_values= numpy.concatenate((data_values, values0))
+ times = numpy.array(times)
+ return Dataset(lats, lons, times, data_values, variable_name, name=name)
[2/2] climate git commit: fix conflict has been addressed
Posted by hu...@apache.org.
fix conflict has been addressed
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/4b37b125
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/4b37b125
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/4b37b125
Branch: refs/heads/master
Commit: 4b37b125ca8b7e488d1e54ae3cab82cc3ef57f3b
Parents: d49c567 76b0914
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Tue Oct 13 14:19:57 2015 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Tue Oct 13 14:19:57 2015 -0700
----------------------------------------------------------------------
ocw/data_source/local.py | 73 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/climate/blob/4b37b125/ocw/data_source/local.py
----------------------------------------------------------------------
diff --cc ocw/data_source/local.py
index ad1f900,c6114c0..ff99aab
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@@ -341,66 -331,76 +341,139 @@@ def load_multiple_files(file_path
return datasets, data_name
+def load_WRF_2d_files_RAIN(file_path=None,
+ filename_pattern=None,
+ filelist=None,
+ name=''):
+ ''' Load multiple WRF (or nuWRF) original output files containing 2D fields such as precipitation and surface variables into a Dataset.
+ The dataset can be spatially subset.
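+ :param file_path: Directory prefix prepended to each entry of filename_pattern; used only when no filelist is given.
+ :type file_path: :mod:`string`
+ :param filename_pattern: Glob patterns appended to file_path to find the NetCDF files to load.
+ :type filename_pattern: :class:`list` of :mod:`string`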
+ :param name: (Optional) A name for the loaded dataset.
+ :type name: :mod:`string`
+ :returns: An OCW Dataset object with the requested variable's data from
+ the NetCDF file.
+ :rtype: :class:`dataset.Dataset`
+ :raises ValueError:
+ '''
+
+ if not filelist:
+ WRF_files = []
+ for pattern in filename_pattern:
+ WRF_files.extend(glob(file_path + pattern))
+ WRF_files.sort()
+ else:
+ WRF_files=[line.rstrip('\n') for line in open(filelist)]
+
+ file_object_first = netCDF4.Dataset(WRF_files[0])
+ lats = file_object_first.variables['XLAT'][0,:]
+ lons = file_object_first.variables['XLONG'][0,:]
+
+ times = []
+ nfile = len(WRF_files)
+ for ifile, file in enumerate(WRF_files):
+ print 'Reading file '+str(ifile+1)+'/'+str(nfile), file
+ file_object = netCDF4.Dataset(file)
+ time_struct_parsed = strptime(file[-19:],"%Y-%m-%d_%H:%M:%S")
+ for ihour in range(24):
+ times.append(datetime(*time_struct_parsed[:6]) + timedelta(hours=ihour))
+ if ifile == 0:
+ values0= file_object.variables['RAINC'][:]+file_object.variables['RAINNC'][:]
+ else:
+ values0= numpy.concatenate((values0, file_object.variables['RAINC'][:]+file_object.variables['RAINNC'][:]))
+ file_object.close()
+ times= numpy.array(times)
+ years = numpy.array([d.year for d in times])
+ ncycle = numpy.unique(years).size
+ print 'ncycle=',ncycle
+ nt, ny, nx = values0.shape
+ values = numpy.zeros([nt-ncycle*24, ny, nx])
+ times2 = []
+ nt2 = nt/ncycle
+ # remove the first day in each year
+ nt3 = nt2-24
+ t_index = 0
+ for icycle in numpy.arange(ncycle):
+ for it in numpy.arange(nt3)+24:
+ values[t_index,:] = values0[icycle*nt2+it,:]-values0[icycle*nt2+it-1,:]
+ times2.append(times[icycle*nt2+it])
+ t_index = t_index +1
+ variable_name = 'PREC'
+ variable_unit= 'mm/hr'
+ times2 = numpy.array(times2)
+ return Dataset(lats, lons, times2, values, variable_name, units=variable_unit, name=name)
++
+ def load_dataset_from_multiple_netcdf_files(file_list, variable_name,
+ lat_name=None, lon_name=None, time_name=None,
+ name='', file_path=None, filename_pattern=None,
+ mask_file=None, mask_variable=None, mask_value=0):
+ ''' Load multiple netCDF files from the same source (an observation or a model) into a Dataset.
+ The dataset can be spatially subset.
+ :param file_list: A text file listing the netCDF filenames to load, one per line.
+ :type file_list: :mod:`string`
+ :param variable_name: The variable name to load from the NetCDF file.
+ :type variable_name: :mod:`string`
+ :param lat_name: (Optional) The latitude variable name to extract from the
+ dataset.
+ :type lat_name: :mod:`string`
-
+ :param lon_name: (Optional) The longitude variable name to extract from the
+ dataset.
+ :type lon_name: :mod:`string`
-
+ :param time_name: (Optional) The time variable name to extract from the
+ dataset.
+ :type time_name: :mod:`string`
+ :param name: (Optional) A name for the loaded dataset.
+ :type name: :mod:`string`
+ :param file_path: Directory prefix prepended to each entry of filename_pattern; used only when no file_list is given.
+ :type file_path: :mod:`string`
+ :param filename_pattern: Glob patterns appended to file_path to find the NetCDF files to load.
+ :type filename_pattern: :class:`list` of :mod:`string`
+ :param mask_file: A netcdf file with two-dimensional mask indices
+ :type mask_file: :mod:`string`
+ :param mask_variable: The variable name to load from the mask_file.
+ :type mask_variable: :mod:`string`
+ :param mask_value: an index for spatial subsetting a dataset
+ :type mask_value: :class:`int`
+ :returns: An OCW Dataset object with the requested variable's data from
+ the NetCDF file.
+ :rtype: :class:`dataset.Dataset`
+ :raises ValueError:
+ '''
+ nc_files = []
+ if not file_list:
+ for pattern in filename_pattern:
+ nc_files.extend(glob(file_path + pattern))
+ else:
+ nc_files = [line.rstrip('\n') for line in open(file_list)]
+
+ nc_files.sort()
+
+ dataset0 = load_file(nc_files[0], variable_name=variable_name, lat_name=lat_name, lon_name=lon_name, time_name=time_name)
+ if dataset0.lons.ndim == 1 and dataset0.lats.ndim ==1:
+ lons, lats = numpy.meshgrid(dataset0.lons, dataset0.lats)
+ elif dataset0.lons.ndim == 2 and dataset0.lats.ndim ==2:
+ lons = dataset0.lons
+ lats = dataset0.lats
+
+ if mask_file:
+ mask_dataset = load_file(mask_file, mask_variable)
+ y_index, x_index = numpy.where(mask_dataset.values == mask_value)
+
+ times = []
+ nfile = len(nc_files)
+ for ifile, file in enumerate(nc_files):
+ print 'NC file '+str(ifile+1)+'/'+str(nfile), file
+ file_object0= load_file(file, variable_name)
+ values0= file_object0.values
+ times.extend(file_object0.times)
+ if mask_file:
+ values0 = values0[:,y_index, x_index]
+ if ifile == 0:
+ data_values = values0
+ else:
+ data_values= numpy.concatenate((data_values, values0))
+ times = numpy.array(times)
+ return Dataset(lats, lons, times, data_values, variable_name, name=name)
++