You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by hu...@apache.org on 2015/10/22 02:55:39 UTC
[3/3] climate git commit: CLIMATE-687 - A new loader to read GPM precipitation data with a file list

CLIMATE-687 - A new loader to read GPM precipitation data with a file list

- ocw.data_source.local.load_GPM_IMERG_files is added

Conflicts:
	ocw/data_source/local.py


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/360b5728
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/360b5728
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/360b5728

Branch: refs/heads/master
Commit: 360b5728f2fb23465a9e897e8136ae08aed92a27
Parents: 6d5c7f8 55d1c4d
Author: huikyole <hu...@argo.jpl.nasa.gov>
Authored: Wed Oct 21 17:54:53 2015 -0700
Committer: huikyole <hu...@argo.jpl.nasa.gov>
Committed: Wed Oct 21 17:54:53 2015 -0700

----------------------------------------------------------------------
 ocw/data_source/local.py | 53 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/360b5728/ocw/data_source/local.py
----------------------------------------------------------------------
diff --cc ocw/data_source/local.py
index 3560b99,50a7f26..9c1d059
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@@ -341,192 -332,56 +342,244 @@@ def load_multiple_files(file_path
      
      return datasets, data_name
  
++<<<<<<< HEAD
 +def load_WRF_2d_files_RAIN(file_path=None,
 +                      filename_pattern=None,
 +                      filelist=None,
 +                      name=''):
 +    ''' Load multiple WRF (or nuWRF) output files and build a precipitation Dataset from step-to-step differences of RAINC + RAINNC.
 +    :param file_path: Directory prefix prepended to each pattern (used only when filelist is not given).
 +    :type file_path: :mod:`string`
 +    :param filename_pattern: Glob patterns appended to file_path (used only when filelist is not given).
 +    :type filename_pattern: :list:`string`
 +    :param filelist: (Optional) A text file with one filename per line; when given, the patterns are ignored.
 +    :type filelist: :mod:`string`
 +    :param name: (Optional) A name for the loaded dataset.
 +    :type name: :mod:`string`
 +    :returns: An OCW Dataset with variable 'PREC' (units 'mm/hr'); the first 24 time steps of each cycle are dropped.
 +    :rtype: :class:`dataset.Dataset`
 +    :raises ValueError: presumably from strptime when the last 19 characters of a filename are not %Y-%m-%d_%H:%M:%S.
 +    '''
 +
 +    if not filelist:
 +        WRF_files = []
 +        for pattern in filename_pattern:
 +            WRF_files.extend(glob(file_path + pattern))
 +        WRF_files.sort()  # only the globbed list is sorted; a filelist is used in its given order
 +    else:
 +        WRF_files=[line.rstrip('\n') for line in open(filelist)]  # NOTE(review): the list file is never explicitly closed
 +
 +    file_object_first = netCDF4.Dataset(WRF_files[0])  # NOTE(review): this handle is not closed in this function
 +    lats = file_object_first.variables['XLAT'][0,:]  # coordinates are taken from index 0 of the first file only
 +    lons = file_object_first.variables['XLONG'][0,:]
 +
 +    times = []
 +    nfile = len(WRF_files)
 +    for ifile, file in enumerate(WRF_files):
 +        print 'Reading file '+str(ifile+1)+'/'+str(nfile), file
 +        file_object = netCDF4.Dataset(file)
 +        time_struct_parsed = strptime(file[-19:],"%Y-%m-%d_%H:%M:%S")  # start time comes from the filename, not the file contents
 +        for ihour in range(24):  # always 24 hourly stamps per file; assumes each file holds 24 records -- TODO confirm
 +            times.append(datetime(*time_struct_parsed[:6]) + timedelta(hours=ihour))
 +        if ifile == 0:
 +            values0= file_object.variables['RAINC'][:]+file_object.variables['RAINNC'][:]
 +        else:
 +            values0= numpy.concatenate((values0, file_object.variables['RAINC'][:]+file_object.variables['RAINNC'][:]))
 +        file_object.close()
 +    times= numpy.array(times)
 +    years = numpy.array([d.year for d in times])
 +    ncycle = numpy.unique(years).size  # one cycle per distinct calendar year found in the time stamps
 +    print 'ncycle=',ncycle
 +    nt, ny, nx = values0.shape
 +    values = numpy.zeros([nt-ncycle*24, ny, nx])
 +    times2 = []
 +    nt2 = nt/ncycle  # time steps per cycle; assumes every cycle has the same length (integer division under Python 2)
 +    # remove the first day in each year
 +    nt3 = nt2-24
 +    t_index = 0
 +    for icycle in numpy.arange(ncycle):
 +        for it in numpy.arange(nt3)+24:
 +            values[t_index,:] = values0[icycle*nt2+it,:]-values0[icycle*nt2+it-1,:]  # difference of consecutive totals
 +            times2.append(times[icycle*nt2+it])
 +            t_index = t_index +1
 +    variable_name = 'PREC'
 +    variable_unit= 'mm/hr'
 +    times2 = numpy.array(times2)
 +    return Dataset(lats, lons, times2, values, variable_name, units=variable_unit, name=name)
 +
 +def load_dataset_from_multiple_netcdf_files(file_list, variable_name,
 +                                            lat_name=None, lon_name=None, time_name=None,
 +                                            name='', file_path=None, filename_pattern=None,
 +                                            mask_file=None, mask_variable=None, mask_value=0):
 +    ''' Load multiple netCDF files from the same source (an observation or a model) into a Dataset.
 +    The values can optionally be spatially subset using a mask file.
 +    :param file_list: A text file including a list of filenames, one per line
 +    :type file_list: :mod:`string`
 +    :param variable_name: The variable name to load from the NetCDF file.
 +    :type variable_name: :mod:`string`
 +    :param lat_name: (Optional) The latitude variable name to extract from the
 +        dataset.
 +    :type lat_name: :mod:`string`
 +    :param lon_name: (Optional) The longitude variable name to extract from the
 +        dataset.
 +    :type lon_name: :mod:`string`
 +    :param time_name: (Optional) The time variable name to extract from the
 +        dataset.
 +    :type time_name: :mod:`string`
 +    :param name: (Optional) A name for the loaded dataset.
 +    :type name: :mod:`string`
 +    :param file_path: Directory prefix prepended to each pattern (used only when file_list is empty).
 +    :type file_path: :mod:`string`
 +    :param filename_pattern: Glob patterns appended to file_path (used only when file_list is empty).
 +    :type filename_pattern: :list:`string`
 +    :param mask_file: A netcdf file with two-dimensional mask indices
 +    :type mask_file: :mod:`string`
 +    :param mask_variable: The variable name to load from the mask_file.
 +    :type mask_variable: :mod:`string`
 +    :param mask_value: The mask value selecting the grid points to keep (points where mask == mask_value).
 +    :type mask_value: :class:`int`
 +    :returns: An OCW Dataset object with the values of all files concatenated
 +        along the first axis, in sorted filename order.
 +    :rtype: :class:`dataset.Dataset`
 +    :raises ValueError: not raised directly in this function; may come from load_file -- unverified.
 +    '''
 +    nc_files = []
 +    if not file_list:
 +        for pattern in filename_pattern:
 +            nc_files.extend(glob(file_path + pattern))
 +    else:
 +        nc_files = [line.rstrip('\n') for line in open(file_list)]  # NOTE(review): the list file is never explicitly closed
 +
 +    nc_files.sort()  # files are processed in lexicographic filename order regardless of how they were listed
 +
 +    dataset0 = load_file(nc_files[0], variable_name=variable_name, lat_name=lat_name, lon_name=lon_name, time_name=time_name)
 +    if dataset0.lons.ndim == 1 and dataset0.lats.ndim ==1:
 +        lons, lats = numpy.meshgrid(dataset0.lons, dataset0.lats)
 +    elif dataset0.lons.ndim == 2 and dataset0.lats.ndim ==2:  # NOTE(review): no else branch; any other ndim combination leaves lons/lats unbound
 +        lons = dataset0.lons
 +        lats = dataset0.lats
 +
 +    if mask_file: 
 +        mask_dataset = load_file(mask_file, mask_variable)
 +        y_index, x_index = numpy.where(mask_dataset.values == mask_value)  # assumes the mask values are 2-D (two index arrays) -- TODO confirm
 +
 +    times = []
 +    nfile = len(nc_files)
 +    for ifile, file in enumerate(nc_files):
 +        print 'NC file '+str(ifile+1)+'/'+str(nfile), file
 +        file_object0= load_file(file, variable_name)  # NOTE(review): lat_name/lon_name/time_name are not forwarded here, unlike the first load
 +        values0= file_object0.values
 +        times.extend(file_object0.times)
 +        if mask_file:
 +            values0 = values0[:,y_index, x_index]  # keeps only the masked points; lats/lons returned below are not subset
 +        if ifile == 0:
 +            data_values = values0
 +        else:
 +            data_values= numpy.concatenate((data_values, values0))
 +    times = numpy.array(times)
 +    return Dataset(lats, lons, times, data_values, variable_name, name=name)
 +
 +def load_NLDAS_forcingA_files(file_path=None,
 +                      filename_pattern=None,
 +                      filelist=None,
 +                      variable_name='APCPsfc_110_SFC_acc1h',
 +                      name=''):
 +    ''' Load multiple NLDAS2 forcingAWRF files containing 2D fields such as precipitation and surface variables into a Dataset.
 +    The dataset can be spatially subset.
 +    :param file_path: Directory to the NetCDF file to load.
 +    :type file_path: :mod:`string`
 +    :param filename_pattern: Path to the NetCDF file to load.
 +    :type filename_pattern: :list:`string`
 +    :param filelist: A list of filenames
 +    :type filelist: :list:`string`
 +    :param variable_name: The variable name to load from the NetCDF file.
++=======
+ def load_GPM_IMERG_files(file_path=None,
+                       filename_pattern=None,
+                       filelist=None,
+                       variable_name='precipitationCal',
+                       name='GPM_IMERG'):
+     ''' Load multiple GPM Level 3 IMEGE files containing calibrated precipitation and generate an OCW Dataset obejct.
+     :param file_path: Directory to the HDF files to load.
+     :type file_path: :mod:`string`
+     :param filename_pattern: Path to the HDF files to load.
+     :type filename_pattern: :list:`string`
+     :param filelist: A list of filenames
+     :type filelist: :list:`string`
+     :param variable_name: The variable name to load from the HDF file.
++>>>>>>> CLIMATE-687
      :type variable_name: :mod:`string`
      :param name: (Optional) A name for the loaded dataset.
      :type name: :mod:`string`
      :returns: An OCW Dataset object with the requested variable's data from
++<<<<<<< HEAD
 +        the NetCDF file.
 +    :rtype: :class:`dataset.Dataset`
 +    :raises ValueError:
 +    '''
 + 
 +    if not filelist:
 +        NLDAS_files = []
 +        for pattern in filename_pattern:
 +            NLDAS_files.extend(glob(file_path + pattern))
 +    else:
 +        NLDAS_files = [line.rstrip('\n') for line in open(filelist)]
 +
 +    NLDAS_files.sort()
 +
 +    file_object_first = netCDF4.Dataset(NLDAS_files[0])
 +    lats = file_object_first.variables['lat_110'][:]
 +    lons = file_object_first.variables['lon_110'][:]
 +    lons, lats = numpy.meshgrid(lons, lats)
 +
 +    times = []
 +    nfile = len(NLDAS_files)
 +    for ifile, file in enumerate(NLDAS_files):
 +        print 'Reading file '+str(ifile+1)+'/'+str(nfile), file
 +        file_object = netCDF4.Dataset(file)
 +        time_struct_parsed = strptime(file[-20:-7],"%Y%m%d.%H%M")
 +        times.append(datetime(*time_struct_parsed[:6]))
 +        
 +        values0 = file_object.variables[variable_name][:]
 +        values0 = numpy.expand_dims(values0, axis=0)
 +        if ifile == 0:
 +            values = values0
 +            variable_unit = file_object.variables[variable_name].units
++=======
+         the HDF file.
+     :rtype: :class:`dataset.Dataset`
+     :raises ValueError:
+     '''
+ 
+     if not filelist:
+         GPM_files = []
+         for pattern in filename_pattern:
+             GPM_files.extend(glob(file_path + pattern))
+     else:
+         GPM_files = [line.rstrip('\n') for line in open(filelist)]
+ 
+     GPM_files.sort()
+ 
+     file_object_first = h5py.File(GPM_files[0])
+     lats = file_object_first['Grid']['lat'][:]
+     lons = file_object_first['Grid']['lon'][:]
+ 
+     lons, lats = numpy.meshgrid(lons, lats)
+ 
+     variable_unit = "mm/hr"
+ 
+     times = []
+     nfile = len(GPM_files)
+     for ifile, file in enumerate(GPM_files):
+         print 'Reading file '+str(ifile+1)+'/'+str(nfile), file
+         file_object = h5py.File(file)
+         time_struct_parsed = strptime(file[-39:-23],"%Y%m%d-S%H%M%S")
+         times.append(datetime(*time_struct_parsed[:6]))
+         values0= numpy.transpose(ma.masked_less(file_object['Grid'][variable_name][:], 0.))
+         values0= numpy.expand_dims(values0, axis=0)
+         if ifile == 0:
+             values = values0
++>>>>>>> CLIMATE-687
          else:
              values = numpy.concatenate((values, values0))
          file_object.close()