You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by le...@apache.org on 2017/10/31 21:53:47 UTC
[4/9] climate git commit: Added ocw-parallel directory for first
version of multicore support.
Added ocw-parallel directory for first version of multicore support.
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/5c7a9ef1
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/5c7a9ef1
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/5c7a9ef1
Branch: refs/heads/master
Commit: 5c7a9ef1881b0c8875d8a9181afaceb567e072f3
Parents: 56989f5
Author: BrianWilson1 <Br...@jpl.nasa.gov>
Authored: Thu Sep 7 16:43:22 2017 -0700
Committer: BrianWilson1 <Br...@jpl.nasa.gov>
Committed: Thu Sep 7 16:43:22 2017 -0700
----------------------------------------------------------------------
ocw-parallel/README.md | 1 +
ocw-parallel/functions.py | 236 ++++++++++++++
... Workflow (with multicore parallelism).ipynb | 221 +++++++++++++
.../notebooks/Basic RCMES Workflow.ipynb | 323 +++++++++++++++++++
.../notebooks/Flexible RCMES Workflow.ipynb | 211 ++++++++++++
5 files changed, 992 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/climate/blob/5c7a9ef1/ocw-parallel/README.md
----------------------------------------------------------------------
diff --git a/ocw-parallel/README.md b/ocw-parallel/README.md
new file mode 100644
index 0000000..157185b
--- /dev/null
+++ b/ocw-parallel/README.md
@@ -0,0 +1 @@
+# ocw-parallel
http://git-wip-us.apache.org/repos/asf/climate/blob/5c7a9ef1/ocw-parallel/functions.py
----------------------------------------------------------------------
diff --git a/ocw-parallel/functions.py b/ocw-parallel/functions.py
new file mode 100644
index 0000000..0ba3061
--- /dev/null
+++ b/ocw-parallel/functions.py
@@ -0,0 +1,236 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys, os, datetime, urllib, urlparse
+from datetime import timedelta
+import numpy as np
+
+import ocw.data_source.local as local
+import ocw.dataset_processor as dsp
+import ocw.evaluation as evaluation
+import ocw.metrics as metrics
+import ocw.plotter as plotter
+
+
+def loadDataset(urlAndVariable, dir=None):
+    '''Load a dataset (variable), returning a Dataset object.
+
+    urlAndVariable -- a (url, variable) or (url, variable, shortName) sequence.
+        A non-empty shortName becomes the dataset's name; otherwise the name
+        is built as '<file path>?<variable>'.
+    dir -- directory handed to retrieveFile (None lets retrieveFile pick its
+        default).
+
+    Raises IOError when retrieveFile reports that the file is unavailable.
+    '''
+    shortName = None
+    if len(urlAndVariable) == 3:
+        url, variable, shortName = urlAndVariable
+    else:
+        url, variable = urlAndVariable
+    f = retrieveFile(url, dir)
+    if f is None:
+        # retrieveFile signals a failed download with None; stop here with a
+        # clear error instead of handing None to the file loader.
+        raise IOError('loadDataset: Cannot retrieve dataset file for URL: %s' % url)
+    ds = local.load_file(f, variable)
+    if not shortName:
+        shortName = f + '?' + variable
+    ds.name = shortName
+    shape = ds.values.shape
+    # Label the axes for the log line; any rank other than 2 or 3 gets a
+    # placeholder so the message below can always be formatted.
+    coordNames = {3: '(time, lat, lon)', 2: '(lat, lon)'}
+    coords = coordNames.get(len(shape), '(unknown)')
+    print >>sys.stderr, 'loadDataset: File %s has variable %s with shape %s: %s' % (f, variable, coords, shape)
+    return ds
+
+def loadDatasets(urlAndVariables, dir=None):
+    '''Load each entry of urlAndVariables with loadDataset, keeping the input order.'''
+    loaded = []
+    for entry in urlAndVariables:
+        loaded.append(loadDataset(entry, dir))
+    return loaded
+
+
+def temporalRegrid(dataset, timeRes=timedelta(days=365)):
+    '''Rebin one dataset in time to the resolution timeRes (a timedelta).
+
+    The work is delegated to dsp.temporal_rebin; the rebinned dataset is
+    returned and its new shape is logged to stderr.
+    '''
+    rebinned = dsp.temporal_rebin(dataset, timeRes)
+    label = rebinned.name
+    if label is None:
+        # Unnamed datasets are logged with an empty label.
+        label = ''
+    newShape = str(rebinned.values.shape)
+    print >>sys.stderr, 'temporalRebin: Dataset %s has new shape %s' % (label, newShape)
+    return rebinned
+
+def temporalRegrids(datasets, timeRes=timedelta(days=365)):
+    '''Apply temporalRegrid to every dataset, using the same timeRes (a timedelta) for all.'''
+    rebinned = []
+    for ds in datasets:
+        rebinned.append(temporalRegrid(ds, timeRes))
+    return rebinned
+
+
+def spatialBounds(dataset):
+    '''Return whatever dataset.spatial_boundaries() reports for this dataset.'''
+    bounds = dataset.spatial_boundaries()
+    return bounds
+
+def commonSpatialBounds(datasets):
+    '''Return the (minLat, maxLat, minLon, maxLon) box shared by all datasets.
+
+    Each dataset's bounds come from spatialBounds and are read as
+    (minLat, maxLat, minLon, maxLon). The intersection takes the largest
+    minimum and the smallest maximum on each axis. Per-dataset and common
+    bounds are logged to stderr.
+    '''
+    bounds = [spatialBounds(ds) for ds in datasets]
+    for index, box in enumerate(bounds):
+        label = datasets[index].name
+        if label is None or label == '':
+            # Fall back to the dataset's position when it has no usable name.
+            label = str(index)
+        print >>sys.stderr, 'commonSpatialBounds: Dataset %s has boundaries: lat (%s, %s), lon (%s, %s).' % \
+            (label, box[0], box[1], box[2], box[3])
+    minLat = max([box[0] for box in bounds])
+    maxLat = min([box[1] for box in bounds])
+    minLon = max([box[2] for box in bounds])
+    maxLon = min([box[3] for box in bounds])
+    common = (minLat, maxLat, minLon, maxLon)
+    print >>sys.stderr, 'commonSpatialBounds: Common boundaries are: lat (%s, %s), lon (%s, %s).' % common
+    return common
+
+def generateLatLonGrid(latGrid, lonGrid):
+    '''Build uniform latitude and longitude coordinate vectors.
+
+    latGrid is (latMin, latMax, latRes) and lonGrid is (lonMin, lonMax, lonRes).
+    All six values are converted to float. Each vector is produced by
+    np.arange, so it starts at the minimum and stops before the maximum.
+    Returns the tuple (lats, lons).
+    '''
+    latStart, latStop, latStep = [float(v) for v in latGrid]
+    lonStart, lonStop, lonStep = [float(v) for v in lonGrid]
+    return (np.arange(latStart, latStop, latStep),
+            np.arange(lonStart, lonStop, lonStep))
+
+def commonLatLonGrid(datasets, latRes, lonRes):
+    '''Build a lat/lon grid covering the bounds common to all datasets.
+
+    The common bounds come from commonSpatialBounds; the grid is generated by
+    generateLatLonGrid at the requested latRes/lonRes. Returns (lats, lons).
+    '''
+    south, north, west, east = commonSpatialBounds(datasets)
+    return generateLatLonGrid((south, north, latRes), (west, east, lonRes))
+
+
+def spatialRegrid(dataset, lats, lons):
+    '''Regrid one dataset onto the grid given by the coordinate vectors lats and lons.
+
+    This is a thin wrapper that returns the result of dsp.spatial_regrid.
+    '''
+    regridded = dsp.spatial_regrid(dataset, lats, lons)
+    return regridded
+
+def spatialRegrids(datasets, lats, lons):
+    '''Regrid every dataset onto the same grid, where lats & lons are the new coordinate vectors.
+
+    Returns a list with one spatialRegrid result per input dataset, in input order.
+    '''
+    regridded = []
+    for ds in datasets:
+        regridded.append(spatialRegrid(ds, lats, lons))
+    return regridded
+
+
+def lookupMetrics(metricNames,
+                  availableMetrics={'Bias': metrics.Bias, 'StdDevRatio': metrics.StdDevRatio,
+                                    'PatternCorrelation': metrics.PatternCorrelation}):
+    '''Return a list of metric objects given a list of string names.
+
+    metricNames -- iterable of names to look up.
+    availableMetrics -- mapping from name to a metric class (or any
+        zero-argument callable); it is only read, never modified.
+
+    A name missing from availableMetrics is reported on stderr and skipped,
+    so the result can be shorter than metricNames. Errors raised while
+    constructing a metric are not caught: they are not "unknown name"
+    problems and should not be reported as such.
+    '''
+    found = []
+    for name in metricNames:
+        try:
+            # TypeError covers unhashable names, which cannot be keys either.
+            metricClass = availableMetrics[name]
+        except (KeyError, TypeError):
+            print >>sys.stderr, 'lookupMetrics: Error, No metric named %s' % name
+            continue
+        found.append(metricClass())
+    return found
+
+def computeMetrics(datasets, metricNames=['Bias'], subregions=None):
+    '''Compute one or more metrics comparing multiple target datasets to a reference dataset.
+
+    This routine assumes that the datasets have already been regridded so that
+    their grid dimensions are identical.
+
+    datasets -- sequence whose first element is the reference dataset and
+        whose remaining elements are the targets.
+    metricNames -- names resolved through lookupMetrics (the default list is
+        only read, never modified).
+    subregions -- optional subregion boundaries, forwarded to the Evaluation;
+        None evaluates without subregions.
+
+    Returns the results attribute of the Evaluation after it has run. An
+    unknown metric name is reported on stderr and the evaluation continues
+    with the metrics that were found.
+    '''
+    metricObjects = lookupMetrics(metricNames)
+    if len(metricObjects) != len(metricNames):
+        print >>sys.stderr, 'computeMetrics: Error, Illegal or misspelled metric name.'
+    # Forward subregions so the argument has an effect; it used to be dropped.
+    evaluator = evaluation.Evaluation(datasets[0], datasets[1:], metricObjects,
+                                      subregions=subregions)
+    print >>sys.stderr, 'computeMetrics: Evaluating metrics %s . . .' % str(metricNames)
+    evaluator.run()
+    return evaluator.results
+
+
+def compareVariablesWithMetrics(datasetUrlsAndVarNames,
+                                metrics,
+                                outputName,
+                                timeRes=timedelta(days=365),
+                                latRes=1., lonRes=1.,
+                                subregions=None,
+                                dir='./'):
+    '''Compare multiple target variables to a reference variable, returning the computed metric(s).
+
+    All variables are first rebinned to a common time resolution and then
+    regridded to a common lat/lon grid.
+
+    datasetUrlsAndVarNames -- URLs pointing to datasets; the first one is the
+        reference, the rest are targets. Each entry is (datasetUrl,
+        variableName) or (datasetUrl, variableName, shortName).
+    metrics -- list of metrics to compute (by name).
+    outputName -- root name for outputs (not used inside this function).
+    timeRes -- time resolution to regrid all variables to.
+    latRes, lonRes -- lat/lon resolutions to regrid all variables to.
+    subregions -- list of subregion boundaries, passed on to computeMetrics.
+    dir -- directory passed to loadDatasets; defaults to the current
+        working directory.
+
+    Returns (newLats, newLons, results), where results is what computeMetrics
+    returned.
+    '''
+    loaded = loadDatasets(datasetUrlsAndVarNames, dir)
+    rebinned = temporalRegrids(loaded, timeRes)
+
+    newLats, newLons = commonLatLonGrid(rebinned, latRes, lonRes)
+    regridded = spatialRegrids(rebinned, newLats, newLons)
+
+    # TODO: mask missing values here; no maskMissingValues helper exists in this module yet.
+
+    results = computeMetrics(regridded, metrics, subregions)
+    return (newLats, newLons, results)
+
+
+def plotBias(metric, lats, lons, outputName, **config):
+    '''Draw a contour map of a bias metric and return the plot's file name.
+
+    metric, lats, lons, outputName and any extra keyword options in config
+    are handed to plotter.draw_contour_map unchanged. The returned name is
+    outputName + '.png' (presumably the file draw_contour_map writes by
+    default; confirm against the plotter documentation).
+    '''
+    pngName = outputName + '.png'
+    print 'plotBias: Writing %s' % pngName
+    plotter.draw_contour_map(metric, lats, lons, outputName, **config)
+    return pngName
+
+
+# Utilities follow.
+
+def isLocalFile(url):
+    '''Check if URL is a local path.
+
+    Returns (isLocal, path), where path is the path component of the parsed
+    URL. A URL counts as local when it has no scheme or the 'file' scheme.
+    A local path that does not exist is reported on stderr but still
+    returned with isLocal set to True.
+    '''
+    u = urlparse.urlparse(url)
+    if u.scheme == '' or u.scheme == 'file':
+        # os.path.exists, not path.exists: only the os module is imported,
+        # so the bare name 'path' raised NameError for every local path.
+        if not os.path.exists(u.path):
+            print >>sys.stderr, 'isLocalFile: File at local path does not exist: %s' % u.path
+        return (True, u.path)
+    else:
+        return (False, u.path)
+
+def retrieveFile(url, dir=None):
+    '''Retrieve a file from a URL, or if it is a local path then verify it exists.
+
+    url -- remote URL or local path (see isLocalFile).
+    dir -- directory for downloaded files; None means './'.
+
+    Returns the path of the usable file, or None when a download fails.
+    A remote file already present in dir (same base name) is reused instead
+    of being downloaded again.
+    '''
+    if dir is None:
+        dir = './'
+    ok, path = isLocalFile(url)
+    fn = os.path.split(path)[1]
+    outPath = os.path.join(dir, fn)
+    if ok:
+        # A local file that exists is used where it is. Returning
+        # dir/basename unconditionally pointed callers at the wrong file
+        # whenever the data lived outside dir; that form is kept only as the
+        # fallback for paths given relative to dir.
+        if os.path.exists(path):
+            return path
+        return outPath
+    if os.path.exists(outPath):
+        print >>sys.stderr, 'retrieveFile: Using cached file: %s' % outPath
+        return outPath
+    print >>sys.stderr, 'retrieveFile: Retrieving (URL) %s to %s' % (url, outPath)
+    try:
+        urllib.urlretrieve(url, outPath)
+    except Exception:
+        # Still best-effort (report and return None), but Ctrl-C and
+        # SystemExit are no longer swallowed as a bare except would.
+        print >>sys.stderr, 'retrieveFile: Cannot retrieve file at URL: %s' % url
+        # Remove any partial download so the next call does not mistake it
+        # for a valid cached file.
+        if os.path.exists(outPath):
+            try:
+                os.remove(outPath)
+            except OSError:
+                pass
+        return None
+    return outPath
+
+
+# Tests and main follow.
+
+def test1(urlsAndVars, outputName, **config):
+    '''Test the compareVariablesWithMetrics routine and plot the resulting bias.
+
+    urlsAndVars -- dataset entries, reference first (see
+        compareVariablesWithMetrics).
+    outputName -- root name for the plot.
+    config -- optional plot options; they override the built-in defaults
+        below instead of being discarded.
+
+    Returns the plot file name reported by plotBias.
+    '''
+    lats, lons, results = compareVariablesWithMetrics(urlsAndVars, ['Bias'], outputName, timedelta(days=365), 1, 1)
+    print results
+
+    plotConfig = {'gridshape': (4, 5),
+                  'ptitle': 'TASMAX Bias of WRF Compared to KNMI (1989 - 2008)',
+                  'subtitles': range(1989, 2009, 1)}
+    # Caller-supplied options win over the defaults.
+    plotConfig.update(config)
+    plotFile = plotBias(results[0][0], lats, lons, outputName, **plotConfig)
+    return plotFile
+
+
+def main(args):
+    '''Main routine to provide command line capability.
+
+    args -- [testNumber, url1, var1, outputName, url2, var2], where
+        (url1, var1) is the reference dataset and (url2, var2) the target.
+
+    Returns whatever the selected test returns, or None (after a message on
+    stderr) when there are too few arguments or the test number is unknown.
+    '''
+    if len(args) < 6:
+        print >>sys.stderr, 'main: Usage: functions.py <testNumber> <url1> <var1> <outputName> <url2> <var2>'
+        return None
+    nTest = int(args[0])
+    url1, var1, outputName, url2, var2 = args[1:6]
+    urlsAndVars = [(url1, var1), (url2, var2)]
+    # Only test1 exists in this module; dispatching through a table avoids
+    # the NameError that the old reference to an undefined test2 raised.
+    tests = {1: test1}
+    if nTest not in tests:
+        print >>sys.stderr, 'main: Error, No test numbered %d' % nTest
+        return None
+    return tests[nTest](urlsAndVars, outputName)
+
+if __name__ == '__main__':
+    # Script entry point: run main on the command-line arguments (program
+    # name excluded) and echo its return value.
+    result = main(sys.argv[1:])
+    print result
+
+
+# python functions.py 1 "http://zipper.jpl.nasa.gov/dist/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc" "tasmax" "wrf_bias_compared_to_knmi" "http://zipper.jpl.nasa.gov/dist/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc" "tasmax"
+