You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@climate.apache.org by jo...@apache.org on 2015/02/27 18:31:11 UTC
[1/2] climate git commit: CLIMATE-588 - Refactor config based
evaluation
Repository: climate
Updated Branches:
refs/heads/master 58703e0cb -> 6e73c8a51
CLIMATE-588 - Refactor config based evaluation
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/c90440b2
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/c90440b2
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/c90440b2
Branch: refs/heads/master
Commit: c90440b2cd792ff74407468e951c99b091d4de14
Parents: 58703e0
Author: Michael Joyce <jo...@apache.org>
Authored: Fri Feb 27 09:08:41 2015 -0800
Committer: Michael Joyce <jo...@apache.org>
Committed: Fri Feb 27 09:11:26 2015 -0800
----------------------------------------------------------------------
ocw-config-runner/configuration_parsing.py | 218 ++++++
ocw-config-runner/evaluation_creation.py | 132 ++++
ocw-config-runner/ocw_evaluation_from_config.py | 386 +---------
ocw-config-runner/plot_generation.py | 54 ++
ocw-config-runner/tests/test_config_parsing.py | 669 +++++++++++++++++
ocw-config-runner/tests/test_config_runner.py | 711 -------------------
.../tests/test_evaluation_creation.py | 33 +
7 files changed, 1109 insertions(+), 1094 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/configuration_parsing.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/configuration_parsing.py b/ocw-config-runner/configuration_parsing.py
new file mode 100644
index 0000000..600d7ac
--- /dev/null
+++ b/ocw-config-runner/configuration_parsing.py
@@ -0,0 +1,218 @@
+import logging
+import re
+import sys
+
+import ocw.metrics as metrics
+
+import yaml
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+def is_config_valid(config_data):
+    """ Check that parsed configuration data can drive an evaluation.
+
+    Two checks run in order and validation stops at the first failure: the
+    minimal-content check, then the well-formedness check. A summary of the
+    failing check is logged as an error.
+
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type config_data: :func:`dict`
+
+    :returns: True if the configuration data is sufficient for an evaluation
+        and seems to be well formed, False otherwise.
+    """
+    # Ordered (check, summary logged when the check fails) pairs.
+    validation_steps = (
+        (_valid_minimal_config,
+         'Insufficient configuration file data for an evaluation'),
+        (_config_is_well_formed,
+         'Configuration data is not well formed'),
+    )
+
+    for passes_check, failure_summary in validation_steps:
+        if not passes_check(config_data):
+            logger.error(failure_summary)
+            return False
+
+    return True
+
+def _valid_minimal_config(config_data):
+    """ Check that the configuration holds the minimum needed to evaluate.
+
+    The configuration must contain both a ``datasets`` and a ``metrics``
+    section. If any configured metric is a built-in unary metric, at least
+    one of ``reference`` / ``targets`` must be present in ``datasets``. If
+    any is a built-in binary metric, both must be present.
+
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type config_data: :func:`dict`
+
+    :returns: True if the minimum required sections are present, False
+        otherwise. The reason for a failure is logged as an error.
+    """
+    if 'datasets' not in config_data:
+        logger.error('No datasets specified in configuration data.')
+        return False
+
+    if 'metrics' not in config_data:
+        logger.error('No metrics specified in configuration data.')
+        return False
+
+    # An empty YAML section (e.g. a bare "datasets:") parses to None. Treat
+    # it as empty so the checks below report the problem instead of raising.
+    datasets = config_data['datasets'] or {}
+    metric_names = config_data['metrics'] or []
+
+    has_reference = 'reference' in datasets
+    has_targets = 'targets' in datasets
+
+    if _contains_unary_metrics(metric_names):
+        if not has_reference and not has_targets:
+            err = (
+                'Unary metric in configuration data requires either a reference '
+                'or target dataset to be present for evaluation. Please ensure '
+                'that your config is well formed.'
+            )
+            logger.error(err)
+            return False
+
+    if _contains_binary_metrics(metric_names):
+        if not has_reference or not has_targets:
+            logger.error(
+                'Binary metric in configuration requires both a reference '
+                'and target dataset to be present for evaluation. Please ensure '
+                'that your config is well formed.'
+            )
+            return False
+
+    return True
+
+def _config_is_well_formed(config_data):
+    """ Check the structure of each section of the configuration.
+
+    Every section is inspected even after a problem has been found, so that
+    all problems are logged in a single pass.
+
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file. Must contain ``datasets`` and ``metrics`` keys;
+        ``plots`` is optional.
+    :type config_data: :func:`dict`
+
+    :returns: True if no problems were found, False otherwise.
+    """
+    is_well_formed = True
+
+    # Empty YAML sections parse to None; treat them as empty rather than
+    # failing with a TypeError on the membership test / iteration.
+    datasets = config_data['datasets'] or {}
+
+    if 'reference' in datasets:
+        if not _valid_dataset_config_data(datasets['reference']):
+            is_well_formed = False
+
+    if 'targets' in datasets:
+        targets = datasets['targets']
+        if not isinstance(targets, list):
+            err = (
+                'Expected to find list of target datasets but instead found '
+                'object of type {}'
+            ).format(type(targets))
+            logger.error(err)
+            is_well_formed = False
+        else:
+            for t in targets:
+                if not _valid_dataset_config_data(t):
+                    is_well_formed = False
+
+    available_metrics = _fetch_built_in_metrics()
+    for metric in config_data['metrics'] or []:
+        if metric not in available_metrics:
+            warning = (
+                'Unable to locate metric name {} in built-in metrics. If this '
+                'is not a user defined metric then please check for potential '
+                'misspellings.'
+            ).format(metric)
+            logger.warn(warning)
+            is_well_formed = False
+
+    for plot in config_data.get('plots') or []:
+        if not _valid_plot_config_data(plot):
+            is_well_formed = False
+
+    return is_well_formed
+
+def _contains_unary_metrics(config_metric_data):
+    """ Report whether any configured metric is a built-in unary metric.
+
+    :param config_metric_data: Iterable of metric names from the config.
+
+    :returns: True if at least one name equals the class name of a direct
+        subclass of ``metrics.UnaryMetric``, False otherwise.
+    """
+    unary_names = []
+    for metric_class in metrics.UnaryMetric.__subclasses__():
+        unary_names.append(metric_class.__name__)
+
+    for requested in config_metric_data:
+        if requested in unary_names:
+            return True
+    return False
+
+def _contains_binary_metrics(config_metric_data):
+    """ Report whether any configured metric is a built-in binary metric.
+
+    :param config_metric_data: Iterable of metric names from the config.
+
+    :returns: True if at least one name equals the class name of a direct
+        subclass of ``metrics.BinaryMetric``, False otherwise.
+    """
+    binary_names = []
+    for metric_class in metrics.BinaryMetric.__subclasses__():
+        binary_names.append(metric_class.__name__)
+
+    for requested in config_metric_data:
+        if requested in binary_names:
+            return True
+    return False
+
+def _fetch_built_in_metrics():
+    """ List the class names of the built-in metrics.
+
+    :returns: A list holding the class names of the direct subclasses of
+        ``metrics.UnaryMetric`` followed by those of ``metrics.BinaryMetric``.
+    """
+    built_in_names = []
+    for base_class in (metrics.UnaryMetric, metrics.BinaryMetric):
+        built_in_names.extend(
+            metric_class.__name__
+            for metric_class in base_class.__subclasses__()
+        )
+    return built_in_names
+
+def _valid_dataset_config_data(dataset_config_data):
+    """ Check that one dataset entry carries the keys its data source needs.
+
+    :param dataset_config_data: Mapping describing a single dataset. Its
+        ``data_source`` value selects which keys are required.
+
+    :returns: True if the entry names a known data source and holds every
+        key required for it, False otherwise. Each problem is logged as an
+        error.
+    """
+    try:
+        data_source = dataset_config_data['data_source']
+    except KeyError:
+        logger.error('Dataset does not contain a data_source attribute.')
+        return False
+
+    # Keys that must be present for each recognised data source.
+    keys_by_source = (
+        ('local', ['data_source', 'file_count', 'path', 'variable']),
+        ('rcmed', [
+            'dataset_id',
+            'parameter_id',
+            'min_lat',
+            'max_lat',
+            'min_lon',
+            'max_lon',
+            'start_time',
+            'end_time',
+        ]),
+        ('esgf', [
+            'data_source',
+            'dataset_id',
+            'variable',
+            'esgf_username',
+            'esgf_password',
+        ]),
+        ('dap', ['url', 'variable']),
+    )
+
+    required_keys = None
+    for source_name, key_names in keys_by_source:
+        if data_source == source_name:
+            required_keys = set(key_names)
+            break
+
+    if required_keys is None:
+        logger.error('Dataset does not contain a valid data_source location.')
+        return False
+
+    missing_keys = required_keys.difference(dataset_config_data.keys())
+    if missing_keys:
+        logger.error(
+            'Dataset does not contain required keys. '
+            'The following keys are missing: {}'.format(
+                ', '.join(sorted(missing_keys))
+            )
+        )
+        return False
+
+    # A multi-file local dataset additionally needs the pattern used to
+    # find its files.
+    if data_source == 'local' and dataset_config_data['file_count'] > 1:
+        if 'file_glob_pattern' not in dataset_config_data:
+            logger.error(
+                'Multi-file local dataset is missing key: file_glob_pattern'
+            )
+            return False
+
+    return True
+
+def _valid_plot_config_data(plot_config_data):
+    """ Check that one plot entry carries the keys its plot type needs.
+
+    Only ``contour`` plots are validated. The recognised but unsupported
+    types (``taylor``, ``subregion``, ``time_series``, ``portrait``) log a
+    warning and are accepted without further checks.
+
+    :param plot_config_data: Mapping describing a single plot. Its ``type``
+        value selects which keys are required.
+
+    :returns: True if the entry is acceptable, False if the type is missing
+        or unrecognised, or if required keys are absent. Problems are
+        logged.
+    """
+    try:
+        plot_type = plot_config_data['type']
+    except KeyError:
+        logger.error('Plot config does not include a type attribute.')
+        return False
+
+    # Recognised plot types for which there is nothing to validate yet.
+    unsupported_warnings = (
+        ('taylor',
+         'Taylor diagrams are currently unsupported. Skipping validation'),
+        ('subregion',
+         'Subregion plots are currently unsupported. Skipping validation'),
+        ('time_series',
+         'Time series plots are currently unsupported. Skipping validation'),
+        ('portrait',
+         'Portrait diagrams are currently unsupported. Skipping validation'),
+    )
+
+    if plot_type == 'contour':
+        required_keys = set([
+            'results_indeces',
+            'lats',
+            'lons',
+            'output_name'
+        ])
+    else:
+        for unsupported_type, warning in unsupported_warnings:
+            if plot_type == unsupported_type:
+                logger.warn(warning)
+                # Return here: there is no required-key set for these
+                # types, and falling through to the key check would raise
+                # UnboundLocalError on required_keys.
+                return True
+
+        logger.error('Invalid plot type specified.')
+        return False
+
+    present_keys = set(plot_config_data.keys())
+    missing_keys = required_keys - present_keys
+
+    if missing_keys:
+        missing = sorted(missing_keys)
+        logger.error(
+            'Plot config does not contain required keys. '
+            'The following keys are missing: {}'.format(', '.join(missing))
+        )
+        return False
+
+    return True
+
http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/evaluation_creation.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/evaluation_creation.py b/ocw-config-runner/evaluation_creation.py
new file mode 100644
index 0000000..66794d9
--- /dev/null
+++ b/ocw-config-runner/evaluation_creation.py
@@ -0,0 +1,132 @@
+import dateutil.parser
+from datetime import timedelta
+import logging
+
+from ocw.dataset import Bounds
+from ocw.evaluation import Evaluation
+import ocw.dataset_processor as dsp
+import ocw.data_source.local as local
+import ocw.data_source.rcmed as rcmed
+import ocw.data_source.esgf as esgf
+import ocw.data_source.dap as dap
+import ocw.metrics as metrics
+
+import numpy as np
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+def generate_evaluation_from_config(config_data):
+    """ Build an Evaluation object from configuration data.
+
+    Loads the reference and target datasets named in the ``datasets``
+    section, passes them through the dataset preparation step, instantiates
+    each metric named in the ``metrics`` section, and wraps the lot in an
+    Evaluation.
+
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type config_data: :func:`dict`
+
+    :returns: An Evaluation object containing the data specified in the
+        supplied configuration data.
+    """
+    dataset_configs = config_data['datasets']
+
+    # Either side may be absent from the configuration; None marks that.
+    reference = None
+    if 'reference' in dataset_configs:
+        reference = _load_dataset(dataset_configs['reference'])
+
+    targets = None
+    if 'targets' in dataset_configs:
+        targets = []
+        for target_config in dataset_configs['targets']:
+            targets.append(_load_dataset(target_config))
+
+    reference, targets = _prepare_datasets_for_evaluation(
+        reference, targets, config_data
+    )
+
+    # _load_metric hands back the metric class; instantiate it here.
+    eval_metrics = []
+    for metric_name in config_data['metrics']:
+        metric_class = _load_metric(metric_name)
+        eval_metrics.append(metric_class())
+
+    return Evaluation(reference, targets, eval_metrics)
+
+def _load_dataset(dataset_config_data):
+    """ Load a single dataset as described by its configuration entry.
+
+    The ``data_source`` value selects the loader: ``local``, ``rcmed``,
+    ``esgf`` or ``dap``. Anything under ``optional_args`` is forwarded to
+    the loader as keyword arguments.
+
+    :param dataset_config_data: Mapping describing a single dataset.
+    :type dataset_config_data: :func:`dict`
+
+    :returns: Whatever the selected loader returns, or None when a local
+        dataset spans more than one file (unsupported; an error is logged)
+        or when ``data_source`` matches none of the known sources.
+    """
+    data_source = dataset_config_data['data_source']
+    optional_args = dataset_config_data.get('optional_args', {})
+
+    if data_source == 'local':
+        if dataset_config_data['file_count'] > 1:
+            logger.error(
+                'Multi-file datasets are currently not supported. Cancelling load '
+                'of the following dataset: {}'.format(dataset_config_data)
+            )
+            return None
+
+        return local.load_file(dataset_config_data['path'],
+                               dataset_config_data['variable'],
+                               **optional_args)
+    elif data_source == 'rcmed':
+        # The sixth argument used to repeat min_lon; pass max_lon so the
+        # longitude range is not collapsed to a single value.
+        return rcmed.parameter_dataset(dataset_config_data['dataset_id'],
+                                       dataset_config_data['parameter_id'],
+                                       dataset_config_data['min_lat'],
+                                       dataset_config_data['max_lat'],
+                                       dataset_config_data['min_lon'],
+                                       dataset_config_data['max_lon'],
+                                       dataset_config_data['start_time'],
+                                       dataset_config_data['end_time'],
+                                       **optional_args)
+    elif data_source == 'esgf':
+        return esgf.load_dataset(dataset_config_data['dataset_id'],
+                                 dataset_config_data['variable'],
+                                 dataset_config_data['esgf_username'],
+                                 dataset_config_data['esgf_password'],
+                                 **optional_args)
+    elif data_source == 'dap':
+        # optional_args is read with .get(); the config dict used to be
+        # called directly here, which raised TypeError.
+        return dap.load(dataset_config_data['url'],
+                        dataset_config_data['variable'],
+                        **optional_args)
+
+    return None
+
+def _prepare_datasets_for_evaluation(reference, targets, config_data):
+    """ Apply the configured subsetting, rebinning and regridding steps.
+
+    Each step is driven by an optional key of the ``evaluation`` section
+    and is skipped when that key is absent or empty. A missing
+    ``evaluation`` section means no processing at all.
+
+    * ``subset``: six entries. The first four are passed positionally to
+      ``Bounds``; entries 4 and 5 are date strings parsed into its start
+      and end.
+    * ``temporal_time_delta``: passed to ``timedelta`` as its first
+      argument (days) to form the rebinning resolution.
+    * ``spatial_regrid_lats`` / ``spatial_regrid_lons``: ``[start, stop,
+      step]`` triples expanded with ``np.arange`` (stop is exclusive).
+      Regridding happens only when both are given.
+
+    :param reference: The loaded reference dataset, or None.
+    :param targets: List of loaded target datasets, or None.
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type config_data: :func:`dict`
+
+    :returns: A ``(reference, targets)`` tuple holding the processed
+        datasets.
+    """
+    # Validation does not require the 'evaluation' section, so tolerate it
+    # being absent or empty rather than raising KeyError/AttributeError.
+    evaluation_config = config_data.get('evaluation') or {}
+
+    subset = evaluation_config.get('subset', None)
+    temporal_time_delta = evaluation_config.get('temporal_time_delta', None)
+    spatial_regrid_lats = evaluation_config.get('spatial_regrid_lats', None)
+    spatial_regrid_lons = evaluation_config.get('spatial_regrid_lons', None)
+
+    if subset:
+        start = dateutil.parser.parse(subset[4])
+        end = dateutil.parser.parse(subset[5])
+        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)
+
+        if reference:
+            reference = dsp.safe_subset(bounds, reference)
+
+        if targets:
+            targets = [dsp.safe_subset(bounds, t) for t in targets]
+
+    if temporal_time_delta:
+        resolution = timedelta(temporal_time_delta)
+
+        if reference:
+            reference = dsp.temporal_rebin(reference, resolution)
+
+        if targets:
+            targets = [dsp.temporal_rebin(t, resolution) for t in targets]
+
+    if spatial_regrid_lats and spatial_regrid_lons:
+        lats = np.arange(*spatial_regrid_lats[:3])
+        lons = np.arange(*spatial_regrid_lons[:3])
+
+        if reference:
+            reference = dsp.spatial_regrid(reference, lats, lons)
+
+        if targets:
+            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]
+
+    return reference, targets
+
+def _load_metric(metric_config_data):
+    """ Look up a metric class in ``ocw.metrics`` by name.
+
+    :param metric_config_data: Name of the metric as given in the config.
+    :type metric_config_data: :mod:`string`
+
+    :returns: The attribute of ``ocw.metrics`` with that name, or None when
+        the name contains a ``.`` (an error is logged in that case).
+    """
+    # A dotted name points at a metric defined outside of ocw.metrics,
+    # which cannot currently be loaded.
+    if '.' not in metric_config_data:
+        return getattr(metrics, metric_config_data)
+
+    logger.error(
+        'User-defined metrics outside of the ocw.metrics module '
+        'cannot currently be loaded. If you just wanted a metric '
+        'found in ocw.metrics then do not specify the full '
+        'package and module names. See the documentation for examples.'
+    )
+    return None
http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/ocw_evaluation_from_config.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/ocw_evaluation_from_config.py b/ocw-config-runner/ocw_evaluation_from_config.py
index aba6fef..e27acc2 100644
--- a/ocw-config-runner/ocw_evaluation_from_config.py
+++ b/ocw-config-runner/ocw_evaluation_from_config.py
@@ -16,23 +16,12 @@
# under the License.
import argparse
-import dateutil.parser
-from datetime import timedelta
import logging
-import re
-import sys
-from ocw.dataset import Bounds
-from ocw.evaluation import Evaluation
-import ocw.metrics as metrics
-import ocw.plotter as plots
-import ocw.dataset_processor as dsp
-import ocw.data_source.local as local
-import ocw.data_source.rcmed as rcmed
-import ocw.data_source.esgf as esgf
-import ocw.data_source.dap as dap
+from configuration_parsing import is_config_valid
+from evaluation_creation import generate_evaluation_from_config
+from plot_generation import plot_from_config
-import numpy as np
import yaml
logging.basicConfig()
@@ -61,375 +50,6 @@ def run_evaluation_from_config(config_file_path):
plot_from_config(evaluation, config)
-def is_config_valid(config_data):
- """ Validate supplied evaluation configuration data.
-
- :param config_data: Dictionary of the data parsed from the supplied YAML
- configuration file.
- :type config_data: :func:`dict`
-
- :returns: True if the configuration data is sufficient for an evaluation and
- seems to be well formed, False otherwise.
- """
- if not _valid_minimal_config(config_data):
- logger.error('Insufficient configuration file data for an evaluation')
- return False
-
- if not _config_is_well_formed(config_data):
- logger.error('Configuration data is not well formed')
- return False
-
- return True
-
-def generate_evaluation_from_config(config_data):
- """ Generate an Evaluation object from configuration data.
-
- :param config_data: Dictionary of the data parsed from the supplied YAML
- configuration file.
- :type config_data: :func:`dict`
-
- :returns: An Evaluation object containing the data specified in the
- supplied configuration data.
- """
- # Load datasets
- reference = None
- targets = None
- if 'reference' in config_data['datasets']:
- reference = _load_dataset(config_data['datasets']['reference'])
-
- if 'targets' in config_data['datasets']:
- targets = [_load_dataset(t) for t in config_data['datasets']['targets']]
-
- reference, targets = _prepare_datasets_for_evaluation(reference,
- targets,
- config_data)
- # Load metrics
- eval_metrics = [_load_metric(m)() for m in config_data['metrics']]
-
- return Evaluation(reference, targets, eval_metrics)
-
-def plot_from_config(evaluation, config_data):
- """ Generate plots for an evaluation from configuration data.
-
- :param evaluation: The Evaluation for which to generate plots.
- :type evaluation: :class:`ocw.evaluation.Evaluation`
- :param config_data: Dictionary of the data parsed from the supplied YAML
- configuration file.
- :type: :func:`dict`
- """
- for plot in config_data['plots']:
- if plot['type'] == 'contour':
- _draw_contour_plot(evaluation, plot)
- elif plot['type'] == 'subregion':
- logger.warn('Subregion plots are currently unsupported. Skipping ...')
- continue
- elif plot['type'] == 'taylor':
- logger.warn('Taylor diagrams are currently unsupported. Skipping ...')
- continue
- elif plot['type'] == 'time_series':
- logger.warn('Time series plots are currently unsupported. Skipping ...')
- continue
- elif plot['type'] == 'portrait':
- logger.warn('Portrait diagrams are currently unsupported. Skipping ...')
- continue
- else:
- logger.error('Unrecognized plot type requested: {}'.format(plot['type']))
- continue
-
-def _valid_minimal_config(config_data):
- """"""
- if not 'datasets' in config_data.keys():
- logger.error('No datasets specified in configuration data.')
- return False
-
- if not 'metrics' in config_data.keys():
- logger.error('No metrics specified in configuration data.')
- return False
-
- if _contains_unary_metrics(config_data['metrics']):
- if (not 'reference' in config_data['datasets'].keys() and
- not 'targets' in config_data['datasets'].keys()):
- err = (
- 'Unary metric in configuration data requires either a reference '
- 'or target dataset to be present for evaluation. Please ensure '
- 'that your config is well formed.'
- )
- logger.error(err)
- return False
-
- if _contains_binary_metrics(config_data['metrics']):
- if (not 'reference' in config_data['datasets'].keys() or
- not 'targets' in config_data['datasets'].keys()):
- logger.error(
- 'Binary metric in configuration requires both a reference '
- 'and target dataset to be present for evaluation. Please ensure '
- 'that your config is well formed.'
- )
- return False
-
- return True
-
-def _config_is_well_formed(config_data):
- """"""
- is_well_formed = True
-
- if 'reference' in config_data['datasets']:
- if not _valid_dataset_config_data(config_data['datasets']['reference']):
- is_well_formed = False
-
- if 'targets' in config_data['datasets']:
- targets = config_data['datasets']['targets']
- if type(targets) != type(list()):
- err = (
- 'Expected to find list of target datasets but instead found '
- 'object of type {}'
- ).format(type(targets))
- logger.error(err)
- is_well_formed = False
- else:
- for t in targets:
- if not _valid_dataset_config_data(t):
- is_well_formed = False
-
- available_metrics = _fetch_built_in_metrics()
- for metric in config_data['metrics']:
- if metric not in available_metrics:
- warning = (
- 'Unable to locate metric name {} in built-in metrics. If this '
- 'is not a user defined metric then please check for potential '
- 'misspellings.'
- ).format(metric)
- logger.warn(warning)
- is_well_formed = False
-
- if 'plots' in config_data:
- for plot in config_data['plots']:
- if not _valid_plot_config_data(plot):
- is_well_formed = False
-
- return is_well_formed
-
-def _contains_unary_metrics(config_metric_data):
- """"""
- unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()]
- return any(metric in unarys for metric in config_metric_data)
-
-def _contains_binary_metrics(config_metric_data):
- """"""
- binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()]
- return any(metric in binarys for metric in config_metric_data)
-
-def _fetch_built_in_metrics():
- """"""
- unarys = [cls.__name__ for cls in metrics.UnaryMetric.__subclasses__()]
- binarys = [cls.__name__ for cls in metrics.BinaryMetric.__subclasses__()]
- return unarys + binarys
-
-def _valid_dataset_config_data(dataset_config_data):
- """"""
- try:
- data_source = dataset_config_data['data_source']
- except KeyError:
- logger.error('Dataset does not contain a data_source attribute.')
- return False
-
- if data_source == 'local':
- required_keys = set(['data_source', 'file_count', 'path', 'variable'])
- elif data_source == 'rcmed':
- required_keys = set([
- 'dataset_id',
- 'parameter_id',
- 'min_lat',
- 'max_lat',
- 'min_lon',
- 'max_lon',
- 'start_time',
- 'end_time',
- ])
- elif data_source == 'esgf':
- required_keys = set([
- 'data_source',
- 'dataset_id',
- 'variable',
- 'esgf_username',
- 'esgf_password'
- ])
- elif data_source == 'dap':
- required_keys = set({'url', 'variable'})
- else:
- logger.error('Dataset does not contain a valid data_source location.')
- return False
-
- present_keys = set(dataset_config_data.keys())
- missing_keys = required_keys - present_keys
- contains_required = len(missing_keys) == 0
-
- if contains_required:
- if data_source == 'local' and dataset_config_data['file_count'] > 1:
- # If the dataset is a multi-file dataset then we need to make sure
- # that the file glob pattern is included.
- if not 'file_glob_pattern' in dataset_config_data:
- logger.error(
- 'Multi-file local dataset is missing key: file_glob_pattern'
- )
- return False
- return True
- else:
- missing = sorted(list(missing_keys))
- logger.error(
- 'Dataset does not contain required keys. '
- 'The following keys are missing: {}'.format(', '.join(missing))
- )
- return False
-
-def _valid_plot_config_data(plot_config_data):
- """"""
- try:
- plot_type = plot_config_data['type']
- except KeyError:
- logger.error('Plot config does not include a type attribute.')
- return False
-
- if plot_type == 'contour':
- required_keys = set([
- 'results_indeces',
- 'lats',
- 'lons',
- 'output_name'
- ])
- elif plot_type == 'taylor':
- logger.warn('Taylor diagrams are currently unsupported. Skipping validation')
- elif plot_type == 'subregion':
- logger.warn('Subregion plots are currently unsupported. Skipping validation')
- elif plot_type == 'time_series':
- logger.warn('Time series plots are currently unsupported. Skipping validation')
- elif plot_type == 'portrait':
- logger.warn('Portrait diagrams are currently unsupported. Skipping validation')
- else:
- logger.error('Invalid plot type specified.')
- return False
-
- present_keys = set(plot_config_data.keys())
- missing_keys = required_keys - present_keys
- contains_required = len(missing_keys) == 0
-
- if not contains_required:
- missing = sorted(list(missing_keys))
- logger.error(
- 'Plot config does not contain required keys. '
- 'The following keys are missing: {}'.format(', '.join(missing))
- )
- return False
-
- return True
-
-def _load_dataset(dataset_config_data):
- """"""
- if dataset_config_data['data_source'] == 'local':
- if dataset_config_data['file_count'] > 1:
- logger.error(
- 'Multi-file datasets are currently not supported. Cancelling load '
- 'of the following dataset: {}'.format(dataset_config_data)
- )
- return None
-
- return local.load_file(dataset_config_data['path'],
- dataset_config_data['variable'],
- **dataset_config_data.get('optional_args', {}))
- elif dataset_config_data['data_source'] == 'rcmed':
- return rcmed.parameter_dataset(dataset_config_data['dataset_id'],
- dataset_config_data['parameter_id'],
- dataset_config_data['min_lat'],
- dataset_config_data['max_lat'],
- dataset_config_data['min_lon'],
- dataset_config_data['min_lon'],
- dataset_config_data['start_time'],
- dataset_config_data['end_time'],
- **dataset_config_data.get('optional_args', {}))
- elif dataset_config_data['data_source'] == 'esgf':
- return esgf.load_dataset(dataset_config_data['dataset_id'],
- dataset_config_data['variable'],
- dataset_config_data['esgf_username'],
- dataset_config_data['esgf_password'],
- **dataset_config_data.get('optional_args', {}))
- elif dataset_config_data['data_source'] == 'dap':
- return dap.load(dataset_config_data['url'],
- dataset_config_data['variable'],
- **dataset_config_data('optional_args', {}))
-
-def _prepare_datasets_for_evaluation(reference, target, config_data):
- """"""
- subset = config_data['evaluation'].get('subset', None)
- temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None)
- spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None)
- spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None)
-
- if subset:
- start = dateutil.parser.parse(subset[4])
- end = dateutil.parser.parse(subset[5])
- bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)
-
- if reference:
- reference = dsp.safe_subset(bounds, reference)
-
- if targets:
- targets = [dsp.safe_subset(bounds, t) for t in targets]
-
- if temporal_time_delta:
- resolution = timedelta(temporal_time_delta)
-
- if reference:
- reference = dsp.temporal_rebin(reference, resolution)
-
- if targets:
- targets = [dsp.temporal_rebin(t, resolution) for t in targets]
-
- if spatial_regrid_lats and spatial_regrid_lons:
- lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1], spatial_regrid_lats[2])
- lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1], spatial_regrid_lons[2])
-
- if reference:
- reference = dsp.spatial_regrid(reference, lats, lons)
-
- if targets:
- targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]
-
- return reference, target
-
-
-def _load_metric(metric_config_data):
- """"""
- # If the dataset is user defined outside of ocw.metrics we won't currently
- # handle loading it.
- if '.' in metric_config_data:
- logger.error(
- 'User-defined metrics outside of the ocw.metrics module '
- 'cannot currently be loaded. If you just wanted a metric '
- 'found in ocw.metrics then do not specify the full '
- 'package and module names. See the documentation for examples.'
- )
- return None
-
- return getattr(metrics, metric_config_data)
-
-def _draw_contour_plot(evaluation, plot_config):
- """"""
- row, col = plot_config['results_indeces'][0]
-
- lats = plot_config['lats']
- if type(lats) != type(list):
- lats = range(lats['range_min'], lats['range_max'], lats['range_step'])
-
- lons = plot_config['lons']
- if type(lons) != type(list):
- lons = range(lons['range_min'], lons['range_max'], lons['range_step'])
-
- plots.draw_contour_map(evaluation.results[row][col],
- np.array(lats),
- np.array(lons),
- plot_config['output_name'],
- **plot_config.get('optional_args', {}))
-
if __name__ == '__main__':
description = 'OCW Config Based Evaluation'
epilog = 'Additional information at https://cwiki.apache.org/confluence/display/climate/home#'
http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/plot_generation.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/plot_generation.py b/ocw-config-runner/plot_generation.py
new file mode 100644
index 0000000..c802af2
--- /dev/null
+++ b/ocw-config-runner/plot_generation.py
@@ -0,0 +1,54 @@
+import logging
+
+import ocw.plotter as plots
+
+import numpy as np
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+
+def plot_from_config(evaluation, config_data):
+    """ Generate plots for an evaluation from configuration data.
+
+    Only ``contour`` plots are drawn. The other recognised types log a
+    warning and are skipped, and unrecognised types log an error. A
+    configuration with no ``plots`` section produces no plots.
+
+    :param evaluation: The Evaluation for which to generate plots.
+    :type evaluation: :class:`ocw.evaluation.Evaluation`
+    :param config_data: Dictionary of the data parsed from the supplied YAML
+        configuration file.
+    :type config_data: :func:`dict`
+    """
+    # 'plots' is optional during validation, so tolerate it being absent or
+    # empty here rather than raising KeyError/TypeError.
+    for plot in config_data.get('plots') or []:
+        plot_type = plot['type']
+
+        if plot_type == 'contour':
+            _draw_contour_plot(evaluation, plot)
+        elif plot_type == 'subregion':
+            logger.warn('Subregion plots are currently unsupported. Skipping ...')
+        elif plot_type == 'taylor':
+            logger.warn('Taylor diagrams are currently unsupported. Skipping ...')
+        elif plot_type == 'time_series':
+            logger.warn('Time series plots are currently unsupported. Skipping ...')
+        elif plot_type == 'portrait':
+            logger.warn('Portrait diagrams are currently unsupported. Skipping ...')
+        else:
+            logger.error('Unrecognized plot type requested: {}'.format(plot_type))
+
+def _draw_contour_plot(evaluation, plot_config):
+    """ Draw a contour map for one entry of the evaluation results.
+
+    The first ``[row, col]`` pair in ``results_indeces`` selects the entry
+    ``evaluation.results[row][col]`` to plot. ``lats`` and ``lons`` may each
+    be an explicit list of values, or a mapping with ``range_min``,
+    ``range_max`` and ``range_step`` keys that is expanded with ``range``.
+    Anything under ``optional_args`` is forwarded to the plotter as keyword
+    arguments.
+
+    :param evaluation: The Evaluation whose results are plotted.
+    :type evaluation: :class:`ocw.evaluation.Evaluation`
+    :param plot_config: Mapping describing a single contour plot.
+    :type plot_config: :func:`dict`
+    """
+    row, col = plot_config['results_indeces'][0]
+
+    # isinstance is required here: comparing against type(list) (which is
+    # `type`) was always unequal, so explicit lists were wrongly indexed by
+    # 'range_min' and raised TypeError.
+    lats = plot_config['lats']
+    if not isinstance(lats, list):
+        lats = range(lats['range_min'], lats['range_max'], lats['range_step'])
+
+    lons = plot_config['lons']
+    if not isinstance(lons, list):
+        lons = range(lons['range_min'], lons['range_max'], lons['range_step'])
+
+    plots.draw_contour_map(evaluation.results[row][col],
+                           np.array(lats),
+                           np.array(lons),
+                           plot_config['output_name'],
+                           **plot_config.get('optional_args', {}))
http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/tests/test_config_parsing.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/tests/test_config_parsing.py b/ocw-config-runner/tests/test_config_parsing.py
new file mode 100644
index 0000000..d51bf43
--- /dev/null
+++ b/ocw-config-runner/tests/test_config_parsing.py
@@ -0,0 +1,669 @@
+from mock import patch
+import unittest
+
+import configuration_parsing as parser
+import ocw.metrics as metrics
+
+import yaml
+
+
+class TestIsConfigValid(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ not_minimal_config = """
+ datasets:
+ """
+ self.not_minimal = yaml.load(not_minimal_config)
+
+ not_well_formed_config = """
+ datasets:
+ reference:
+ data_source: local
+ file_count: 1
+ path: /a/fake/path/file.py
+ variable: pr
+
+ targets:
+ - data_source: local
+ file_count: 5
+ file_glob_pattern: something for globbing files here
+ variable: pr
+ optional_args:
+ name: Target1
+
+ - data_source: esgf
+ dataset_id: fake dataset id
+ variable: pr
+ esgf_username: my esgf username
+ esgf_password: my esgf password
+
+ metrics:
+ - Bias
+ - TemporalStdDev
+ """
+ self.not_well_formed = yaml.load(not_well_formed_config)
+
+ @patch('configuration_parsing.logger')
+ def test_not_minimal_config(self, mock_logger):
+ ret = parser.is_config_valid(self.not_minimal)
+ self.assertFalse(ret)
+
+ mock_logger.error.assert_called_with(
+ 'Insufficient configuration file data for an evaluation'
+ )
+
+ @patch('configuration_parsing.logger')
+ def test_not_valid_config(self, mock_logger):
+ ret = parser.is_config_valid(self.not_well_formed)
+ self.assertFalse(ret)
+
+ mock_logger.error.assert_called_with(
+ 'Configuration data is not well formed'
+ )
+
+
+class TestValidMinimalConfig(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ no_datasets_config = """
+ metrics:
+ - Bias
+ """
+ self.no_datasets = yaml.load(no_datasets_config)
+
+ no_metrics_config = """
+ datasets:
+ reference:
+ data_source: dap
+ url: afakeurl.com
+ variable: pr
+ """
+ self.no_metrics = yaml.load(no_metrics_config)
+
+ unary_with_reference_config = """
+ datasets:
+ reference:
+ data_source: dap
+ url: afakeurl.com
+ variable: pr
+
+ metrics:
+ - TemporalStdDev
+ """
+ self.unary_with_reference = yaml.load(unary_with_reference_config)
+
+ unary_with_target_config = """
+ datasets:
+ targets:
+ - data_source: dap
+ url: afakeurl.com
+ variable: pr
+
+ metrics:
+ - TemporalStdDev
+ """
+ self.unary_with_target = yaml.load(unary_with_target_config)
+
+ unary_no_reference_or_target = """
+ datasets:
+ not_ref_or_target:
+ - data_source: dap
+ url: afakeurl.com
+ variable: pr
+
+ metrics:
+ - TemporalStdDev
+ """
+ self.unary_no_ref_or_target = yaml.load(unary_no_reference_or_target)
+
+ binary_valid_config = """
+ datasets:
+ reference:
+ data_source: dap
+ url: afakeurl.com
+ variable: pr
+
+ targets:
+ - data_source: dap
+ url: afakeurl.com
+ variable: pr
+ metrics:
+ - Bias
+ """
+ self.binary_valid = yaml.load(binary_valid_config)
+
+ binary_no_reference_config = """
+ datasets:
+ targets:
+ - data_source: dap
+ url: afakeurl.com
+ variable: pr
+ metrics:
+ - Bias
+ """
+ self.binary_no_reference = yaml.load(binary_no_reference_config)
+
+ binary_no_target_config = """
+ datasets:
+ reference:
+ data_source: dap
+ url: afakeurl.com
+ variable: pr
+
+ metrics:
+ - Bias
+ """
+ self.binary_no_target = yaml.load(binary_no_target_config)
+
+ @patch('configuration_parsing.logger')
+ def test_no_datasets(self, mock_logger):
+ ret = parser._valid_minimal_config(self.no_datasets)
+ self.assertFalse(ret)
+
+ mock_logger.error.assert_called_with(
+ 'No datasets specified in configuration data.'
+ )
+
+ @patch('configuration_parsing.logger')
+ def test_no_metrics(self, mock_logger):
+ ret = parser._valid_minimal_config(self.no_metrics)
+ self.assertFalse(ret)
+
+ mock_logger.error.assert_called_with(
+ 'No metrics specified in configuration data.'
+ )
+
+ def test_unary_with_reference(self):
+ ret = parser._valid_minimal_config(self.unary_with_reference)
+ self.assertTrue(ret)
+
+ def test_unary_with_target(self):
+ ret = parser._valid_minimal_config(self.unary_with_target)
+ self.assertTrue(ret)
+
+ @patch('configuration_parsing.logger')
+ def test_unary_no_datasets(self, mock_logger):
+ ret = parser._valid_minimal_config(self.unary_no_ref_or_target)
+ self.assertFalse(ret)
+
+ mock_logger.error.assert_called_with(
+ 'Unary metric in configuration data requires either a reference '
+ 'or target dataset to be present for evaluation. Please ensure '
+ 'that your config is well formed.'
+ )
+
+ def test_valid_binary(self):
+ ret = parser._valid_minimal_config(self.binary_valid)
+ self.assertTrue(ret)
+
+ @patch('configuration_parsing.logger')
+ def test_binary_no_reference(self, mock_logger):
+ ret = parser._valid_minimal_config(self.binary_no_reference)
+ self.assertFalse(ret)
+
+ mock_logger.error.assert_called_with(
+ 'Binary metric in configuration requires both a reference '
+ 'and target dataset to be present for evaluation. Please ensure '
+ 'that your config is well formed.'
+ )
+
+ @patch('configuration_parsing.logger')
+ def test_binary_no_target(self, mock_logger):
+ ret = parser._valid_minimal_config(self.binary_no_target)
+ self.assertFalse(ret)
+
+ mock_logger.error.assert_called_with(
+ 'Binary metric in configuration requires both a reference '
+ 'and target dataset to be present for evaluation. Please ensure '
+ 'that your config is well formed.'
+ )
+
+
+class TestConfigIsWellFormed(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ malformed_reference_config = """
+ datasets:
+ reference:
+ data_source: notavalidlocation
+
+ metrics:
+ - Bias
+ """
+ self.malformed_reference_conf = yaml.load(malformed_reference_config)
+
+ malformed_target_list_config = """
+ datasets:
+ targets:
+ notalist:
+ a_key: a_value
+
+ alsonotalist:
+ a_key: a_value
+
+ metrics:
+ - Bias
+ """
+ self.malformed_target_list = yaml.load(malformed_target_list_config)
+
+ missing_metric_name_config = """
+ datasets:
+ reference:
+ data_source: dap
+ url: afakeurl.com
+ variable: pr
+
+ metrics:
+ - NotABuiltInMetric
+ """
+ self.missing_metric_name = yaml.load(missing_metric_name_config)
+
+ bad_plot_config = """
+ datasets:
+ reference:
+ data_source: dap
+ url: afakeurl.com
+ variable: pr
+
+ metrics:
+ - Bias
+
+ plots:
+ - type: NotARealPlotName
+ """
+ self.bad_plot = yaml.load(bad_plot_config)
+
+ def test_malformed_reference_config(self):
+ ret = parser._config_is_well_formed(self.malformed_reference_conf)
+ self.assertFalse(ret)
+
+ @patch('configuration_parsing.logger')
+ def test_malformed_target_dataset_list(self, mock_logger):
+ ret = parser._config_is_well_formed(self.malformed_target_list)
+ self.assertFalse(ret)
+
+ mock_logger.error.assert_called_with(
+ "Expected to find list of target datasets but instead found "
+ "object of type <type 'dict'>"
+ )
+
+ def test_not_builtin_metric(self):
+ ret = parser._config_is_well_formed(self.missing_metric_name)
+ self.assertFalse(ret)
+
+ @patch('configuration_parsing.logger')
+ def test_warns_regarding_not_builtin_metric(self, mock_logger):
+ ret = parser._config_is_well_formed(self.missing_metric_name)
+ mock_logger.warn.assert_called_with(
+ 'Unable to locate metric name NotABuiltInMetric in built-in '
+ 'metrics. If this is not a user defined metric then please check '
+ 'for potential misspellings.'
+ )
+
+ def test_bad_plot_config(self):
+ ret = parser._config_is_well_formed(self.bad_plot)
+ self.assertFalse(ret)
+
+
+class MetricFetchTest(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ binary_config = """
+ metrics:
+ - Bias
+ - StdDevRatio
+ """
+ unary_config = """
+ metrics:
+ - TemporalStdDev
+ """
+ self.unary_conf = yaml.load(unary_config)
+ self.binary_conf = yaml.load(binary_config)
+
+ def test_contains_binary_metric(self):
+ ret = parser._contains_binary_metrics(self.binary_conf['metrics'])
+ self.assertTrue(ret)
+
+ def test_does_not_contain_binary_metric(self):
+ ret = parser._contains_binary_metrics(self.unary_conf['metrics'])
+ self.assertFalse(ret)
+
+ def test_contains_unary_metric(self):
+ ret = parser._contains_unary_metrics(self.unary_conf['metrics'])
+ self.assertTrue(ret)
+
+ def test_does_not_contain_unary_metric(self):
+ ret = parser._contains_unary_metrics(self.binary_conf['metrics'])
+ self.assertFalse(ret)
+
+
+class InvalidDatasetConfig(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ example_config_yaml = """
+ - file_count: 1
+ path: /a/fake/path
+ variable: pr
+
+ - data_source: invalid_location_identifier
+ """
+ conf = yaml.load(example_config_yaml)
+ self.missing_data_source = conf[0]
+ self.invalid_data_source = conf[1]
+
+ @patch('configuration_parsing.logger')
+ def test_missing_data_source_config(self, mock_logger):
+ parser._valid_dataset_config_data(self.missing_data_source)
+ mock_logger.error.assert_called_with(
+ 'Dataset does not contain a data_source attribute.'
+ )
+
+ @patch('configuration_parsing.logger')
+ def test_invalid_data_source(self, mock_logger):
+ parser._valid_dataset_config_data(self.invalid_data_source)
+ mock_logger.error.assert_called_with(
+ 'Dataset does not contain a valid data_source location.'
+ )
+
+
+class TestLocalDatasetConfig(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ self.required_local_keys = set(['data_source', 'file_count', 'path', 'variable'])
+ example_config_yaml = """
+ - data_source: local
+ file_count: 1
+ path: /a/fake/path
+ variable: pr
+ optional_args:
+ name: Target1
+
+ - data_source: local
+
+ - data_source: local
+ file_count: 5
+ file_glob_pattern: something for globbing files here
+ variable: pr
+ path: /a/fake/path
+ optional_args:
+ name: Target1
+
+ - data_source: local
+ file_count: 5
+ variable: pr
+ path: /a/fake/path
+ """
+
+ conf = yaml.load(example_config_yaml)
+ self.valid_local_single = conf[0]
+ self.invalid_local_single = conf[1]
+ self.valid_local_multi = conf[2]
+ self.invalid_local_multi = conf[1]
+ self.invalid_local_multi_file_glob = conf[3]
+
+ def test_valid_local_config_single_file(self):
+ ret = parser._valid_dataset_config_data(self.valid_local_single)
+ self.assertTrue(ret)
+
+ def test_valid_local_config_multi_file(self):
+ ret = parser._valid_dataset_config_data(self.valid_local_multi)
+ self.assertTrue(ret)
+
+ @patch('configuration_parsing.logger')
+ def test_invalid_local_config(self, mock_logger):
+ parser._valid_dataset_config_data(self.invalid_local_single)
+
+ present_keys = set(self.invalid_local_single.keys())
+ missing_keys = self.required_local_keys - present_keys
+ missing = sorted(list(missing_keys))
+
+ error = (
+ 'Dataset does not contain required keys. '
+ 'The following keys are missing: {}'.format(', '.join(missing))
+ )
+ mock_logger.error.assert_called_with(error)
+
+ @patch('configuration_parsing.logger')
+ def test_invalid_local_config_multi_file(self, mock_logger):
+ # mutlifile config is handled slightly differently. We should see the
+ # same missing keys in this situation as we would on the single file
+ # local config. We will test for a missing file_glob_pattern in a
+ # different test.
+ parser._valid_dataset_config_data(self.invalid_local_multi)
+
+ present_keys = set(self.invalid_local_multi.keys())
+ missing_keys = self.required_local_keys - present_keys
+ missing = sorted(list(missing_keys))
+
+ error = (
+ 'Dataset does not contain required keys. '
+ 'The following keys are missing: {}'.format(', '.join(missing))
+ )
+ mock_logger.error.assert_called_with(error)
+
+ @patch('configuration_parsing.logger')
+ def test_invalid_local_config_multi_file_missing_file_glob(self, mock_logger):
+ # We can't check for the file_glob_pattern pattern until after we have
+ # verified that the single local file config has been met.
+ parser._valid_dataset_config_data(self.invalid_local_multi_file_glob)
+
+ mock_logger.error.assert_called_with(
+ 'Multi-file local dataset is missing key: file_glob_pattern'
+ )
+
+
+class TestRCMEDDatasetConfig(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ self.required_rcmed_keys = set([
+ 'dataset_id',
+ 'parameter_id',
+ 'min_lat',
+ 'max_lat',
+ 'min_lon',
+ 'max_lon',
+ 'start_time',
+ 'end_time'
+ ])
+ example_config_yaml = """
+ - data_source: rcmed
+ dataset_id: 4
+ parameter_id: 4
+ min_lat: -40
+ max_lat: 40
+ min_lon: -50
+ max_lon: 50
+ start_time: YYYY-MM-DDThh:mm:ss
+ end_time: YYYY-MM-DDThh:mm:ss
+
+ - data_source: rcmed
+ """
+ conf = yaml.load(example_config_yaml)
+ self.valid_rcmed = conf[0]
+ self.invalid_rcmed = conf[1]
+
+ def test_valid_rcmed_config(self):
+ ret = parser._valid_dataset_config_data(self.valid_rcmed)
+ self.assertTrue(ret)
+
+ @patch('configuration_parsing.logger')
+ def test_invalid_rcmed_config(self, mock_logger):
+ parser._valid_dataset_config_data(self.invalid_rcmed)
+
+ present_keys = set(self.invalid_rcmed.keys())
+ missing_keys = self.required_rcmed_keys - present_keys
+ missing = sorted(list(missing_keys))
+
+ error = (
+ 'Dataset does not contain required keys. '
+ 'The following keys are missing: {}'.format(', '.join(missing))
+ )
+ mock_logger.error.assert_called_with(error)
+
+
+class TestESGFDatasetConfig(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ self.required_esgf_keys = set([
+ 'data_source',
+ 'dataset_id',
+ 'variable',
+ 'esgf_username',
+ 'esgf_password'
+ ])
+ example_config_yaml = """
+ - data_source: esgf
+ dataset_id: fake dataset id
+ variable: pr
+ esgf_username: my esgf username
+ esgf_password: my esgf password
+
+ - data_source: esgf
+ """
+ conf = yaml.load(example_config_yaml)
+ self.valid_esgf = conf[0]
+ self.invalid_esgf = conf[1]
+
+ def test_valid_esgf_conf(self):
+ ret = parser._valid_dataset_config_data(self.valid_esgf)
+ self.assertTrue(ret)
+
+ @patch('configuration_parsing.logger')
+ def test_invalid_esgf_conf(self, mock_logger):
+ parser._valid_dataset_config_data(self.invalid_esgf)
+
+ present_keys = set(self.invalid_esgf.keys())
+ missing_keys = self.required_esgf_keys - present_keys
+ missing = sorted(list(missing_keys))
+
+ error = (
+ 'Dataset does not contain required keys. '
+ 'The following keys are missing: {}'.format(', '.join(missing))
+ )
+ mock_logger.error.assert_called_with(error)
+
+
+class TestDAPDatasetConfig(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ self.required_dap_keys = set(['url', 'variable'])
+ example_config_yaml = """
+ - data_source: dap
+ url: afakeurl.com
+ variable: pr
+
+ - data_source: dap
+ """
+ conf = yaml.load(example_config_yaml)
+ self.valid_dap = conf[0]
+ self.invalid_dap = conf[1]
+
+ def test_valid_dap_config(self):
+ ret = parser._valid_dataset_config_data(self.valid_dap)
+ self.assertTrue(ret)
+
+ @patch('configuration_parsing.logger')
+ def test_invalid_dap_config(self, mock_logger):
+ parser._valid_dataset_config_data(self.invalid_dap)
+
+ present_keys = set(self.invalid_dap.keys())
+ missing_keys = self.required_dap_keys - present_keys
+ missing = sorted(list(missing_keys))
+
+ error = (
+ 'Dataset does not contain required keys. '
+ 'The following keys are missing: {}'.format(', '.join(missing))
+ )
+ mock_logger.error.assert_called_with(error)
+
+
+class ContourMapConfig(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ valid_contour_config = """
+ type: contour
+ results_indeces:
+ - !!python/tuple [0, 0]
+ lats:
+ range_min: -20
+ range_max: 20
+ range_step: 1
+ lons:
+ range_min: -20
+ range_max: 20
+ range_step: 1
+ output_name: wrf_bias_compared_to_knmi
+ """
+ self.valid_contour = yaml.load(valid_contour_config)
+
+ missing_keys_contour_config = """
+ type: contour
+ """
+ self.missing_keys_contour = yaml.load(missing_keys_contour_config)
+
+ self.required_contour_keys = set([
+ 'results_indeces',
+ 'lats',
+ 'lons',
+ 'output_name'
+ ])
+
+ def test_valid_contour(self):
+ ret = parser._valid_plot_config_data(self.valid_contour)
+ self.assertTrue(ret)
+
+ @patch('configuration_parsing.logger')
+ def test_missing_keys_contour(self, mock_logger):
+ ret = parser._valid_plot_config_data(self.missing_keys_contour)
+
+ present_keys = set(self.missing_keys_contour.keys())
+ missing_keys = self.required_contour_keys - present_keys
+ missing = sorted(list(missing_keys))
+
+ err = (
+ 'Plot config does not contain required keys. '
+ 'The following keys are missing: {}'
+ ).format(', '.join(missing))
+ mock_logger.error.assert_called_with(err)
+
+
+class TestInvalidPlotConfig(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ bad_plot_type_config = """
+ type: NotAPlotType
+ """
+ self.bad_plot_type = yaml.load(bad_plot_type_config)
+
+ missing_plot_type_config = """
+ results_indeces:
+ - !!python/tuple [0, 0]
+ lats:
+ range_min: -20
+ range_max: 20
+ range_step: 1
+ lons:
+ range_min: -20
+ range_max: 20
+ range_step: 1
+ output_name: wrf_bias_compared_to_knmi
+ """
+ self.missing_plot_type = yaml.load(missing_plot_type_config)
+
+ @patch('configuration_parsing.logger')
+ def test_invalid_plot_type(self, mock_logger):
+ ret = parser._valid_plot_config_data(self.bad_plot_type)
+ self.assertFalse(ret)
+
+ mock_logger.error.assert_called_with(
+ 'Invalid plot type specified.'
+ )
+
+ @patch('configuration_parsing.logger')
+ def test_missing_plot_type(self, mock_logger):
+ ret = parser._valid_plot_config_data(self.missing_plot_type)
+ self.assertFalse(ret)
+
+ mock_logger.error.assert_called_with(
+ 'Plot config does not include a type attribute.'
+ )
http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/tests/test_config_runner.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/tests/test_config_runner.py b/ocw-config-runner/tests/test_config_runner.py
deleted file mode 100644
index 993971e..0000000
--- a/ocw-config-runner/tests/test_config_runner.py
+++ /dev/null
@@ -1,711 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from mock import patch
-import unittest
-
-import ocw_evaluation_from_config as config_runner
-import ocw.metrics
-
-import yaml
-
-class TestMetricLoad(unittest.TestCase):
- def test_valid_metric_load(self):
- config = yaml.load("""
- metrics:
- - Bias
- """)
- loaded_metrics = [config_runner._load_metric(m)()
- for m in config['metrics']]
- self.assertTrue(isinstance(loaded_metrics[0], ocw.metrics.Bias))
-
- @patch('ocw_evaluation_from_config.logger')
- def test_invalid_metric_load(self, mock_logger):
- config = yaml.load("""
- metrics:
- - ocw.metrics.Bias
- """)
- config_runner._load_metric(config['metrics'][0])
- error = (
- 'User-defined metrics outside of the ocw.metrics module '
- 'cannot currently be loaded. If you just wanted a metric '
- 'found in ocw.metrics then do not specify the full '
- 'package and module names. See the documentation for examples.'
- )
- mock_logger.error.assert_called_with(error)
-
-
-class TestRCMEDDatasetConfig(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- self.required_rcmed_keys = set([
- 'dataset_id',
- 'parameter_id',
- 'min_lat',
- 'max_lat',
- 'min_lon',
- 'max_lon',
- 'start_time',
- 'end_time'
- ])
- example_config_yaml = """
- - data_source: rcmed
- dataset_id: 4
- parameter_id: 4
- min_lat: -40
- max_lat: 40
- min_lon: -50
- max_lon: 50
- start_time: YYYY-MM-DDThh:mm:ss
- end_time: YYYY-MM-DDThh:mm:ss
-
- - data_source: rcmed
- """
- conf = yaml.load(example_config_yaml)
- self.valid_rcmed = conf[0]
- self.invalid_rcmed = conf[1]
-
- def test_valid_rcmed_config(self):
- ret = config_runner._valid_dataset_config_data(self.valid_rcmed)
- self.assertTrue(ret)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_invalid_rcmed_config(self, mock_logger):
- config_runner._valid_dataset_config_data(self.invalid_rcmed)
-
- present_keys = set(self.invalid_rcmed.keys())
- missing_keys = self.required_rcmed_keys - present_keys
- missing = sorted(list(missing_keys))
-
- error = (
- 'Dataset does not contain required keys. '
- 'The following keys are missing: {}'.format(', '.join(missing))
- )
- mock_logger.error.assert_called_with(error)
-
-
-class TestLocalDatasetConfig(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- self.required_local_keys = set(['data_source', 'file_count', 'path', 'variable'])
- example_config_yaml = """
- - data_source: local
- file_count: 1
- path: /a/fake/path
- variable: pr
- optional_args:
- name: Target1
-
- - data_source: local
-
- - data_source: local
- file_count: 5
- file_glob_pattern: something for globbing files here
- variable: pr
- path: /a/fake/path
- optional_args:
- name: Target1
-
- - data_source: local
- file_count: 5
- variable: pr
- path: /a/fake/path
- """
-
- conf = yaml.load(example_config_yaml)
- self.valid_local_single = conf[0]
- self.invalid_local_single = conf[1]
- self.valid_local_multi = conf[2]
- self.invalid_local_multi = conf[1]
- self.invalid_local_multi_file_glob = conf[3]
-
- def test_valid_local_config_single_file(self):
- ret = config_runner._valid_dataset_config_data(self.valid_local_single)
- self.assertTrue(ret)
-
- def test_valid_local_config_multi_file(self):
- ret = config_runner._valid_dataset_config_data(self.valid_local_multi)
- self.assertTrue(ret)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_invalid_local_config(self, mock_logger):
- config_runner._valid_dataset_config_data(self.invalid_local_single)
-
- present_keys = set(self.invalid_local_single.keys())
- missing_keys = self.required_local_keys - present_keys
- missing = sorted(list(missing_keys))
-
- error = (
- 'Dataset does not contain required keys. '
- 'The following keys are missing: {}'.format(', '.join(missing))
- )
- mock_logger.error.assert_called_with(error)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_invalid_local_config_multi_file(self, mock_logger):
- # mutlifile config is handled slightly differently. We should see the
- # same missing keys in this situation as we would on the single file
- # local config. We will test for a missing file_glob_pattern in a
- # different test.
- config_runner._valid_dataset_config_data(self.invalid_local_multi)
-
- present_keys = set(self.invalid_local_multi.keys())
- missing_keys = self.required_local_keys - present_keys
- missing = sorted(list(missing_keys))
-
- error = (
- 'Dataset does not contain required keys. '
- 'The following keys are missing: {}'.format(', '.join(missing))
- )
- mock_logger.error.assert_called_with(error)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_invalid_local_config_multi_file_missing_file_glob(self, mock_logger):
- # We can't check for the file_glob_pattern pattern until after we have
- # verified that the single local file config has been met.
- config_runner._valid_dataset_config_data(self.invalid_local_multi_file_glob)
-
- mock_logger.error.assert_called_with(
- 'Multi-file local dataset is missing key: file_glob_pattern'
- )
-
-
-class TestESGFDatasetConfig(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- self.required_esgf_keys = set([
- 'data_source',
- 'dataset_id',
- 'variable',
- 'esgf_username',
- 'esgf_password'
- ])
- example_config_yaml = """
- - data_source: esgf
- dataset_id: fake dataset id
- variable: pr
- esgf_username: my esgf username
- esgf_password: my esgf password
-
- - data_source: esgf
- """
- conf = yaml.load(example_config_yaml)
- self.valid_esgf = conf[0]
- self.invalid_esgf = conf[1]
-
- def test_valid_esgf_conf(self):
- ret = config_runner._valid_dataset_config_data(self.valid_esgf)
- self.assertTrue(ret)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_invalid_esgf_conf(self, mock_logger):
- config_runner._valid_dataset_config_data(self.invalid_esgf)
-
- present_keys = set(self.invalid_esgf.keys())
- missing_keys = self.required_esgf_keys - present_keys
- missing = sorted(list(missing_keys))
-
- error = (
- 'Dataset does not contain required keys. '
- 'The following keys are missing: {}'.format(', '.join(missing))
- )
- mock_logger.error.assert_called_with(error)
-
-
-class TestDAPDatasetConfig(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- self.required_dap_keys = set(['url', 'variable'])
- example_config_yaml = """
- - data_source: dap
- url: afakeurl.com
- variable: pr
-
- - data_source: dap
- """
- conf = yaml.load(example_config_yaml)
- self.valid_dap = conf[0]
- self.invalid_dap = conf[1]
-
- def test_valid_dap_config(self):
- ret = config_runner._valid_dataset_config_data(self.valid_dap)
- self.assertTrue(ret)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_invalid_dap_config(self, mock_logger):
- config_runner._valid_dataset_config_data(self.invalid_dap)
-
- present_keys = set(self.invalid_dap.keys())
- missing_keys = self.required_dap_keys - present_keys
- missing = sorted(list(missing_keys))
-
- error = (
- 'Dataset does not contain required keys. '
- 'The following keys are missing: {}'.format(', '.join(missing))
- )
- mock_logger.error.assert_called_with(error)
-
-
-class InvalidDatasetConfig(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- example_config_yaml = """
- - file_count: 1
- path: /a/fake/path
- variable: pr
-
- - data_source: invalid_location_identifier
- """
- conf = yaml.load(example_config_yaml)
- self.missing_data_source = conf[0]
- self.invalid_data_source = conf[1]
-
- @patch('ocw_evaluation_from_config.logger')
- def test_missing_data_source_config(self, mock_logger):
- config_runner._valid_dataset_config_data(self.missing_data_source)
- mock_logger.error.assert_called_with(
- 'Dataset does not contain a data_source attribute.'
- )
-
- @patch('ocw_evaluation_from_config.logger')
- def test_invalid_data_source(self, mock_logger):
- config_runner._valid_dataset_config_data(self.invalid_data_source)
- mock_logger.error.assert_called_with(
- 'Dataset does not contain a valid data_source location.'
- )
-
-
-class MetricFetchTest(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- binary_config = """
- metrics:
- - Bias
- - StdDevRatio
- """
- unary_config = """
- metrics:
- - TemporalStdDev
- """
- self.unary_conf = yaml.load(unary_config)
- self.binary_conf = yaml.load(binary_config)
-
- def test_contains_binary_metric(self):
- ret = config_runner._contains_binary_metrics(self.binary_conf['metrics'])
- self.assertTrue(ret)
-
- def test_does_not_contain_binary_metric(self):
- ret = config_runner._contains_binary_metrics(self.unary_conf['metrics'])
- self.assertFalse(ret)
-
- def test_contains_unary_metric(self):
- ret = config_runner._contains_unary_metrics(self.unary_conf['metrics'])
- self.assertTrue(ret)
-
- def test_does_not_contain_unary_metric(self):
- ret = config_runner._contains_unary_metrics(self.binary_conf['metrics'])
- self.assertFalse(ret)
-
-
-class ContourMapConfig(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- valid_contour_config = """
- type: contour
- results_indeces:
- - !!python/tuple [0, 0]
- lats:
- range_min: -20
- range_max: 20
- range_step: 1
- lons:
- range_min: -20
- range_max: 20
- range_step: 1
- output_name: wrf_bias_compared_to_knmi
- """
- self.valid_contour = yaml.load(valid_contour_config)
-
- missing_keys_contour_config = """
- type: contour
- """
- self.missing_keys_contour = yaml.load(missing_keys_contour_config)
-
- self.required_contour_keys = set([
- 'results_indeces',
- 'lats',
- 'lons',
- 'output_name'
- ])
-
- def test_valid_contour(self):
- ret = config_runner._valid_plot_config_data(self.valid_contour)
- self.assertTrue(ret)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_missing_keys_contour(self, mock_logger):
- ret = config_runner._valid_plot_config_data(self.missing_keys_contour)
-
- present_keys = set(self.missing_keys_contour.keys())
- missing_keys = self.required_contour_keys - present_keys
- missing = sorted(list(missing_keys))
-
- err = (
- 'Plot config does not contain required keys. '
- 'The following keys are missing: {}'
- ).format(', '.join(missing))
- mock_logger.error.assert_called_with(err)
-
-
-class TestInvalidPlotConfig(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- bad_plot_type_config = """
- type: NotAPlotType
- """
- self.bad_plot_type = yaml.load(bad_plot_type_config)
-
- missing_plot_type_config = """
- results_indeces:
- - !!python/tuple [0, 0]
- lats:
- range_min: -20
- range_max: 20
- range_step: 1
- lons:
- range_min: -20
- range_max: 20
- range_step: 1
- output_name: wrf_bias_compared_to_knmi
- """
- self.missing_plot_type = yaml.load(missing_plot_type_config)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_invalid_plot_type(self, mock_logger):
- ret = config_runner._valid_plot_config_data(self.bad_plot_type)
- self.assertFalse(ret)
-
- mock_logger.error.assert_called_with(
- 'Invalid plot type specified.'
- )
-
- @patch('ocw_evaluation_from_config.logger')
- def test_missing_plot_type(self, mock_logger):
- ret = config_runner._valid_plot_config_data(self.missing_plot_type)
- self.assertFalse(ret)
-
- mock_logger.error.assert_called_with(
- 'Plot config does not include a type attribute.'
- )
-
-
-class TestValidMinimalConfig(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- no_datasets_config = """
- metrics:
- - Bias
- """
- self.no_datasets = yaml.load(no_datasets_config)
-
- no_metrics_config = """
- datasets:
- reference:
- data_source: dap
- url: afakeurl.com
- variable: pr
- """
- self.no_metrics = yaml.load(no_metrics_config)
-
- unary_with_reference_config = """
- datasets:
- reference:
- data_source: dap
- url: afakeurl.com
- variable: pr
-
- metrics:
- - TemporalStdDev
- """
- self.unary_with_reference = yaml.load(unary_with_reference_config)
-
- unary_with_target_config = """
- datasets:
- targets:
- - data_source: dap
- url: afakeurl.com
- variable: pr
-
- metrics:
- - TemporalStdDev
- """
- self.unary_with_target = yaml.load(unary_with_target_config)
-
- unary_no_reference_or_target = """
- datasets:
- not_ref_or_target:
- - data_source: dap
- url: afakeurl.com
- variable: pr
-
- metrics:
- - TemporalStdDev
- """
- self.unary_no_ref_or_target = yaml.load(unary_no_reference_or_target)
-
- binary_valid_config = """
- datasets:
- reference:
- data_source: dap
- url: afakeurl.com
- variable: pr
-
- targets:
- - data_source: dap
- url: afakeurl.com
- variable: pr
- metrics:
- - Bias
- """
- self.binary_valid = yaml.load(binary_valid_config)
-
- binary_no_reference_config = """
- datasets:
- targets:
- - data_source: dap
- url: afakeurl.com
- variable: pr
- metrics:
- - Bias
- """
- self.binary_no_reference = yaml.load(binary_no_reference_config)
-
- binary_no_target_config = """
- datasets:
- reference:
- data_source: dap
- url: afakeurl.com
- variable: pr
-
- metrics:
- - Bias
- """
- self.binary_no_target = yaml.load(binary_no_target_config)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_no_datasets(self, mock_logger):
- ret = config_runner._valid_minimal_config(self.no_datasets)
- self.assertFalse(ret)
-
- mock_logger.error.assert_called_with(
- 'No datasets specified in configuration data.'
- )
-
- @patch('ocw_evaluation_from_config.logger')
- def test_no_metrics(self, mock_logger):
- ret = config_runner._valid_minimal_config(self.no_metrics)
- self.assertFalse(ret)
-
- mock_logger.error.assert_called_with(
- 'No metrics specified in configuration data.'
- )
-
- def test_unary_with_reference(self):
- ret = config_runner._valid_minimal_config(self.unary_with_reference)
- self.assertTrue(ret)
-
- def test_unary_with_target(self):
- ret = config_runner._valid_minimal_config(self.unary_with_target)
- self.assertTrue(ret)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_unary_no_datasets(self, mock_logger):
- ret = config_runner._valid_minimal_config(self.unary_no_ref_or_target)
- self.assertFalse(ret)
-
- mock_logger.error.assert_called_with(
- 'Unary metric in configuration data requires either a reference '
- 'or target dataset to be present for evaluation. Please ensure '
- 'that your config is well formed.'
- )
-
- def test_valid_binary(self):
- ret = config_runner._valid_minimal_config(self.binary_valid)
- self.assertTrue(ret)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_binary_no_reference(self, mock_logger):
- ret = config_runner._valid_minimal_config(self.binary_no_reference)
- self.assertFalse(ret)
-
- mock_logger.error.assert_called_with(
- 'Binary metric in configuration requires both a reference '
- 'and target dataset to be present for evaluation. Please ensure '
- 'that your config is well formed.'
- )
-
- @patch('ocw_evaluation_from_config.logger')
- def test_binary_no_target(self, mock_logger):
- ret = config_runner._valid_minimal_config(self.binary_no_target)
- self.assertFalse(ret)
-
- mock_logger.error.assert_called_with(
- 'Binary metric in configuration requires both a reference '
- 'and target dataset to be present for evaluation. Please ensure '
- 'that your config is well formed.'
- )
-
-
-class TestIsConfigValid(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- not_minimal_config = """
- datasets:
- """
- self.not_minimal = yaml.load(not_minimal_config)
-
- not_well_formed_config = """
- datasets:
- reference:
- data_source: local
- file_count: 1
- path: /a/fake/path/file.py
- variable: pr
-
- targets:
- - data_source: local
- file_count: 5
- file_glob_pattern: something for globbing files here
- variable: pr
- optional_args:
- name: Target1
-
- - data_source: esgf
- dataset_id: fake dataset id
- variable: pr
- esgf_username: my esgf username
- esgf_password: my esgf password
-
- metrics:
- - Bias
- - TemporalStdDev
- """
- self.not_well_formed = yaml.load(not_well_formed_config)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_not_minimal_config(self, mock_logger):
- ret = config_runner.is_config_valid(self.not_minimal)
- self.assertFalse(ret)
-
- mock_logger.error.assert_called_with(
- 'Insufficient configuration file data for an evaluation'
- )
-
- @patch('ocw_evaluation_from_config.logger')
- def test_not_valid_config(self, mock_logger):
- ret = config_runner.is_config_valid(self.not_well_formed)
- self.assertFalse(ret)
-
- mock_logger.error.assert_called_with(
- 'Configuration data is not well formed'
- )
-
-
-class TestConfigIsWellFormed(unittest.TestCase):
- @classmethod
- def setUpClass(self):
- malformed_reference_config = """
- datasets:
- reference:
- data_source: notavalidlocation
-
- metrics:
- - Bias
- """
- self.malformed_reference_conf = yaml.load(malformed_reference_config)
-
- malformed_target_list_config = """
- datasets:
- targets:
- notalist:
- a_key: a_value
-
- alsonotalist:
- a_key: a_value
-
- metrics:
- - Bias
- """
- self.malformed_target_list = yaml.load(malformed_target_list_config)
-
- missing_metric_name_config = """
- datasets:
- reference:
- data_source: dap
- url: afakeurl.com
- variable: pr
-
- metrics:
- - NotABuiltInMetric
- """
- self.missing_metric_name = yaml.load(missing_metric_name_config)
-
- bad_plot_config = """
- datasets:
- reference:
- data_source: dap
- url: afakeurl.com
- variable: pr
-
- metrics:
- - Bias
-
- plots:
- - type: NotARealPlotName
- """
- bad_plot = yaml.load(bad_plot_config)
-
- def test_malformed_reference_config(self):
- ret = config_runner._config_is_well_formed(self.malformed_reference_conf)
- self.assertFalse(ret)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_malformed_target_dataset_list(self, mock_logger):
- ret = config_runner._config_is_well_formed(self.malformed_target_list)
- self.assertFalse(ret)
-
- mock_logger.error.assert_called_with(
- "Expected to find list of target datasets but instead found "
- "object of type <type 'dict'>"
- )
-
- def test_not_builtin_metric(self):
- ret = config_runner._config_is_well_formed(self.missing_metric_name)
- self.assertFalse(ret)
-
- @patch('ocw_evaluation_from_config.logger')
- def test_warns_regarding_not_builtin_metric(self, mock_logger):
- ret = config_runner._config_is_well_formed(self.missing_metric_name)
- mock_logger.warn.assert_called_with(
- 'Unable to locate metric name NotABuiltInMetric in built-in '
- 'metrics. If this is not a user defined metric then please check '
- 'for potential misspellings.'
- )
-
- def test_bad_plot_config(self):
- ret = config_runner._config_is_well_formed(self.missing_metric_name)
- self.assertFalse(ret)
http://git-wip-us.apache.org/repos/asf/climate/blob/c90440b2/ocw-config-runner/tests/test_evaluation_creation.py
----------------------------------------------------------------------
diff --git a/ocw-config-runner/tests/test_evaluation_creation.py b/ocw-config-runner/tests/test_evaluation_creation.py
new file mode 100644
index 0000000..6fc79f8
--- /dev/null
+++ b/ocw-config-runner/tests/test_evaluation_creation.py
@@ -0,0 +1,33 @@
+from mock import patch
+import unittest
+
+import evaluation_creation as eval_create
+import ocw.metrics
+
+import yaml
+
+
+class TestMetricLoad(unittest.TestCase):
+ def test_valid_metric_load(self):
+ config = yaml.load("""
+ metrics:
+ - Bias
+ """)
+ loaded_metrics = [eval_create._load_metric(m)()
+ for m in config['metrics']]
+ self.assertTrue(isinstance(loaded_metrics[0], ocw.metrics.Bias))
+
+ @patch('evaluation_creation.logger')
+ def test_invalid_metric_load(self, mock_logger):
+ config = yaml.load("""
+ metrics:
+ - ocw.metrics.Bias
+ """)
+ eval_create._load_metric(config['metrics'][0])
+ error = (
+ 'User-defined metrics outside of the ocw.metrics module '
+ 'cannot currently be loaded. If you just wanted a metric '
+ 'found in ocw.metrics then do not specify the full '
+ 'package and module names. See the documentation for examples.'
+ )
+ mock_logger.error.assert_called_with(error)
[2/2] climate git commit: Resolve CLIMATE-588. Merge PR #159.
Posted by jo...@apache.org.
Resolve CLIMATE-588. Merge PR #159.
Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/6e73c8a5
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/6e73c8a5
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/6e73c8a5
Branch: refs/heads/master
Commit: 6e73c8a519f6a2236c3a62dd926716b336e1a028
Parents: 58703e0 c90440b
Author: Michael Joyce <jo...@apache.org>
Authored: Fri Feb 27 09:30:55 2015 -0800
Committer: Michael Joyce <jo...@apache.org>
Committed: Fri Feb 27 09:30:55 2015 -0800
----------------------------------------------------------------------
ocw-config-runner/configuration_parsing.py | 218 ++++++
ocw-config-runner/evaluation_creation.py | 132 ++++
ocw-config-runner/ocw_evaluation_from_config.py | 386 +---------
ocw-config-runner/plot_generation.py | 54 ++
ocw-config-runner/tests/test_config_parsing.py | 669 +++++++++++++++++
ocw-config-runner/tests/test_config_runner.py | 711 -------------------
.../tests/test_evaluation_creation.py | 33 +
7 files changed, 1109 insertions(+), 1094 deletions(-)
----------------------------------------------------------------------