You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by ea...@apache.org on 2020/09/14 20:50:42 UTC
[incubator-sdap-nexus] branch support-deseason updated: compute deseason
This is an automated email from the ASF dual-hosted git repository.
eamonford pushed a commit to branch support-deseason
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
The following commit(s) were added to refs/heads/support-deseason by this push:
new 2c32fbd compute deseason
2c32fbd is described below
commit 2c32fbd4605f549a13249c4a5c56ac6482081442
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Mon Sep 14 13:50:23 2020 -0700
compute deseason
---
.../webservice/algorithms_spark/TimeSeriesSpark.py | 30 ++++++++++++++--------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
index d56b46b..079143b 100644
--- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
@@ -204,27 +204,35 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
if apply_seasonal_cycle_filter:
the_time = datetime.now()
# get time series for _clim dataset
+ shortName_clim = shortName + "_clim"
daysinrange_clim = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
bounding_polygon.bounds[3],
bounding_polygon.bounds[0],
bounding_polygon.bounds[2],
- shortName,
+ shortName_clim,
0,
31535999,
metrics_callback=metrics_record.record_metrics)
+ results_clim, _ = spark_driver(daysinrange_clim,
+ bounding_polygon,
+ shortName_clim,
+ self._tile_service_factory,
+ metrics_record.record_metrics,
+ spark_nparts=spark_nparts,
+ sc=self._sc)
+ clim_indexed_by_month = {datetime.utcfromtimestamp(
+ result['time']).month: result for result in results_clim}
+ if len(clim_indexed_by_month) < 12:
+ raise NexusProcessingException(reason="There are only " +
+ len(clim_indexed_by_month) + " months of climatology data for dataset " +
+ shortName + ". A full year of climatology data is required for computing deseasoned timeseries.")
for result in results:
- # aline _clim time series with original time series
-
month = datetime.utcfromtimestamp(result['time']).month
- month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt,
- shortName)
- seasonal_mean = result['mean'] - month_mean
- seasonal_min = result['min'] - month_min
- seasonal_max = result['max'] - month_max
- result['meanSeasonal'] = seasonal_mean
- result['minSeasonal'] = seasonal_min
- result['maxSeasonal'] = seasonal_max
+
+ result['meanSeasonal'] = result['mean'] - clim_indexed_by_month[month]['mean']
+ result['minSeasonal'] = result['min'] - clim_indexed_by_month[month]['min']
+ result['maxSeasonal'] = result['max'] - clim_indexed_by_month[month]['max']
self.log.info(
"Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))