You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by ea...@apache.org on 2020/09/14 20:50:42 UTC

[incubator-sdap-nexus] branch support-deseason updated: compute deseason

This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a commit to branch support-deseason
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git


The following commit(s) were added to refs/heads/support-deseason by this push:
     new 2c32fbd  compute deseason
2c32fbd is described below

commit 2c32fbd4605f549a13249c4a5c56ac6482081442
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Mon Sep 14 13:50:23 2020 -0700

    compute deseason
---
 .../webservice/algorithms_spark/TimeSeriesSpark.py | 30 ++++++++++++++--------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
index d56b46b..079143b 100644
--- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
@@ -204,27 +204,35 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
             if apply_seasonal_cycle_filter:
                 the_time = datetime.now()
                 # get time series for _clim dataset
+                shortName_clim = shortName + "_clim"
                 daysinrange_clim = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
                                                                                    bounding_polygon.bounds[3],
                                                                                    bounding_polygon.bounds[0],
                                                                                    bounding_polygon.bounds[2],
-                                                                                   shortName,
+                                                                                   shortName_clim,
                                                                                    0,
                                                                                    31535999,
                                                                                    metrics_callback=metrics_record.record_metrics)
+                results_clim, _ = spark_driver(daysinrange_clim,
+                                               bounding_polygon,
+                                               shortName_clim,
+                                               self._tile_service_factory,
+                                               metrics_record.record_metrics,
+                                               spark_nparts=spark_nparts,
+                                               sc=self._sc)
+                clim_indexed_by_month = {datetime.utcfromtimestamp(
+                    result['time']).month: result for result in results_clim}
+                if len(clim_indexed_by_month) < 12:
+                    raise NexusProcessingException(reason="There are only " +
+                                                   len(clim_indexed_by_month) + " months of climatology data for dataset " + 
+                                                   shortName + ". A full year of climatology data is required for computing deseasoned timeseries.")
 
                 for result in results:
-                    # aline _clim time series with original time series
-
                     month = datetime.utcfromtimestamp(result['time']).month
-                    month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt,
-                                                                                      shortName)
-                    seasonal_mean = result['mean'] - month_mean
-                    seasonal_min = result['min'] - month_min
-                    seasonal_max = result['max'] - month_max
-                    result['meanSeasonal'] = seasonal_mean
-                    result['minSeasonal'] = seasonal_min
-                    result['maxSeasonal'] = seasonal_max
+
+                    result['meanSeasonal'] = result['mean'] - clim_indexed_by_month[month]['mean']
+                    result['minSeasonal'] = result['min'] - clim_indexed_by_month[month]['min']
+                    result['maxSeasonal'] = result['max'] - clim_indexed_by_month[month]['max']
                 self.log.info(
                     "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))