You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by rk...@apache.org on 2023/08/21 20:03:02 UTC
[incubator-sdap-nexus] branch master updated: SDAP-475 Algorithm bug fixes (#259)

This is an automated email from the ASF dual-hosted git repository.

rkk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git


The following commit(s) were added to refs/heads/master by this push:
     new 2fea55a  SDAP-475 Algorithm bug fixes (#259)
2fea55a is described below

commit 2fea55a2e9e0ccc2eaed14bc359038c0f477f92e
Author: Kevin <ke...@gmail.com>
AuthorDate: Mon Aug 21 13:02:56 2023 -0700

    SDAP-475 Algorithm bug fixes (#259)
    
    * Bug fix for fewer than 12 months of climatology data
    
    * Bug fix for newer numpy versions breaking array creation when the underlying sequence contains elements of different sizes
    
    * Update CHANGELOG.md
    
    ---------
    
    Co-authored-by: Riley Kuttruff <72...@users.noreply.github.com>
---
 CHANGELOG.md                                            |  1 +
 analysis/webservice/algorithms_spark/TimeAvgMapSpark.py |  4 ++--
 analysis/webservice/algorithms_spark/TimeSeriesSpark.py | 15 +++++----------
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14b2576..61db7a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - SDAP-465: Removed `climatology` directory. 
 ### Fixed
 - SDAP-474: Fixed bug in CSV attributes where secondary dataset would be rendered as comma separated characters
+- SDAP-475: Bug fixes for `/timeSeriesSpark` and `/timeAvgMapSpark`
 ### Security
 
 ## [1.1.0] - 2023-04-26
diff --git a/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py b/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
index 45130ca..750ba59 100644
--- a/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
@@ -169,9 +169,9 @@ class TimeAvgMapNexusSparkHandlerImpl(NexusCalcSparkHandler):
                                                               self._maxLonCent))
 
         # Create array of tuples to pass to Spark map function
-        nexus_tiles_spark = [[self._find_tile_bounds(t),
+        nexus_tiles_spark = np.array([[self._find_tile_bounds(t),
                               self._startTime, self._endTime,
-                              self._ds] for t in nexus_tiles]
+                              self._ds] for t in nexus_tiles], dtype='object')
 
         # Remove empty tiles (should have bounds set to None)
         bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
index faeaa0b..90ae14d 100644
--- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
@@ -226,19 +226,14 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
                                                spark_nparts=spark_nparts,
                                                sc=self._sc)
                 clim_indexed_by_month = {datetime.utcfromtimestamp(result['time']).month: result for result in results_clim}
-                if len(clim_indexed_by_month) < 12:
-                    raise NexusProcessingException(reason="There are only " +
-                                                   len(clim_indexed_by_month) + " months of climatology data for dataset " + 
-                                                   shortName + ". A full year of climatology data is required for computing deseasoned timeseries.")
-
+                
                 for result in results:
                     month = datetime.utcfromtimestamp(result['time']).month
 
-                    result['meanSeasonal'] = result['mean'] - clim_indexed_by_month[month]['mean']
-                    result['minSeasonal'] = result['min'] - clim_indexed_by_month[month]['min']
-                    result['maxSeasonal'] = result['max'] - clim_indexed_by_month[month]['max']
-                self.log.info(
-                    "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))
+                    result['meanSeasonal'] = result['mean'] - clim_indexed_by_month.get(month, result)['mean']
+                    result['minSeasonal'] = result['min'] - clim_indexed_by_month.get(month, result)['min']
+                    result['maxSeasonal'] = result['max'] - clim_indexed_by_month.get(month, result)['max']
+                self.log.info("Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))
 
             the_time = datetime.now()
             filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False, applyLowPass=apply_low_pass_filter)