You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by rk...@apache.org on 2023/08/21 20:03:02 UTC
[incubator-sdap-nexus] branch master updated: SDAP-475 Algorithm bug fixes (#259)
This is an automated email from the ASF dual-hosted git repository.
rkk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
The following commit(s) were added to refs/heads/master by this push:
new 2fea55a SDAP-475 Algorithm bug fixes (#259)
2fea55a is described below
commit 2fea55a2e9e0ccc2eaed14bc359038c0f477f92e
Author: Kevin <ke...@gmail.com>
AuthorDate: Mon Aug 21 13:02:56 2023 -0700
SDAP-475 Algorithm bug fixes (#259)
* Bug fix for deseasoned time series when fewer than 12 months of climatology data are available
* Bug fix for newer numpy versions breaking array creation when the underlying sequence contains elements of different sizes (ragged sequences)
* Update CHANGELOG.md
---------
Co-authored-by: Riley Kuttruff <72...@users.noreply.github.com>
---
CHANGELOG.md | 1 +
analysis/webservice/algorithms_spark/TimeAvgMapSpark.py | 4 ++--
analysis/webservice/algorithms_spark/TimeSeriesSpark.py | 15 +++++----------
3 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14b2576..61db7a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- SDAP-465: Removed `climatology` directory.
### Fixed
- SDAP-474: Fixed bug in CSV attributes where secondary dataset would be rendered as comma separated characters
+- SDAP-475: Bug fixes for `/timeSeriesSpark` and `/timeAvgMapSpark`
### Security
## [1.1.0] - 2023-04-26
diff --git a/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py b/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
index 45130ca..750ba59 100644
--- a/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
@@ -169,9 +169,9 @@ class TimeAvgMapNexusSparkHandlerImpl(NexusCalcSparkHandler):
self._maxLonCent))
# Create array of tuples to pass to Spark map function
- nexus_tiles_spark = [[self._find_tile_bounds(t),
+ nexus_tiles_spark = np.array([[self._find_tile_bounds(t),
self._startTime, self._endTime,
- self._ds] for t in nexus_tiles]
+ self._ds] for t in nexus_tiles], dtype='object')
# Remove empty tiles (should have bounds set to None)
bad_tile_inds = np.where([t[0] is None for t in nexus_tiles_spark])[0]
diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
index faeaa0b..90ae14d 100644
--- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
@@ -226,19 +226,14 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
spark_nparts=spark_nparts,
sc=self._sc)
clim_indexed_by_month = {datetime.utcfromtimestamp(result['time']).month: result for result in results_clim}
- if len(clim_indexed_by_month) < 12:
- raise NexusProcessingException(reason="There are only " +
- len(clim_indexed_by_month) + " months of climatology data for dataset " +
- shortName + ". A full year of climatology data is required for computing deseasoned timeseries.")
-
+
for result in results:
month = datetime.utcfromtimestamp(result['time']).month
- result['meanSeasonal'] = result['mean'] - clim_indexed_by_month[month]['mean']
- result['minSeasonal'] = result['min'] - clim_indexed_by_month[month]['min']
- result['maxSeasonal'] = result['max'] - clim_indexed_by_month[month]['max']
- self.log.info(
- "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))
+ result['meanSeasonal'] = result['mean'] - clim_indexed_by_month.get(month, result)['mean']
+ result['minSeasonal'] = result['min'] - clim_indexed_by_month.get(month, result)['min']
+ result['maxSeasonal'] = result['max'] - clim_indexed_by_month.get(month, result)['max']
+ self.log.info("Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))
the_time = datetime.now()
filtering.applyAllFiltersOnField(results, 'mean', applySeasonal=False, applyLowPass=apply_low_pass_filter)