You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by ea...@apache.org on 2020/10/13 18:43:48 UTC
[incubator-sdap-nexus] branch support-deseason updated (2c32fbd ->
27d82a6)
This is an automated email from the ASF dual-hosted git repository.
eamonford pushed a change to branch support-deseason
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git.
from 2c32fbd compute deseason
add b880c63 SDAP-285: Upgrade custom Solr image to include JTS, and update solr-create-collection image to create geo field (#108)
new 448c93d wip
new c122c55 compute deseason
new 27d82a6 Merge branch 'support-deseason' of github.com:apache/incubator-sdap-nexus into support-deseason
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
docker/.gitignore | 1 -
docker/cassandra/Dockerfile | 31 --------
docker/cassandra/README.md | 0
docker/cassandra/docker-entrypoint.sh | 85 --------------------
docker/solr/Dockerfile | 23 ++----
docker/solr/cloud-init/create-collection.py | 35 ++++++++
docker/solr/cloud/Dockerfile | 31 --------
docker/solr/cloud/Readme.rst | 93 ----------------------
.../docker-entrypoint-initdb.d/0-init-home.sh | 26 ------
.../docker-entrypoint-initdb.d/1-bootstrap-zk.sh | 23 ------
docker/solr/cloud/tmp/solr.xml | 53 ------------
docker/solr/cloud/tmp/zoo.cfg | 31 --------
docker/solr/singlenode/Dockerfile | 30 -------
docker/solr/singlenode/Readme.rst | 42 ----------
docker/solr/singlenode/create-core.sh | 25 ------
15 files changed, 40 insertions(+), 489 deletions(-)
delete mode 100644 docker/.gitignore
delete mode 100644 docker/cassandra/Dockerfile
delete mode 100644 docker/cassandra/README.md
delete mode 100755 docker/cassandra/docker-entrypoint.sh
delete mode 100644 docker/solr/cloud/Dockerfile
delete mode 100644 docker/solr/cloud/Readme.rst
delete mode 100755 docker/solr/cloud/docker-entrypoint-initdb.d/0-init-home.sh
delete mode 100755 docker/solr/cloud/docker-entrypoint-initdb.d/1-bootstrap-zk.sh
delete mode 100644 docker/solr/cloud/tmp/solr.xml
delete mode 100644 docker/solr/cloud/tmp/zoo.cfg
delete mode 100644 docker/solr/singlenode/Dockerfile
delete mode 100644 docker/solr/singlenode/Readme.rst
delete mode 100755 docker/solr/singlenode/create-core.sh
[incubator-sdap-nexus] 01/03: wip
Posted by ea...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
eamonford pushed a commit to branch support-deseason
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit 448c93d0448d65304a52865915d72dcf50913b59
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Tue Aug 25 17:32:49 2020 -0700
wip
---
.../webservice/algorithms_spark/TimeSeriesSpark.py | 46 ++++++++++++++--------
1 file changed, 29 insertions(+), 17 deletions(-)
diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
index 43f7f6d..d56b46b 100644
--- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
@@ -117,7 +117,7 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
except:
try:
west, south, east, north = request.get_min_lon(), request.get_min_lat(), \
- request.get_max_lon(), request.get_max_lat()
+ request.get_max_lon(), request.get_max_lat()
bounding_polygon = shapely.geometry.Polygon(
[(west, south), (east, south), (east, north), (west, north), (west, south)])
except:
@@ -160,7 +160,7 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
def calc(self, request, **args):
"""
-
+
:param request: StatsComputeOptions
:param args: dict
:return:
@@ -176,13 +176,13 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
the_time = datetime.now()
daysinrange = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
- bounding_polygon.bounds[3],
- bounding_polygon.bounds[0],
- bounding_polygon.bounds[2],
- shortName,
- start_seconds_from_epoch,
- end_seconds_from_epoch,
- metrics_callback=metrics_record.record_metrics)
+ bounding_polygon.bounds[3],
+ bounding_polygon.bounds[0],
+ bounding_polygon.bounds[2],
+ shortName,
+ start_seconds_from_epoch,
+ end_seconds_from_epoch,
+ metrics_callback=metrics_record.record_metrics)
self.log.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), shortName))
ndays = len(daysinrange)
@@ -203,7 +203,19 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
if apply_seasonal_cycle_filter:
the_time = datetime.now()
+ # get time series for _clim dataset
+ daysinrange_clim = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
+ bounding_polygon.bounds[3],
+ bounding_polygon.bounds[0],
+ bounding_polygon.bounds[2],
+ shortName,
+ 0,
+ 31535999,
+ metrics_callback=metrics_record.record_metrics)
+
for result in results:
+ # align _clim time series with original time series
+
month = datetime.utcfromtimestamp(result['time']).month
month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt,
shortName)
@@ -288,12 +300,12 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
start = (pytz.UTC.localize(beginning_of_month) - EPOCH).total_seconds()
end = (pytz.UTC.localize(end_of_month) - EPOCH).total_seconds()
tile_stats = self._get_tile_service().find_tiles_in_polygon(bounding_polygon, ds, start, end,
- fl=('id,'
- 'tile_avg_val_d,tile_count_i,'
- 'tile_min_val_d,tile_max_val_d,'
- 'tile_min_lat,tile_max_lat,'
- 'tile_min_lon,tile_max_lon'),
- fetch_data=False)
+ fl=('id,'
+ 'tile_avg_val_d,tile_count_i,'
+ 'tile_min_val_d,tile_max_val_d,'
+ 'tile_min_lat,tile_max_lat,'
+ 'tile_min_lon,tile_max_lon'),
+ fetch_data=False)
if len(tile_stats) == 0:
continue
@@ -338,8 +350,8 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
weights = np.array(monthly_counts) / count_sum
return np.average(monthly_averages, None, weights).item(), \
- np.average(monthly_averages, None, weights).item(), \
- np.average(monthly_averages, None, weights).item()
+ np.average(monthly_averages, None, weights).item(), \
+ np.average(monthly_averages, None, weights).item()
@lru_cache()
def get_min_max_date(self, ds=None):
[incubator-sdap-nexus] 03/03: Merge branch 'support-deseason' of
github.com:apache/incubator-sdap-nexus into support-deseason
Posted by ea...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
eamonford pushed a commit to branch support-deseason
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit 27d82a69c4c5c71e32ade902810e07569717359c
Merge: c122c55 2c32fbd
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Tue Oct 13 11:42:00 2020 -0700
Merge branch 'support-deseason' of github.com:apache/incubator-sdap-nexus into support-deseason
[incubator-sdap-nexus] 02/03: compute deseason
Posted by ea...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
eamonford pushed a commit to branch support-deseason
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit c122c5585a5b0e8b4d0a0bf25b84464de5e9c6ce
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Mon Sep 14 13:50:23 2020 -0700
compute deseason
---
.../webservice/algorithms_spark/TimeSeriesSpark.py | 30 ++++++++++++++--------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
index d56b46b..079143b 100644
--- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
@@ -204,27 +204,35 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
if apply_seasonal_cycle_filter:
the_time = datetime.now()
# get time series for _clim dataset
+ shortName_clim = shortName + "_clim"
daysinrange_clim = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
bounding_polygon.bounds[3],
bounding_polygon.bounds[0],
bounding_polygon.bounds[2],
- shortName,
+ shortName_clim,
0,
31535999,
metrics_callback=metrics_record.record_metrics)
+ results_clim, _ = spark_driver(daysinrange_clim,
+ bounding_polygon,
+ shortName_clim,
+ self._tile_service_factory,
+ metrics_record.record_metrics,
+ spark_nparts=spark_nparts,
+ sc=self._sc)
+ clim_indexed_by_month = {datetime.utcfromtimestamp(
+ result['time']).month: result for result in results_clim}
+ if len(clim_indexed_by_month) < 12:
+ raise NexusProcessingException(reason="There are only " +
+ len(clim_indexed_by_month) + " months of climatology data for dataset " +
+ shortName + ". A full year of climatology data is required for computing deseasoned timeseries.")
for result in results:
- # aline _clim time series with original time series
-
month = datetime.utcfromtimestamp(result['time']).month
- month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt,
- shortName)
- seasonal_mean = result['mean'] - month_mean
- seasonal_min = result['min'] - month_min
- seasonal_max = result['max'] - month_max
- result['meanSeasonal'] = seasonal_mean
- result['minSeasonal'] = seasonal_min
- result['maxSeasonal'] = seasonal_max
+
+ result['meanSeasonal'] = result['mean'] - clim_indexed_by_month[month]['mean']
+ result['minSeasonal'] = result['min'] - clim_indexed_by_month[month]['min']
+ result['maxSeasonal'] = result['max'] - clim_indexed_by_month[month]['max']
self.log.info(
"Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))