You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by ea...@apache.org on 2020/10/13 18:43:48 UTC

[incubator-sdap-nexus] branch support-deseason updated (2c32fbd -> 27d82a6)

This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a change to branch support-deseason
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git.


    from 2c32fbd  compute deseason
     add b880c63  SDAP-285: Upgrade custom Solr image to include JTS, and update solr-create-collection image to create geo field  (#108)
     new 448c93d  wip
     new c122c55  compute deseason
     new 27d82a6  Merge branch 'support-deseason' of github.com:apache/incubator-sdap-nexus into support-deseason

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docker/.gitignore                                  |  1 -
 docker/cassandra/Dockerfile                        | 31 --------
 docker/cassandra/README.md                         |  0
 docker/cassandra/docker-entrypoint.sh              | 85 --------------------
 docker/solr/Dockerfile                             | 23 ++----
 docker/solr/cloud-init/create-collection.py        | 35 ++++++++
 docker/solr/cloud/Dockerfile                       | 31 --------
 docker/solr/cloud/Readme.rst                       | 93 ----------------------
 .../docker-entrypoint-initdb.d/0-init-home.sh      | 26 ------
 .../docker-entrypoint-initdb.d/1-bootstrap-zk.sh   | 23 ------
 docker/solr/cloud/tmp/solr.xml                     | 53 ------------
 docker/solr/cloud/tmp/zoo.cfg                      | 31 --------
 docker/solr/singlenode/Dockerfile                  | 30 -------
 docker/solr/singlenode/Readme.rst                  | 42 ----------
 docker/solr/singlenode/create-core.sh              | 25 ------
 15 files changed, 40 insertions(+), 489 deletions(-)
 delete mode 100644 docker/.gitignore
 delete mode 100644 docker/cassandra/Dockerfile
 delete mode 100644 docker/cassandra/README.md
 delete mode 100755 docker/cassandra/docker-entrypoint.sh
 delete mode 100644 docker/solr/cloud/Dockerfile
 delete mode 100644 docker/solr/cloud/Readme.rst
 delete mode 100755 docker/solr/cloud/docker-entrypoint-initdb.d/0-init-home.sh
 delete mode 100755 docker/solr/cloud/docker-entrypoint-initdb.d/1-bootstrap-zk.sh
 delete mode 100644 docker/solr/cloud/tmp/solr.xml
 delete mode 100644 docker/solr/cloud/tmp/zoo.cfg
 delete mode 100644 docker/solr/singlenode/Dockerfile
 delete mode 100644 docker/solr/singlenode/Readme.rst
 delete mode 100755 docker/solr/singlenode/create-core.sh


[incubator-sdap-nexus] 01/03: wip

Posted by ea...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a commit to branch support-deseason
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit 448c93d0448d65304a52865915d72dcf50913b59
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Tue Aug 25 17:32:49 2020 -0700

    wip
---
 .../webservice/algorithms_spark/TimeSeriesSpark.py | 46 ++++++++++++++--------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
index 43f7f6d..d56b46b 100644
--- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
@@ -117,7 +117,7 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
         except:
             try:
                 west, south, east, north = request.get_min_lon(), request.get_min_lat(), \
-                                           request.get_max_lon(), request.get_max_lat()
+                    request.get_max_lon(), request.get_max_lat()
                 bounding_polygon = shapely.geometry.Polygon(
                     [(west, south), (east, south), (east, north), (west, north), (west, south)])
             except:
@@ -160,7 +160,7 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
 
     def calc(self, request, **args):
         """
-    
+
         :param request: StatsComputeOptions
         :param args: dict
         :return:
@@ -176,13 +176,13 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
 
             the_time = datetime.now()
             daysinrange = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
-                                                                    bounding_polygon.bounds[3],
-                                                                    bounding_polygon.bounds[0],
-                                                                    bounding_polygon.bounds[2],
-                                                                    shortName,
-                                                                    start_seconds_from_epoch,
-                                                                    end_seconds_from_epoch,
-                                                                    metrics_callback=metrics_record.record_metrics)
+                                                                          bounding_polygon.bounds[3],
+                                                                          bounding_polygon.bounds[0],
+                                                                          bounding_polygon.bounds[2],
+                                                                          shortName,
+                                                                          start_seconds_from_epoch,
+                                                                          end_seconds_from_epoch,
+                                                                          metrics_callback=metrics_record.record_metrics)
             self.log.info("Finding days in range took %s for dataset %s" % (str(datetime.now() - the_time), shortName))
 
             ndays = len(daysinrange)
@@ -203,7 +203,19 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
 
             if apply_seasonal_cycle_filter:
                 the_time = datetime.now()
+                # get time series for _clim dataset
+                daysinrange_clim = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
+                                                                                   bounding_polygon.bounds[3],
+                                                                                   bounding_polygon.bounds[0],
+                                                                                   bounding_polygon.bounds[2],
+                                                                                   shortName,
+                                                                                   0,
+                                                                                   31535999,
+                                                                                   metrics_callback=metrics_record.record_metrics)
+
                 for result in results:
+                    # aline _clim time series with original time series
+
                     month = datetime.utcfromtimestamp(result['time']).month
                     month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt,
                                                                                       shortName)
@@ -288,12 +300,12 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
             start = (pytz.UTC.localize(beginning_of_month) - EPOCH).total_seconds()
             end = (pytz.UTC.localize(end_of_month) - EPOCH).total_seconds()
             tile_stats = self._get_tile_service().find_tiles_in_polygon(bounding_polygon, ds, start, end,
-                                                                  fl=('id,'
-                                                                      'tile_avg_val_d,tile_count_i,'
-                                                                      'tile_min_val_d,tile_max_val_d,'
-                                                                      'tile_min_lat,tile_max_lat,'
-                                                                      'tile_min_lon,tile_max_lon'),
-                                                                  fetch_data=False)
+                                                                        fl=('id,'
+                                                                            'tile_avg_val_d,tile_count_i,'
+                                                                            'tile_min_val_d,tile_max_val_d,'
+                                                                            'tile_min_lat,tile_max_lat,'
+                                                                            'tile_min_lon,tile_max_lon'),
+                                                                        fetch_data=False)
             if len(tile_stats) == 0:
                 continue
 
@@ -338,8 +350,8 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
         weights = np.array(monthly_counts) / count_sum
 
         return np.average(monthly_averages, None, weights).item(), \
-               np.average(monthly_averages, None, weights).item(), \
-               np.average(monthly_averages, None, weights).item()
+            np.average(monthly_averages, None, weights).item(), \
+            np.average(monthly_averages, None, weights).item()
 
     @lru_cache()
     def get_min_max_date(self, ds=None):


[incubator-sdap-nexus] 03/03: Merge branch 'support-deseason' of github.com:apache/incubator-sdap-nexus into support-deseason

Posted by ea...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a commit to branch support-deseason
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit 27d82a69c4c5c71e32ade902810e07569717359c
Merge: c122c55 2c32fbd
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Tue Oct 13 11:42:00 2020 -0700

    Merge branch 'support-deseason' of github.com:apache/incubator-sdap-nexus into support-deseason



[incubator-sdap-nexus] 02/03: compute deseason

Posted by ea...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a commit to branch support-deseason
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit c122c5585a5b0e8b4d0a0bf25b84464de5e9c6ce
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Mon Sep 14 13:50:23 2020 -0700

    compute deseason
---
 .../webservice/algorithms_spark/TimeSeriesSpark.py | 30 ++++++++++++++--------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
index d56b46b..079143b 100644
--- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
@@ -204,27 +204,35 @@ class TimeSeriesSparkHandlerImpl(NexusCalcSparkHandler):
             if apply_seasonal_cycle_filter:
                 the_time = datetime.now()
                 # get time series for _clim dataset
+                shortName_clim = shortName + "_clim"
                 daysinrange_clim = self._get_tile_service().find_days_in_range_asc(bounding_polygon.bounds[1],
                                                                                    bounding_polygon.bounds[3],
                                                                                    bounding_polygon.bounds[0],
                                                                                    bounding_polygon.bounds[2],
-                                                                                   shortName,
+                                                                                   shortName_clim,
                                                                                    0,
                                                                                    31535999,
                                                                                    metrics_callback=metrics_record.record_metrics)
+                results_clim, _ = spark_driver(daysinrange_clim,
+                                               bounding_polygon,
+                                               shortName_clim,
+                                               self._tile_service_factory,
+                                               metrics_record.record_metrics,
+                                               spark_nparts=spark_nparts,
+                                               sc=self._sc)
+                clim_indexed_by_month = {datetime.utcfromtimestamp(
+                    result['time']).month: result for result in results_clim}
+                if len(clim_indexed_by_month) < 12:
+                    raise NexusProcessingException(reason="There are only " +
+                                                   len(clim_indexed_by_month) + " months of climatology data for dataset " + 
+                                                   shortName + ". A full year of climatology data is required for computing deseasoned timeseries.")
 
                 for result in results:
-                    # aline _clim time series with original time series
-
                     month = datetime.utcfromtimestamp(result['time']).month
-                    month_mean, month_max, month_min = self.calculate_monthly_average(month, bounding_polygon.wkt,
-                                                                                      shortName)
-                    seasonal_mean = result['mean'] - month_mean
-                    seasonal_min = result['min'] - month_min
-                    seasonal_max = result['max'] - month_max
-                    result['meanSeasonal'] = seasonal_mean
-                    result['minSeasonal'] = seasonal_min
-                    result['maxSeasonal'] = seasonal_max
+
+                    result['meanSeasonal'] = result['mean'] - clim_indexed_by_month[month]['mean']
+                    result['minSeasonal'] = result['min'] - clim_indexed_by_month[month]['min']
+                    result['maxSeasonal'] = result['max'] - clim_indexed_by_month[month]['max']
                 self.log.info(
                     "Seasonal calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))