Posted to commits@sdap.apache.org by nc...@apache.org on 2024/02/01 20:39:45 UTC

(incubator-sdap-nexus) branch develop updated: SDAP-493: Pagination improvements (#282)

This is an automated email from the ASF dual-hosted git repository.

nchung pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git


The following commit(s) were added to refs/heads/develop by this push:
     new 5afdec2  SDAP-493: Pagination improvements (#282)
5afdec2 is described below

commit 5afdec2a517ac058e5ee4884e34ab38d0e3cb23c
Author: Stepheny Perez <sk...@users.noreply.github.com>
AuthorDate: Thu Feb 1 12:39:40 2024 -0800

    SDAP-493: Pagination improvements (#282)
    
    * removed resultSizeLimit param from matchup
    
    * Add # of primaries/average secondaries to job output (see the sketch after the diffstat below)
    
    * rename to executionId
    
    * update changelog
    
    * add totalSecondaryMatched field to /job output
    
    * add count of unique secondaries to job output
    
    * updated docs to use correct sea_water_temperature param name
    
    * bugfix
    
    * fix division by zero bug
---
 CHANGELOG.md                                       | 13 +++++++++++
 .../algorithms/doms/DomsInitialization.py          |  5 +++--
 .../webservice/algorithms/doms/ExecutionStatus.py  | 13 ++++++++++-
 .../webservice/algorithms/doms/ResultsStorage.py   | 24 +++++++++++----------
 analysis/webservice/algorithms_spark/Matchup.py    | 25 +++++++---------------
 analysis/webservice/apidocs/openapi.yml            | 20 +++--------------
 .../webservice/webmodel/NexusExecutionResults.py   | 20 ++++++++++++++---
 7 files changed, 69 insertions(+), 51 deletions(-)
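
For reference, a completed /job response now carries the new stats fields
alongside the renamed executionID. A minimal sketch of those fields as a
Python dict, with illustrative values (the id is hypothetical, and the real
body also carries status, timestamps, params, and result links):

    job_body = {
        'executionID': '00000000-0000-0000-0000-000000000000',  # hypothetical
        'totalPrimaryMatched': 300,
        'totalSecondaryMatched': 1200,
        'averageSecondaryMatched': 4,   # round(1200 / 300)
        'totalUniqueSecondaryMatched': 950,
    }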

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55c5bc6..2cbcc2e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+### Added
+### Changed
+- SDAP-493: 
+  - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint
+  - Updated /job endpoint with details about number of primary and secondary tiles.
+### Deprecated
+### Removed
+- SDAP-493: 
+  - Removed `resultSizeLimit` from /match_spark endpoint 
+### Fixed
+### Security
+
 ## [1.2.0] - 2023-11-22
 ### Added
 - SDAP-467: Added pagination to cdmsresults endpoint
diff --git a/analysis/webservice/algorithms/doms/DomsInitialization.py b/analysis/webservice/algorithms/doms/DomsInitialization.py
index 43627b1..a10a7e7 100644
--- a/analysis/webservice/algorithms/doms/DomsInitialization.py
+++ b/analysis/webservice/algorithms/doms/DomsInitialization.py
@@ -173,7 +173,7 @@ class DomsInitializer:
 
     def createDomsExecutionStatsTable(self, session):
         log = logging.getLogger(__name__)
-        log.info("Verifying doms_execuction_stats table")
+        log.info("Verifying doms_execution_stats table")
         cql = """
             CREATE TABLE IF NOT EXISTS doms_execution_stats (
               execution_id uuid PRIMARY KEY,
@@ -181,7 +181,8 @@ class DomsInitializer:
               num_gridded_checked int,
               num_insitu_matched int,
               num_insitu_checked int,
-              time_to_complete int
+              time_to_complete int,
+              num_unique_secondaries int
             );
         """
         session.execute(cql)
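
Note that CREATE TABLE IF NOT EXISTS is a no-op when doms_execution_stats
already exists, so a pre-existing deployment will not pick up the new column
from this statement alone. A hypothetical one-time migration, not part of
this commit (contact point and keyspace are placeholders; uses the
cassandra-driver package):

    from cassandra.cluster import Cluster

    cluster = Cluster(['127.0.0.1'])         # placeholder contact point
    session = cluster.connect('nexus_doms')  # placeholder keyspace
    # ALTER TABLE adds the column that IF NOT EXISTS skips on an
    # existing table.
    session.execute(
        'ALTER TABLE doms_execution_stats ADD num_unique_secondaries int'
    )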
diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py
index d21ab44..17c6ca9 100644
--- a/analysis/webservice/algorithms/doms/ExecutionStatus.py
+++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py
@@ -53,6 +53,14 @@ class ExecutionStatusHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
                     code=404
                 )
 
+        # Get execution stats. This call will raise an exception if the
+        # execution is not done.
+        with ResultsRetrieval(self.config) as retrieval:
+            try:
+                execution_stats = retrieval.retrieveStats(execution_id)
+            except NexusProcessingException:
+                execution_stats = {}
+
         job_status = NexusExecutionResults.ExecutionStatus(execution_details['status'])
         host = f'https://{request.requestHandler.request.host}'
 
@@ -63,5 +71,8 @@ class ExecutionStatusHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
             execution_id=execution_id,
             message=execution_details['message'],
             params=execution_params,
-            host=host
+            host=host,
+            num_primary_matched=execution_stats.get('numPrimaryMatched'),
+            num_secondary_matched=execution_stats.get('numSecondaryMatched'),
+            num_unique_secondaries=execution_stats.get('numUniqueSecondaries')
         )
diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py
index fe5947c..1dea161 100644
--- a/analysis/webservice/algorithms/doms/ResultsStorage.py
+++ b/analysis/webservice/algorithms/doms/ResultsStorage.py
@@ -169,17 +169,18 @@ class ResultsStorage(AbstractResultsContainer):
     def __insertStats(self, execution_id, stats):
         cql = """
            INSERT INTO doms_execution_stats
-                (execution_id, num_gridded_matched, num_gridded_checked, num_insitu_matched, num_insitu_checked, time_to_complete)
+                (execution_id, num_gridded_matched, num_gridded_checked, num_insitu_matched, num_insitu_checked, time_to_complete, num_unique_secondaries)
            VALUES
-                (%s, %s, %s, %s, %s, %s)
+                (%s, %s, %s, %s, %s, %s, %s)
         """
         self._session.execute(cql, (
             execution_id,
-            stats["numPrimaryMatched"],
+            stats['numPrimaryMatched'],
             None,
-            stats["numSecondaryMatched"],
+            stats['numSecondaryMatched'],
             None,
-            stats["timeToComplete"]
+            stats['timeToComplete'],
+            stats['numUniqueSecondaries']
         ))
 
     def __insertResults(self, execution_id, results):
@@ -289,7 +290,7 @@ class ResultsRetrieval(AbstractResultsContainer):
             execution_id = uuid.UUID(execution_id)
 
         params = self.retrieveParams(execution_id)
-        stats = self.__retrieveStats(execution_id)
+        stats = self.retrieveStats(execution_id)
         data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size)
         return params, stats, data
 
@@ -360,14 +361,15 @@ class ResultsRetrieval(AbstractResultsContainer):
 
         return entry
 
-    def __retrieveStats(self, id):
-        cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete FROM doms_execution_stats where execution_id = %s limit 1"
+    def retrieveStats(self, id):
+        cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete, num_unique_secondaries FROM doms_execution_stats where execution_id = %s limit 1"
         rows = self._session.execute(cql, (id,))
         for row in rows:
             stats = {
-                "timeToComplete": row.time_to_complete,
-                "numSecondaryMatched": row.num_insitu_matched,
-                "numPrimaryMatched": row.num_gridded_matched,
+                'timeToComplete': row.time_to_complete,
+                'numSecondaryMatched': row.num_insitu_matched,
+                'numPrimaryMatched': row.num_gridded_matched,
+                'numUniqueSecondaries': row.num_unique_secondaries
             }
             return stats
 
diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py
index a55f61d..8955d95 100644
--- a/analysis/webservice/algorithms_spark/Matchup.py
+++ b/analysis/webservice/algorithms_spark/Matchup.py
@@ -137,14 +137,6 @@ class Matchup(NexusCalcSparkTornadoHandler):
                            + "If true, only the nearest point will be returned for each primary point. "
                            + "If false, all points within the tolerances will be returned for each primary point. Default: False"
         },
-        "resultSizeLimit": {
-            "name": "Result Size Limit",
-            "type": "int",
-            "description": "Optional integer value that limits the number of results returned from the matchup. "
-                           "If the number of primary matches is greater than this limit, the service will respond with "
-                           "(HTTP 202: Accepted) and an empty response body. A value of 0 means return all results. "
-                           "Default: 500"
-        },
         "prioritizeDistance": {
             "name": "Prioritize distance",
             "type": "boolean",
@@ -223,8 +215,6 @@ class Matchup(NexusCalcSparkTornadoHandler):
 
         match_once = request.get_boolean_arg("matchOnce", default=False)
 
-        result_size_limit = request.get_int_arg("resultSizeLimit", default=500)
-
         start_seconds_from_epoch = int((start_time - EPOCH).total_seconds())
         end_seconds_from_epoch = int((end_time - EPOCH).total_seconds())
 
@@ -234,7 +224,7 @@ class Matchup(NexusCalcSparkTornadoHandler):
         return bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \
                start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \
                depth_min, depth_max, time_tolerance, radius_tolerance, \
-               platforms, match_once, result_size_limit, prioritize_distance
+               platforms, match_once, prioritize_distance
 
     def get_job_pool(self, tile_ids):
         if len(tile_ids) > LARGE_JOB_THRESHOLD:
@@ -244,7 +234,7 @@ class Matchup(NexusCalcSparkTornadoHandler):
     def async_calc(self, execution_id, tile_ids, bounding_polygon, primary_ds_name,
                    secondary_ds_names, parameter_s, start_time, end_time, depth_min,
                    depth_max, time_tolerance, radius_tolerance, platforms, match_once,
-                   result_size_limit, start, prioritize_distance):
+                   start, prioritize_distance):
         # Call spark_matchup
         self.log.debug("Calling Spark Driver")
 
@@ -286,10 +276,12 @@ class Matchup(NexusCalcSparkTornadoHandler):
 
         total_keys = len(list(spark_result.keys()))
         total_values = sum(len(v) for v in spark_result.values())
+        unique_values = len(set([point.data_id for v in spark_result.values() for point in v]))
         details = {
-            "timeToComplete": int((end - start).total_seconds()),
-            "numSecondaryMatched": total_values,
-            "numPrimaryMatched": total_keys
+            'timeToComplete': int((end - start).total_seconds()),
+            'numSecondaryMatched': total_values,
+            'numPrimaryMatched': total_keys,
+            'numUniqueSecondaries': unique_values
         }
 
         matches = Matchup.convert_to_matches(spark_result)
@@ -310,7 +302,7 @@ class Matchup(NexusCalcSparkTornadoHandler):
         bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \
         start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \
         depth_min, depth_max, time_tolerance, radius_tolerance, \
-        platforms, match_once, result_size_limit, prioritize_distance = self.parse_arguments(request)
+        platforms, match_once, prioritize_distance = self.parse_arguments(request)
 
         args = {
             "primary": primary_ds_name,
@@ -380,7 +372,6 @@ class Matchup(NexusCalcSparkTornadoHandler):
             radius_tolerance=radius_tolerance,
             platforms=platforms,
             match_once=match_once,
-            result_size_limit=result_size_limit,
             start=start,
             prioritize_distance=prioritize_distance
         ))
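
A minimal, self-contained sketch of the unique-secondary bookkeeping added
above, using toy stand-ins for the points in spark_result (only data_id
matters for the unique count; the names here are illustrative):

    from collections import namedtuple

    Point = namedtuple('Point', ['data_id'])

    # Two primaries; 's2' is matched by both, so it is counted once in
    # the unique tally.
    spark_result = {
        'primary_a': [Point('s1'), Point('s2')],
        'primary_b': [Point('s2'), Point('s3')],
    }

    total_keys = len(spark_result)
    total_values = sum(len(v) for v in spark_result.values())
    unique_values = len({p.data_id for v in spark_result.values() for p in v})

    assert (total_keys, total_values, unique_values) == (2, 4, 3)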
diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml
index 6ea173a..0a21087 100644
--- a/analysis/webservice/apidocs/openapi.yml
+++ b/analysis/webservice/apidocs/openapi.yml
@@ -139,8 +139,7 @@ paths:
           required: false
           schema:
             type: string
-            default: sea_surface_temperature
-          example: sea_surface_temperature
+          example: sea_water_temperature
         - in: query
           name: matchOnce
           description: |
@@ -154,19 +153,6 @@ paths:
             type: boolean
             default: false
           example: false
-        - in: query
-          name: resultSizeLimit
-          description: |
-            Optional integer value that limits the number of results
-            returned from the matchup. If the number of primary matches
-            is greater than this limit, the service will respond with
-            (HTTP 202 Accepted) and an empty response body. A value of
-            0 means return all results.
-          required: false
-          schema:
-            type: integer
-            default: 500
-          example: 500
         - in: query
           name: prioritizeDistance
           description: |
@@ -697,7 +683,7 @@ paths:
         - in: query
           name: id
           description: |
-            The job execution ID
+            The execution ID
           required: true
           schema:
             type: string
@@ -715,7 +701,7 @@ paths:
         - in: query
           name: id
           description: |
-            The job execution ID
+            The execution ID
           required: true
           schema:
             type: string
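
For reference, a minimal sketch of polling the updated endpoint with an
execution ID (the host, /job mount path, and id are assumptions here; uses
the third-party requests package):

    import requests

    resp = requests.get(
        'https://sdap.example.com/job',   # placeholder host
        params={'id': '00000000-0000-0000-0000-000000000000'}
    )
    body = resp.json()
    # The stats fields are populated once the execution has completed.
    print(body.get('totalPrimaryMatched'), body.get('averageSecondaryMatched'))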
diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py
index d5c1204..7dd7af9 100644
--- a/analysis/webservice/webmodel/NexusExecutionResults.py
+++ b/analysis/webservice/webmodel/NexusExecutionResults.py
@@ -40,11 +40,12 @@ def construct_job_status(job_state, created, updated, execution_id, params, host
             'rel': 'self'
         }],
         'params': params,
-        'jobID': execution_id
+        'executionID': execution_id
     }
 
 
-def construct_done(status, created, completed, execution_id, params, host):
+def construct_done(status, created, completed, execution_id, params, host,
+                   num_primary_matched, num_secondary_matched, num_unique_secondaries):
     job_body = construct_job_status(
         status,
         created,
@@ -53,6 +54,12 @@ def construct_done(status, created, completed, execution_id, params, host):
         params,
         host
     )
+    # Add stats to body
+    job_body['totalPrimaryMatched'] = num_primary_matched
+    job_body['totalSecondaryMatched'] = num_secondary_matched
+    job_body['averageSecondaryMatched'] = round(num_secondary_matched/num_primary_matched) \
+        if num_primary_matched > 0 else 0
+    job_body['totalUniqueSecondaryMatched'] = num_unique_secondaries
 
     # Construct urls
     formats = [
@@ -112,7 +119,8 @@ def construct_cancelled(status, created, completed, execution_id, params, host):
 
 class NexusExecutionResults:
     def __init__(self, status=None, created=None, completed=None, execution_id=None, message='',
-                 params=None, host=None, status_code=200):
+                 params=None, host=None, status_code=200, num_primary_matched=None,
+                 num_secondary_matched=None, num_unique_secondaries=None):
         self.status_code = status_code
         self.status = status
         self.created = created
@@ -121,6 +129,9 @@ class NexusExecutionResults:
         self.message = message
         self.execution_params = params
         self.host = host
+        self.num_primary_matched = num_primary_matched
+        self.num_secondary_matched = num_secondary_matched
+        self.num_unique_secondaries = num_unique_secondaries
 
     def toJson(self):
         params = {
@@ -132,6 +143,9 @@ class NexusExecutionResults:
         }
         if self.status == ExecutionStatus.SUCCESS:
             params['completed'] = self.completed
+            params['num_primary_matched'] = self.num_primary_matched
+            params['num_secondary_matched'] = self.num_secondary_matched
+            params['num_unique_secondaries'] = self.num_unique_secondaries
             construct = construct_done
         elif self.status == ExecutionStatus.RUNNING:
             construct = construct_running
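
The "fix division by zero bug" item in the commit message refers to the
guarded average in construct_done() above; a standalone sketch of the same
logic (the function name is illustrative):

    def average_secondary_matched(num_secondary_matched, num_primary_matched):
        # Guard the average for runs that matched no primary points.
        if num_primary_matched > 0:
            return round(num_secondary_matched / num_primary_matched)
        return 0

    assert average_secondary_matched(1200, 300) == 4
    assert average_secondary_matched(0, 0) == 0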