Posted to commits@sdap.apache.org by nc...@apache.org on 2024/02/01 20:39:45 UTC
(incubator-sdap-nexus) branch develop updated: SDAP-493: Pagination improvements (#282)
This is an automated email from the ASF dual-hosted git repository.
nchung pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
The following commit(s) were added to refs/heads/develop by this push:
new 5afdec2 SDAP-493: Pagination improvements (#282)
5afdec2 is described below
commit 5afdec2a517ac058e5ee4884e34ab38d0e3cb23c
Author: Stepheny Perez <sk...@users.noreply.github.com>
AuthorDate: Thu Feb 1 12:39:40 2024 -0800
SDAP-493: Pagination improvements (#282)
* removed resultSizeLimit param from matchup
* Add # of primaries/average secondaries to job output
* rename to executionId
* update changelog
* add totalSecondaryMatched field to /job output
* add count of unique secondaries to job output
* updated docs to use correct sea_water_temperature param name
* bugfix
* fix division by zero bug
---
CHANGELOG.md | 13 +++++++++++
.../algorithms/doms/DomsInitialization.py | 5 +++--
.../webservice/algorithms/doms/ExecutionStatus.py | 13 ++++++++++-
.../webservice/algorithms/doms/ResultsStorage.py | 24 +++++++++++----------
analysis/webservice/algorithms_spark/Matchup.py | 25 +++++++---------------
analysis/webservice/apidocs/openapi.yml | 20 +++--------------
.../webservice/webmodel/NexusExecutionResults.py | 20 ++++++++++++++---
7 files changed, 69 insertions(+), 51 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55c5bc6..2cbcc2e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [Unreleased]
+### Added
+### Changed
+- SDAP-493:
+ - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint
+ - Updated /job endpoint with details about number of primary and secondary tiles
+### Deprecated
+### Removed
+- SDAP-493:
+ - Removed `resultSizeLimit` from /match_spark endpoint
+### Fixed
+### Security
+
## [1.2.0] - 2023-11-22
### Added
- SDAP-467: Added pagination to cdmsresults endpoint
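Before the file-by-file diff, a minimal sketch of what a completed /job response body carries after this change. Field names are taken from the NexusExecutionResults.py diff below; every value here is illustrative, not real output.

    # Sketch of a completed /job response body (illustrative values only).
    job_body = {
        'executionID': '00000000-0000-0000-0000-000000000000',  # was 'jobID'
        'params': {},
        'totalPrimaryMatched': 120,
        'totalSecondaryMatched': 480,
        'averageSecondaryMatched': 4,  # round(480 / 120); 0 when no primaries
        'totalUniqueSecondaryMatched': 310,
    }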
diff --git a/analysis/webservice/algorithms/doms/DomsInitialization.py b/analysis/webservice/algorithms/doms/DomsInitialization.py
index 43627b1..a10a7e7 100644
--- a/analysis/webservice/algorithms/doms/DomsInitialization.py
+++ b/analysis/webservice/algorithms/doms/DomsInitialization.py
@@ -173,7 +173,7 @@ class DomsInitializer:
def createDomsExecutionStatsTable(self, session):
log = logging.getLogger(__name__)
- log.info("Verifying doms_execuction_stats table")
+ log.info("Verifying doms_execution_stats table")
cql = """
CREATE TABLE IF NOT EXISTS doms_execution_stats (
execution_id uuid PRIMARY KEY,
@@ -181,7 +181,8 @@ class DomsInitializer:
num_gridded_checked int,
num_insitu_matched int,
num_insitu_checked int,
- time_to_complete int
+ time_to_complete int,
+ num_unique_secondaries int
);
"""
session.execute(cql)
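One operational note on the hunk above: CREATE TABLE IF NOT EXISTS is a no-op when doms_execution_stats already exists, so the new column only appears on fresh installs. A hedged sketch of the one-off migration an existing deployment would run (not part of this commit; 'session' mirrors the initializer's Cassandra session above):

    # Not part of this commit: one-off migration for tables created before
    # this change. CREATE TABLE IF NOT EXISTS will not add columns to an
    # existing table. 'session' is the cassandra-driver Session used by
    # DomsInitializer above.
    session.execute("ALTER TABLE doms_execution_stats ADD num_unique_secondaries int")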
diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py
index d21ab44..17c6ca9 100644
--- a/analysis/webservice/algorithms/doms/ExecutionStatus.py
+++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py
@@ -53,6 +53,14 @@ class ExecutionStatusHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
code=404
)
+ # Get execution stats. This call will raise an exception if the
+ # execution is not done.
+ with ResultsRetrieval(self.config) as retrieval:
+ try:
+ execution_stats = retrieval.retrieveStats(execution_id)
+ except NexusProcessingException:
+ execution_stats = {}
+
job_status = NexusExecutionResults.ExecutionStatus(execution_details['status'])
host = f'https://{request.requestHandler.request.host}'
@@ -63,5 +71,8 @@ class ExecutionStatusHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
execution_id=execution_id,
message=execution_details['message'],
params=execution_params,
- host=host
+ host=host,
+ num_primary_matched=execution_stats.get('numPrimaryMatched'),
+ num_secondary_matched=execution_stats.get('numSecondaryMatched'),
+ num_unique_secondaries=execution_stats.get('numUniqueSecondaries')
)
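The empty-dict fallback above is what keeps /job usable while a run is still in progress: dict.get() returns None for missing keys, so the stats fields are simply absent rather than causing an error. A minimal standalone sketch of that behavior:

    # Sketch: for an execution that is not done, retrieveStats raises,
    # execution_stats stays {}, and .get() yields None with no KeyError.
    execution_stats = {}
    assert execution_stats.get('numPrimaryMatched') is None
    assert execution_stats.get('numUniqueSecondaries') is None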
diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py
index fe5947c..1dea161 100644
--- a/analysis/webservice/algorithms/doms/ResultsStorage.py
+++ b/analysis/webservice/algorithms/doms/ResultsStorage.py
@@ -169,17 +169,18 @@ class ResultsStorage(AbstractResultsContainer):
def __insertStats(self, execution_id, stats):
cql = """
INSERT INTO doms_execution_stats
- (execution_id, num_gridded_matched, num_gridded_checked, num_insitu_matched, num_insitu_checked, time_to_complete)
+ (execution_id, num_gridded_matched, num_gridded_checked, num_insitu_matched, num_insitu_checked, time_to_complete, num_unique_secondaries)
VALUES
- (%s, %s, %s, %s, %s, %s)
+ (%s, %s, %s, %s, %s, %s, %s)
"""
self._session.execute(cql, (
execution_id,
- stats["numPrimaryMatched"],
+ stats['numPrimaryMatched'],
None,
- stats["numSecondaryMatched"],
+ stats['numSecondaryMatched'],
None,
- stats["timeToComplete"]
+ stats['timeToComplete'],
+ stats['numUniqueSecondaries']
))
def __insertResults(self, execution_id, results):
@@ -289,7 +290,7 @@ class ResultsRetrieval(AbstractResultsContainer):
execution_id = uuid.UUID(execution_id)
params = self.retrieveParams(execution_id)
- stats = self.__retrieveStats(execution_id)
+ stats = self.retrieveStats(execution_id)
data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size)
return params, stats, data
@@ -360,14 +361,15 @@ class ResultsRetrieval(AbstractResultsContainer):
return entry
- def __retrieveStats(self, id):
- cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete FROM doms_execution_stats where execution_id = %s limit 1"
+ def retrieveStats(self, id):
+ cql = "SELECT num_gridded_matched, num_insitu_matched, time_to_complete, num_unique_secondaries FROM doms_execution_stats where execution_id = %s limit 1"
rows = self._session.execute(cql, (id,))
for row in rows:
stats = {
- "timeToComplete": row.time_to_complete,
- "numSecondaryMatched": row.num_insitu_matched,
- "numPrimaryMatched": row.num_gridded_matched,
+ 'timeToComplete': row.time_to_complete,
+ 'numSecondaryMatched': row.num_insitu_matched,
+ 'numPrimaryMatched': row.num_gridded_matched,
+ 'numUniqueSecondaries': row.num_unique_secondaries
}
return stats
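Promoting __retrieveStats to the public retrieveStats is what lets ExecutionStatus.py (above) query stats directly. A hedged sketch of the call pattern; the import path and the 'config' and 'execution_id' variables are assumptions inferred from the file layout, not verified API:

    # Hedged sketch of the new public call (see ExecutionStatus.py above).
    # Import path inferred from the repo layout; config/execution_id are
    # assumed to exist in the caller, with execution_id a uuid.UUID.
    from webservice.algorithms.doms.ResultsStorage import ResultsRetrieval

    with ResultsRetrieval(config) as retrieval:
        stats = retrieval.retrieveStats(execution_id)
    # stats uses the same keys __insertStats wrote: timeToComplete,
    # numPrimaryMatched, numSecondaryMatched, numUniqueSecondaries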
diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py
index a55f61d..8955d95 100644
--- a/analysis/webservice/algorithms_spark/Matchup.py
+++ b/analysis/webservice/algorithms_spark/Matchup.py
@@ -137,14 +137,6 @@ class Matchup(NexusCalcSparkTornadoHandler):
+ "If true, only the nearest point will be returned for each primary point. "
+ "If false, all points within the tolerances will be returned for each primary point. Default: False"
},
- "resultSizeLimit": {
- "name": "Result Size Limit",
- "type": "int",
- "description": "Optional integer value that limits the number of results returned from the matchup. "
- "If the number of primary matches is greater than this limit, the service will respond with "
- "(HTTP 202: Accepted) and an empty response body. A value of 0 means return all results. "
- "Default: 500"
- },
"prioritizeDistance": {
"name": "Prioritize distance",
"type": "boolean",
@@ -223,8 +215,6 @@ class Matchup(NexusCalcSparkTornadoHandler):
match_once = request.get_boolean_arg("matchOnce", default=False)
- result_size_limit = request.get_int_arg("resultSizeLimit", default=500)
-
start_seconds_from_epoch = int((start_time - EPOCH).total_seconds())
end_seconds_from_epoch = int((end_time - EPOCH).total_seconds())
@@ -234,7 +224,7 @@ class Matchup(NexusCalcSparkTornadoHandler):
return bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \
start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \
depth_min, depth_max, time_tolerance, radius_tolerance, \
- platforms, match_once, result_size_limit, prioritize_distance
+ platforms, match_once, prioritize_distance
def get_job_pool(self, tile_ids):
if len(tile_ids) > LARGE_JOB_THRESHOLD:
@@ -244,7 +234,7 @@ class Matchup(NexusCalcSparkTornadoHandler):
def async_calc(self, execution_id, tile_ids, bounding_polygon, primary_ds_name,
secondary_ds_names, parameter_s, start_time, end_time, depth_min,
depth_max, time_tolerance, radius_tolerance, platforms, match_once,
- result_size_limit, start, prioritize_distance):
+ start, prioritize_distance):
# Call spark_matchup
self.log.debug("Calling Spark Driver")
@@ -286,10 +276,12 @@ class Matchup(NexusCalcSparkTornadoHandler):
total_keys = len(list(spark_result.keys()))
total_values = sum(len(v) for v in spark_result.values())
+ unique_values = len(set([point.data_id for v in spark_result.values() for point in v]))
details = {
- "timeToComplete": int((end - start).total_seconds()),
- "numSecondaryMatched": total_values,
- "numPrimaryMatched": total_keys
+ 'timeToComplete': int((end - start).total_seconds()),
+ 'numSecondaryMatched': total_values,
+ 'numPrimaryMatched': total_keys,
+ 'numUniqueSecondaries': unique_values
}
matches = Matchup.convert_to_matches(spark_result)
@@ -310,7 +302,7 @@ class Matchup(NexusCalcSparkTornadoHandler):
bounding_polygon, primary_ds_name, secondary_ds_names, parameter_s, \
start_time, start_seconds_from_epoch, end_time, end_seconds_from_epoch, \
depth_min, depth_max, time_tolerance, radius_tolerance, \
- platforms, match_once, result_size_limit, prioritize_distance = self.parse_arguments(request)
+ platforms, match_once, prioritize_distance = self.parse_arguments(request)
args = {
"primary": primary_ds_name,
@@ -380,7 +372,6 @@ class Matchup(NexusCalcSparkTornadoHandler):
radius_tolerance=radius_tolerance,
platforms=platforms,
match_once=match_once,
- result_size_limit=result_size_limit,
start=start,
prioritize_distance=prioritize_distance
))
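The new unique count in async_calc dedupes matched points on data_id across all primaries, so a secondary matched by several primaries is counted once. A toy standalone illustration (the namedtuple stands in for the real matched-point type):

    # Toy illustration of total vs. unique secondary counts.
    from collections import namedtuple

    Point = namedtuple('Point', ['data_id'])
    spark_result = {
        'primary_a': [Point('s1'), Point('s2')],
        'primary_b': [Point('s2'), Point('s3')],  # 's2' matched twice
    }
    total_values = sum(len(v) for v in spark_result.values())
    unique_values = len({p.data_id for v in spark_result.values() for p in v})
    assert (total_values, unique_values) == (4, 3)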
diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml
index 6ea173a..0a21087 100644
--- a/analysis/webservice/apidocs/openapi.yml
+++ b/analysis/webservice/apidocs/openapi.yml
@@ -139,8 +139,7 @@ paths:
required: false
schema:
type: string
- default: sea_surface_temperature
- example: sea_surface_temperature
+ example: sea_water_temperature
- in: query
name: matchOnce
description: |
@@ -154,19 +153,6 @@ paths:
type: boolean
default: false
example: false
- - in: query
- name: resultSizeLimit
- description: |
- Optional integer value that limits the number of results
- returned from the matchup. If the number of primary matches
- is greater than this limit, the service will respond with
- (HTTP 202 Accepted) and an empty response body. A value of
- 0 means return all results.
- required: false
- schema:
- type: integer
- default: 500
- example: 500
- in: query
name: prioritizeDistance
description: |
@@ -697,7 +683,7 @@ paths:
- in: query
name: id
description: |
- The job execution ID
+ The execution ID
required: true
schema:
type: string
@@ -715,7 +701,7 @@ paths:
- in: query
name: id
description: |
- The job execution ID
+ The execution ID
required: true
schema:
type: string
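With resultSizeLimit gone from the spec, clients take the full result set through the existing pagination and poll /job by execution ID. A hedged sketch of that polling call; the host and ID are placeholders, and the requests dependency is an assumption, not something this repo mandates:

    # Placeholder host and execution ID; 'requests' assumed available.
    import requests

    host = 'https://sdap.example.com'
    execution_id = '00000000-0000-0000-0000-000000000000'
    resp = requests.get(f'{host}/job', params={'id': execution_id})
    print(resp.json().get('totalSecondaryMatched'))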
diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py
index d5c1204..7dd7af9 100644
--- a/analysis/webservice/webmodel/NexusExecutionResults.py
+++ b/analysis/webservice/webmodel/NexusExecutionResults.py
@@ -40,11 +40,12 @@ def construct_job_status(job_state, created, updated, execution_id, params, host
'rel': 'self'
}],
'params': params,
- 'jobID': execution_id
+ 'executionID': execution_id
}
-def construct_done(status, created, completed, execution_id, params, host):
+def construct_done(status, created, completed, execution_id, params, host,
+ num_primary_matched, num_secondary_matched, num_unique_secondaries):
job_body = construct_job_status(
status,
created,
@@ -53,6 +54,12 @@ def construct_done(status, created, completed, execution_id, params, host):
params,
host
)
+ # Add stats to body
+ job_body['totalPrimaryMatched'] = num_primary_matched
+ job_body['totalSecondaryMatched'] = num_secondary_matched
+ job_body['averageSecondaryMatched'] = round(num_secondary_matched/num_primary_matched) \
+ if num_primary_matched > 0 else 0
+ job_body['totalUniqueSecondaryMatched'] = num_unique_secondaries
# Construct urls
formats = [
@@ -112,7 +119,8 @@ def construct_cancelled(status, created, completed, execution_id, params, host):
class NexusExecutionResults:
def __init__(self, status=None, created=None, completed=None, execution_id=None, message='',
- params=None, host=None, status_code=200):
+ params=None, host=None, status_code=200, num_primary_matched=None,
+ num_secondary_matched=None, num_unique_secondaries=None):
self.status_code = status_code
self.status = status
self.created = created
@@ -121,6 +129,9 @@ class NexusExecutionResults:
self.message = message
self.execution_params = params
self.host = host
+ self.num_primary_matched = num_primary_matched
+ self.num_secondary_matched = num_secondary_matched
+ self.num_unique_secondaries = num_unique_secondaries
def toJson(self):
params = {
@@ -132,6 +143,9 @@ class NexusExecutionResults:
}
if self.status == ExecutionStatus.SUCCESS:
params['completed'] = self.completed
+ params['num_primary_matched'] = self.num_primary_matched
+ params['num_secondary_matched'] = self.num_secondary_matched
+ params['num_unique_secondaries'] = self.num_unique_secondaries
construct = construct_done
elif self.status == ExecutionStatus.RUNNING:
construct = construct_running
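For reference, the conditional in construct_done is the division-by-zero fix named in the commit message: a run with zero primary matches now reports an average of 0 instead of raising. A minimal standalone sketch of that guard:

    # Standalone sketch of the guard in construct_done above.
    def average_secondary_matched(num_primary, num_secondary):
        return round(num_secondary / num_primary) if num_primary > 0 else 0

    assert average_secondary_matched(120, 480) == 4
    assert average_secondary_matched(0, 0) == 0  # was a ZeroDivisionError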