You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by nc...@apache.org on 2024/02/01 22:37:40 UTC
(incubator-sdap-nexus) branch develop updated: SDAP-506: STAC Catalog for Matchup outputs (#291)
This is an automated email from the ASF dual-hosted git repository.
nchung pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
The following commit(s) were added to refs/heads/develop by this push:
new bd391f1 SDAP-506: STAC Catalog for Matchup outputs (#291)
bd391f1 is described below
commit bd391f1a6e6d95073aa235548dd4b1897c8bad12
Author: Stepheny Perez <sk...@users.noreply.github.com>
AuthorDate: Thu Feb 1 14:37:35 2024 -0800
SDAP-506: STAC Catalog for Matchup outputs (#291)
* removed resultSizeLimit param from matchup
* Add # of primaries/average secondaries to job output
* rename to executionId
* update changelog
* add totalSecondaryMatched field to /job output
* num unique secondaries addition
* updated docs to use correct sea_water_temperature param name
* bugfix
* fix division by zero bug
* add page number to default filename for matchup output
* pagination improvements
* removed debugging line
* changelog
* Update helm cassandra dependency (#289)
* Update helm cassandra dependency
* Bump default cassandra PV to 4
* Bump default cassandra PV to 4 in tools
* Changelog
* Fixed small documentation issue
---------
Co-authored-by: rileykk <ri...@jpl.nasa.gov>
* stac catalog
* Updated openapi spec
* move stac endpoints to matchup tag in openapi spec
* Revert "Update helm cassandra dependency (#289)"
This reverts commit 1e8cc4e9d31d295e172c0db4bba61a5776642bea.
* fix bug where still-running jobs failed /job endpoint due to missing metadata
* Update .asf.yaml (#293)
Co-authored-by: rileykk <ri...@jpl.nasa.gov>
* update changelog
* re-add removed changelog entry
---------
Co-authored-by: Riley Kuttruff <72...@users.noreply.github.com>
Co-authored-by: rileykk <ri...@jpl.nasa.gov>
---
.asf.yaml | 14 ++
CHANGELOG.md | 2 +
.../webservice/algorithms/doms/ExecutionStatus.py | 3 +
.../webservice/algorithms/doms/ResultsStorage.py | 2 -
analysis/webservice/algorithms/doms/StacCatalog.py | 166 +++++++++++++++++++++
analysis/webservice/algorithms/doms/__init__.py | 1 +
analysis/webservice/apidocs/openapi.yml | 52 +++++++
.../webservice/webmodel/NexusExecutionResults.py | 6 +
8 files changed, 244 insertions(+), 2 deletions(-)
diff --git a/.asf.yaml b/.asf.yaml
index 7574d14..035f24d 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -16,3 +16,17 @@
github:
autolink_jira: SDAP
+ protected_branches:
+ master:
+ strict: true # Require branches be up to date
+ required_pull_request_reviews:
+ dismiss_stale_reviews: true
+ require_code_owner_reviews: true
+ required_approving_review_count: 1
+ develop:
+ strict: true # Require branches be up to date
+ required_pull_request_reviews:
+ dismiss_stale_reviews: true
+ require_code_owner_reviews: true
+ required_approving_review_count: 1
+ del_branch_on_merge: true
diff --git a/CHANGELOG.md b/CHANGELOG.md
index da2b3c2..94e2fa6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
+- SDAP-506:
+ - Added STAC Catalog endpoint for matchup outputs
### Changed
- SDAP-493:
- Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint
diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py
index 63cf423..9817b07 100644
--- a/analysis/webservice/algorithms/doms/ExecutionStatus.py
+++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py
@@ -63,6 +63,9 @@ class ExecutionStatusHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
except NexusProcessingException:
execution_stats = {}
+ if execution_stats is None:
+ execution_stats = {}
+
job_status = NexusExecutionResults.ExecutionStatus(execution_details['status'])
host = f'https://{request.requestHandler.request.host}'
diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py
index 1dea161..0cc5bd1 100644
--- a/analysis/webservice/algorithms/doms/ResultsStorage.py
+++ b/analysis/webservice/algorithms/doms/ResultsStorage.py
@@ -373,8 +373,6 @@ class ResultsRetrieval(AbstractResultsContainer):
}
return stats
- raise NexusProcessingException(reason=f'No stats found for id {str(id)}', code=404)
-
def retrieveParams(self, id):
cql = "SELECT * FROM doms_params where execution_id = %s limit 1"
rows = self._session.execute(cql, (id,))
diff --git a/analysis/webservice/algorithms/doms/StacCatalog.py b/analysis/webservice/algorithms/doms/StacCatalog.py
new file mode 100644
index 0000000..2c1aa12
--- /dev/null
+++ b/analysis/webservice/algorithms/doms/StacCatalog.py
@@ -0,0 +1,166 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the 'License'); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import re
+import uuid
+from typing import List
+
+from webservice.NexusHandler import nexus_handler
+from webservice.algorithms.doms.ResultsStorage import ResultsRetrieval
+from webservice.webmodel import NexusProcessingException
+from webservice.webmodel import NexusResults
+
+from . import BaseDomsHandler
+
+
+class StacResults(NexusResults):
+ def __init__(self, contents):
+ NexusResults.__init__(self)
+ self.contents = contents
+
+ def toJson(self):
+ return json.dumps(self.contents, indent=4)
+
+
+@nexus_handler
+class StacCatalog(BaseDomsHandler.BaseDomsQueryCalcHandler):
+ name = 'STAC Catalog Handler'
+ path = '^/cdmscatalog/?.*$'
+ description = ''
+ params = {}
+ singleton = True
+
+ def __init__(self, tile_service_factory, config=None):
+ BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self, tile_service_factory)
+ self.config = config
+
+ def construct_catalog(self, execution_id: str):
+ return {
+ 'stac_version': '1.0.0',
+ 'type': 'Catalog',
+ 'id': str(execution_id),
+ 'description': 'STAC Catalog for CDMS output',
+ 'links': [
+ {
+ 'rel': 'collection',
+ 'href': f'https://{self.host}/cdmscatalog/{execution_id}/{output_format}',
+ 'title': f'Collection of pages for {execution_id} {output_format} output'
+ }
+ for output_format in ['CSV', 'JSON', 'NETCDF']
+ ]
+ }
+
+ def construct_collection(self, execution_id: str, output_format: str,
+ num_primary_matched: int, page_size: int, start_time: str,
+ end_time: str, bbox: List[float]):
+ links = [
+ {
+ 'rel': 'self',
+ 'href': f'https://{self.host}/cdmscatalog/{execution_id}/{output_format}',
+ 'title': 'The current page',
+ 'type': 'application/json'
+ },
+ {
+ 'rel': 'root',
+ 'href': f'https://{self.host}/cdmscatalog/{execution_id}',
+ 'title': f'Root catalog for {execution_id}',
+ }
+ ]
+
+ url = f'https://{self.host}/cdmsresults?id={execution_id}&output={output_format}'
+ for page_num in range(1, num_primary_matched, page_size):
+ links.append({
+ 'rel': 'data',
+ 'href': f'{url}&pageNum={page_num}&pageSize={page_size}'
+ })
+
+ return {
+ 'stac_version': '1.0.0',
+ 'type': 'Collection',
+ 'license': 'not-provided',
+ 'id': f'{execution_id}.{output_format}',
+ 'description': 'Collection of results for CDMS execution and result format',
+ 'extent': {
+ 'spatial': {
+ 'bbox': bbox
+ },
+ 'temporal': {
+ 'interval': [start_time, end_time]
+ }
+ },
+ 'links': links,
+ }
+
+ def calc(self, request, **args):
+ page_size = request.get_int_arg('pageSize', default=1000)
+ url_path_regex = '^\/cdmscatalog\/?(?P<id>[a-zA-Z0-9-]*)\/?(?P<format>[a-zA-Z0-9]*)'
+ match = re.search(url_path_regex, request.requestHandler.request.path)
+
+ execution_id = match.group('id')
+ output_format = match.group('format')
+
+ self.host = request.requestHandler.request.host
+
+ if not execution_id:
+ raise NexusProcessingException(
+ reason=f'Execution ID path param must be provided.',
+ code=400
+ )
+
+ if execution_id:
+ try:
+ execution_id = uuid.UUID(execution_id)
+ except ValueError:
+ raise NexusProcessingException(
+ reason=f'"{execution_id}" is not a valid uuid',
+ code=400
+ )
+
+ if output_format and output_format.upper() not in ['CSV', 'JSON', 'NETCDF']:
+ raise NexusProcessingException(
+ reason=f'"{output_format}" is not a valid format. Should be CSV, JSON, or NETCDF.',
+ code=400
+ )
+
+ if execution_id and not output_format:
+ # Route to STAC catalog for execution
+ stac_output = self.construct_catalog(execution_id)
+ elif execution_id and output_format:
+ # Route to STAC collection for execution+format
+
+ with ResultsRetrieval(self.config) as retrieval:
+ try:
+ execution_stats = retrieval.retrieveStats(execution_id)
+ execution_params = retrieval.retrieveParams(execution_id)
+ except NexusProcessingException:
+ execution_stats = {}
+
+ num_primary_matched = execution_stats.get('numPrimaryMatched', 0)
+ start_time = execution_params['startTime'].isoformat()
+ end_time = execution_params['endTime'].isoformat()
+ bbox = list(map(float, execution_params['bbox'].split(',')))
+
+ stac_output = self.construct_collection(
+ execution_id, output_format, num_primary_matched, page_size,
+ start_time, end_time, bbox
+ )
+ else:
+ raise NexusProcessingException(
+ reason=f'Invalid path parameters were provided',
+ code=400
+ )
+
+ return StacResults(stac_output)
diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py
index 8bddad9..3668b59 100644
--- a/analysis/webservice/algorithms/doms/__init__.py
+++ b/analysis/webservice/algorithms/doms/__init__.py
@@ -20,6 +20,7 @@ from . import ExecutionCancel
from . import DatasetListQuery
from . import DomsInitialization
from . import MatchupQuery
+from . import StacCatalog
from . import MetadataQuery
from . import ResultsRetrieval
from . import ResultsStorage
diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml
index c2298c2..5539a97 100644
--- a/analysis/webservice/apidocs/openapi.yml
+++ b/analysis/webservice/apidocs/openapi.yml
@@ -721,6 +721,58 @@ paths:
type: string
format: uuid
example: c864a51b-3d87-4872-9070-632820b1cae2
+ /cdmscatalog/{executionId}:
+ get:
+ summary: |
+ Get STAC Catalog for execution
+ operationId: cdmscatalog
+ tags:
+ - Matchup
+ description: "Get STAC catalog by execution id"
+ parameters:
+ - in: path
+ name: executionId
+ description: |
+ The job execution ID
+ required: true
+ schema:
+ type: string
+ format: uuid
+ example: c864a51b-3d87-4872-9070-632820b1cae2
+ /cdmscatalog/{executionId}/{format}:
+ get:
+ summary: |
+ Get STAC Catalog format catalog for execution
+ operationId: cdmscatalogcollection
+ tags:
+ - Matchup
+ description: "Get STAC catalog by execution id"
+ parameters:
+ - in: path
+ name: executionId
+ description: |
+ The execution ID
+ required: true
+ schema:
+ type: string
+ format: uuid
+ example: c864a51b-3d87-4872-9070-632820b1cae2
+ - in: path
+ name: format
+ description: |
+ CDMS results format
+ required: true
+ schema:
+ type: string
+ enum: [JSON,CSV,NETCDF]
+ example: JSON
+ - in: query
+ name: pageSize
+ description: |
+ How many primary matches on each page of CDMS results
+ required: false
+ schema:
+ type: integer
externalDocs:
description: Documentation
url: https://incubator-sdap-nexus.readthedocs.io/en/latest/index.html
diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py
index 47a891a..425c763 100644
--- a/analysis/webservice/webmodel/NexusExecutionResults.py
+++ b/analysis/webservice/webmodel/NexusExecutionResults.py
@@ -72,6 +72,12 @@ def construct_done(status, created, completed, execution_id, params, host,
('JSON', 'application/json'),
('NETCDF', 'binary/octet-stream')
]
+ job_body['links'].append({
+ 'href': f'{host}/cdmscatalog/{execution_id}',
+ 'title': 'STAC Catalog for execution results',
+ 'type': 'application/json',
+ 'rel': 'stac'
+ })
data_links = [{
'href': f'{host}/cdmsresults?id={execution_id}&output={output_format}{filename_param}',
'title': f'Download {output_format} results',