You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by nc...@apache.org on 2024/02/01 22:37:40 UTC

(incubator-sdap-nexus) branch develop updated: SDAP-506: STAC Catalog for Matchup outputs (#291)

This is an automated email from the ASF dual-hosted git repository.

nchung pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git


The following commit(s) were added to refs/heads/develop by this push:
     new bd391f1  SDAP-506: STAC Catalog for Matchup outputs (#291)
bd391f1 is described below

commit bd391f1a6e6d95073aa235548dd4b1897c8bad12
Author: Stepheny Perez <sk...@users.noreply.github.com>
AuthorDate: Thu Feb 1 14:37:35 2024 -0800

    SDAP-506: STAC Catalog for Matchup outputs (#291)
    
    * removed resultSizeLimit param from matchup
    
    * Add # of primaries/average secondaries to job output
    
    * rename to executionId
    
    * update changelog
    
    * add totalSecondaryMatched field to /job output
    
    * num unique secondaries addition
    
    * updated docs to use correct sea_water_temperature param name
    
    * bugfix
    
    * fix division by zero bug
    
    * add page number to default filename for matchup output
    
    * pagination improvements
    
    * removed debugging line
    
    * changelog
    
    * Update helm cassandra dependency (#289)
    
    * Update helm cassandra dependency
    
    * Bump default cassandra PV to 4
    
    * Bump default cassandra PV to 4 in tools
    
    * Changelog
    
    * Fixed small documentation issue
    
    ---------
    
    Co-authored-by: rileykk <ri...@jpl.nasa.gov>
    
    * stac catalog
    
    * Updated openapi spec
    
    * move stac endpoints to matchup tag in openapi spec
    
    * Revert "Update helm cassandra dependency (#289)"
    
    This reverts commit 1e8cc4e9d31d295e172c0db4bba61a5776642bea.
    
    * fix bug where still-running jobs failed /job endpoint due to missing metadata
    
    * Update .asf.yaml (#293)
    
    Co-authored-by: rileykk <ri...@jpl.nasa.gov>
    
    * update changelog
    
    * re-add removed changelog entry
    
    ---------
    
    Co-authored-by: Riley Kuttruff <72...@users.noreply.github.com>
    Co-authored-by: rileykk <ri...@jpl.nasa.gov>
---
 .asf.yaml                                          |  14 ++
 CHANGELOG.md                                       |   2 +
 .../webservice/algorithms/doms/ExecutionStatus.py  |   3 +
 .../webservice/algorithms/doms/ResultsStorage.py   |   2 -
 analysis/webservice/algorithms/doms/StacCatalog.py | 166 +++++++++++++++++++++
 analysis/webservice/algorithms/doms/__init__.py    |   1 +
 analysis/webservice/apidocs/openapi.yml            |  52 +++++++
 .../webservice/webmodel/NexusExecutionResults.py   |   6 +
 8 files changed, 244 insertions(+), 2 deletions(-)

diff --git a/.asf.yaml b/.asf.yaml
index 7574d14..035f24d 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -16,3 +16,17 @@
 
 github:
   autolink_jira: SDAP
+  protected_branches:
+    master:
+      strict: true # Require branches be up to date
+      required_pull_request_reviews:
+        dismiss_stale_reviews: true
+        require_code_owner_reviews: true
+        required_approving_review_count: 1
+    develop:
+      strict: true # Require branches be up to date
+      required_pull_request_reviews:
+        dismiss_stale_reviews: true
+        require_code_owner_reviews: true
+        required_approving_review_count: 1
+  del_branch_on_merge: true
diff --git a/CHANGELOG.md b/CHANGELOG.md
index da2b3c2..94e2fa6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 ### Added
+- SDAP-506:
+  - Added STAC Catalog endpoint for matchup outputs
 ### Changed
 - SDAP-493: 
   - Updated /job endpoint to use `executionId` terminology for consistency with existing `/cdmsresults` endpoint
diff --git a/analysis/webservice/algorithms/doms/ExecutionStatus.py b/analysis/webservice/algorithms/doms/ExecutionStatus.py
index 63cf423..9817b07 100644
--- a/analysis/webservice/algorithms/doms/ExecutionStatus.py
+++ b/analysis/webservice/algorithms/doms/ExecutionStatus.py
@@ -63,6 +63,9 @@ class ExecutionStatusHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
             except NexusProcessingException:
                 execution_stats = {}
 
+        if execution_stats is None:
+            execution_stats = {}
+
         job_status = NexusExecutionResults.ExecutionStatus(execution_details['status'])
         host = f'https://{request.requestHandler.request.host}'
 
diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py
index 1dea161..0cc5bd1 100644
--- a/analysis/webservice/algorithms/doms/ResultsStorage.py
+++ b/analysis/webservice/algorithms/doms/ResultsStorage.py
@@ -373,8 +373,6 @@ class ResultsRetrieval(AbstractResultsContainer):
             }
             return stats
 
-        raise NexusProcessingException(reason=f'No stats found for id {str(id)}', code=404)
-
     def retrieveParams(self, id):
         cql = "SELECT * FROM doms_params where execution_id = %s limit 1"
         rows = self._session.execute(cql, (id,))
diff --git a/analysis/webservice/algorithms/doms/StacCatalog.py b/analysis/webservice/algorithms/doms/StacCatalog.py
new file mode 100644
index 0000000..2c1aa12
--- /dev/null
+++ b/analysis/webservice/algorithms/doms/StacCatalog.py
@@ -0,0 +1,166 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the 'License'); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import re
+import uuid
+from typing import List
+
+from webservice.NexusHandler import nexus_handler
+from webservice.algorithms.doms.ResultsStorage import ResultsRetrieval
+from webservice.webmodel import NexusProcessingException
+from webservice.webmodel import NexusResults
+
+from . import BaseDomsHandler
+
+
+class StacResults(NexusResults):
+    """Result wrapper whose JSON rendering is the wrapped STAC document."""
+
+    def __init__(self, contents):
+        """Keep ``contents`` (a JSON-serializable STAC document) for rendering."""
+        super().__init__()
+        self.contents = contents
+
+    def toJson(self):
+        """Return the stored contents as a JSON string indented by 4 spaces."""
+        document = self.contents
+        return json.dumps(document, indent=4)
+
+
+@nexus_handler
+class StacCatalog(BaseDomsHandler.BaseDomsQueryCalcHandler):
+    """Serve STAC documents describing the outputs of a CDMS execution.
+
+    ``/cdmscatalog/<executionId>`` yields a STAC Catalog with one link per
+    output format. ``/cdmscatalog/<executionId>/<format>`` yields a STAC
+    Collection whose ``data`` links point at the ``/cdmsresults`` pages for
+    that format.
+    """
+
+    name = 'STAC Catalog Handler'
+    path = '^/cdmscatalog/?.*$'
+    description = ''
+    params = {}
+    singleton = True
+
+    # Output formats a collection can be requested for.
+    _OUTPUT_FORMATS = ('CSV', 'JSON', 'NETCDF')
+    # Raw string so the pattern carries no invalid escape sequences; compiled
+    # once instead of on every request.
+    _URL_PATH_REGEX = re.compile(
+        r'^/cdmscatalog/?(?P<id>[a-zA-Z0-9-]*)/?(?P<format>[a-zA-Z0-9]*)'
+    )
+
+    def __init__(self, tile_service_factory, config=None):
+        """Initialise the base handler and keep ``config`` for ResultsRetrieval."""
+        BaseDomsHandler.BaseDomsQueryCalcHandler.__init__(self, tile_service_factory)
+        self.config = config
+
+    def construct_catalog(self, execution_id: str):
+        """Build the STAC Catalog for an execution.
+
+        Reads ``self.host`` (set by ``calc``) to build absolute links.
+
+        :param execution_id: execution identifier; rendered with ``str()``.
+        :return: dict with one ``collection`` link per supported output format.
+        """
+        return {
+            'stac_version': '1.0.0',
+            'type': 'Catalog',
+            'id': str(execution_id),
+            'description': 'STAC Catalog for CDMS output',
+            'links': [
+                {
+                    'rel': 'collection',
+                    'href': f'https://{self.host}/cdmscatalog/{execution_id}/{output_format}',
+                    'title': f'Collection of pages for {execution_id} {output_format} output'
+                }
+                for output_format in self._OUTPUT_FORMATS
+            ]
+        }
+
+    def construct_collection(self, execution_id: str, output_format: str,
+                             num_primary_matched: int, page_size: int, start_time: str,
+                             end_time: str, bbox: List[float]):
+        """Build the STAC Collection for one execution and output format.
+
+        Reads ``self.host`` (set by ``calc``) to build absolute links.
+
+        :param execution_id: execution identifier.
+        :param output_format: results format used in the links and the id.
+        :param num_primary_matched: number of primary matches to page over.
+        :param page_size: primary matches per results page; must be >= 1.
+        :param start_time: start of the temporal extent, already a string.
+        :param end_time: end of the temporal extent, already a string.
+        :param bbox: spatial extent as a flat list of floats.
+        :return: dict holding the collection document.
+        :raises ValueError: if ``page_size`` is smaller than 1.
+        """
+        if page_size < 1:
+            raise ValueError('page_size must be a positive integer')
+
+        links = [
+            {
+                'rel': 'self',
+                'href': f'https://{self.host}/cdmscatalog/{execution_id}/{output_format}',
+                'title': 'The current page',
+                'type': 'application/json'
+            },
+            {
+                'rel': 'root',
+                'href': f'https://{self.host}/cdmscatalog/{execution_id}',
+                'title': f'Root catalog for {execution_id}',
+            }
+        ]
+
+        url = f'https://{self.host}/cdmsresults?id={execution_id}&output={output_format}'
+        # pageNum is emitted as a 1-based page index, so iterate over pages
+        # rather than record offsets. Ceiling division makes a partial last
+        # page (including a single match) still get a link.
+        # NOTE(review): assumes /cdmsresults treats pageNum as a page index - confirm.
+        num_pages = (max(num_primary_matched, 0) + page_size - 1) // page_size
+        links.extend(
+            {
+                'rel': 'data',
+                'href': f'{url}&pageNum={page_num}&pageSize={page_size}'
+            }
+            for page_num in range(1, num_pages + 1)
+        )
+
+        return {
+            'stac_version': '1.0.0',
+            'type': 'Collection',
+            'license': 'not-provided',
+            'id': f'{execution_id}.{output_format}',
+            'description': 'Collection of results for CDMS execution and result format',
+            'extent': {
+                # STAC 1.0.0 defines both extents as lists of entries, so the
+                # single bbox / interval is wrapped in an outer list.
+                'spatial': {
+                    'bbox': [bbox]
+                },
+                'temporal': {
+                    'interval': [[start_time, end_time]]
+                }
+            },
+            'links': links,
+        }
+
+    def calc(self, request, **args):
+        """Route a ``/cdmscatalog`` request to the catalog or collection builder.
+
+        :param request: incoming request; the URL path supplies the execution
+            id and optional format, the ``pageSize`` argument (default 1000)
+            the page size.
+        :return: ``StacResults`` wrapping the generated STAC document.
+        :raises NexusProcessingException: code 400 for a missing or malformed
+            execution id, an unsupported format or a non-positive ``pageSize``;
+            code 404 when no parameters are stored for the execution.
+        """
+        match = self._URL_PATH_REGEX.search(request.requestHandler.request.path)
+        execution_id = match.group('id') if match else ''
+        requested_format = match.group('format') if match else ''
+
+        # NOTE(review): the host is stored on the instance and this handler is
+        # declared singleton - confirm requests cannot interleave here.
+        self.host = request.requestHandler.request.host
+
+        if not execution_id:
+            raise NexusProcessingException(
+                reason='Execution ID path param must be provided.',
+                code=400
+            )
+
+        try:
+            execution_id = uuid.UUID(execution_id)
+        except ValueError:
+            raise NexusProcessingException(
+                reason=f'"{execution_id}" is not a valid uuid',
+                code=400
+            )
+
+        # Validation is case-insensitive, so normalise once and emit the same
+        # upper-case spelling the catalog links use.
+        output_format = requested_format.upper()
+        if output_format and output_format not in self._OUTPUT_FORMATS:
+            raise NexusProcessingException(
+                reason=f'"{requested_format}" is not a valid format. Should be CSV, JSON, or NETCDF.',
+                code=400
+            )
+
+        if not output_format:
+            # Route to STAC catalog for execution
+            return StacResults(self.construct_catalog(execution_id))
+
+        # Route to STAC collection for execution+format
+        page_size = request.get_int_arg('pageSize', default=1000)
+        if page_size < 1:
+            raise NexusProcessingException(
+                reason='pageSize must be a positive integer.',
+                code=400
+            )
+
+        with ResultsRetrieval(self.config) as retrieval:
+            # Stats are best-effort (they may be missing, e.g. for a job that
+            # is still running); parameters are required, so errors raised
+            # while fetching them propagate to the caller.
+            try:
+                execution_stats = retrieval.retrieveStats(execution_id)
+            except NexusProcessingException:
+                execution_stats = None
+            execution_params = retrieval.retrieveParams(execution_id)
+
+        if not execution_params:
+            raise NexusProcessingException(
+                reason=f'No params found for id {execution_id}',
+                code=404
+            )
+
+        # retrieveStats may return None instead of raising.
+        num_primary_matched = (execution_stats or {}).get('numPrimaryMatched', 0) or 0
+        start_time = execution_params['startTime'].isoformat()
+        end_time = execution_params['endTime'].isoformat()
+        bbox = list(map(float, execution_params['bbox'].split(',')))
+
+        stac_output = self.construct_collection(
+            execution_id, output_format, num_primary_matched, page_size,
+            start_time, end_time, bbox
+        )
+
+        return StacResults(stac_output)
diff --git a/analysis/webservice/algorithms/doms/__init__.py b/analysis/webservice/algorithms/doms/__init__.py
index 8bddad9..3668b59 100644
--- a/analysis/webservice/algorithms/doms/__init__.py
+++ b/analysis/webservice/algorithms/doms/__init__.py
@@ -20,6 +20,7 @@ from . import ExecutionCancel
 from . import DatasetListQuery
 from . import DomsInitialization
 from . import MatchupQuery
+from . import StacCatalog
 from . import MetadataQuery
 from . import ResultsRetrieval
 from . import ResultsStorage
diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml
index c2298c2..5539a97 100644
--- a/analysis/webservice/apidocs/openapi.yml
+++ b/analysis/webservice/apidocs/openapi.yml
@@ -721,6 +721,58 @@ paths:
             type: string
             format: uuid
           example: c864a51b-3d87-4872-9070-632820b1cae2
+  /cdmscatalog/{executionId}:
+    get:
+      summary: |
+        Get STAC Catalog for execution
+      operationId: cdmscatalog
+      tags:
+        - Matchup
+      description: "Get STAC catalog by execution id"
+      parameters:
+        - in: path
+          name: executionId
+          description: |
+            The job execution ID
+          required: true
+          schema:
+            type: string
+            format: uuid
+          example: c864a51b-3d87-4872-9070-632820b1cae2
+  /cdmscatalog/{executionId}/{format}:
+    get:
+      summary: |
+        Get STAC Collection for an execution and output format
+      operationId: cdmscatalogcollection
+      tags:
+        - Matchup
+      description: "Get STAC collection by execution id and output format"
+      parameters:
+        - in: path
+          name: executionId
+          description: |
+            The execution ID
+          required: true
+          schema:
+            type: string
+            format: uuid
+          example: c864a51b-3d87-4872-9070-632820b1cae2
+        - in: path
+          name: format
+          description: |
+            CDMS results format
+          required: true
+          schema:
+            type: string
+            enum: [JSON,CSV,NETCDF]
+          example: JSON
+        - in: query
+          name: pageSize
+          description: |
+            How many primary matches on each page of CDMS results
+          required: false
+          schema:
+            type: integer
 externalDocs:
   description: Documentation
   url: https://incubator-sdap-nexus.readthedocs.io/en/latest/index.html
diff --git a/analysis/webservice/webmodel/NexusExecutionResults.py b/analysis/webservice/webmodel/NexusExecutionResults.py
index 47a891a..425c763 100644
--- a/analysis/webservice/webmodel/NexusExecutionResults.py
+++ b/analysis/webservice/webmodel/NexusExecutionResults.py
@@ -72,6 +72,12 @@ def construct_done(status, created, completed, execution_id, params, host,
         ('JSON', 'application/json'),
         ('NETCDF', 'binary/octet-stream')
     ]
+    job_body['links'].append({
+        'href': f'{host}/cdmscatalog/{execution_id}',
+        'title': 'STAC Catalog for execution results',
+        'type': 'application/json',
+        'rel': 'stac'
+    })
     data_links = [{
         'href': f'{host}/cdmsresults?id={execution_id}&output={output_format}{filename_param}',
         'title': f'Download {output_format} results',