You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by sk...@apache.org on 2023/06/28 21:38:28 UTC
[incubator-sdap-nexus] 01/01: pagination
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a commit to branch SDAP-467
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit d7fc917ffa01fe14e314142cd45635f6b1fa4bca
Author: skorper <st...@gmail.com>
AuthorDate: Wed Jun 28 14:38:16 2023 -0700
pagination
---
CHANGELOG.md | 1 +
analysis/webservice/algorithms/doms/BaseDomsHandler.py | 11 ++++++++++-
.../webservice/algorithms/doms/ResultsRetrieval.py | 6 ++++--
analysis/webservice/algorithms/doms/ResultsStorage.py | 18 +++++++++---------
data-access/requirements.txt | 1 +
5 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0fad2a2..0496e78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased
### Added
+- SDAP-467: Added pagination to the `/cdmsresults` endpoint via the `pageNum` (default 1) and `pageSize` (default 1000) request parameters.
- SDAP-461: Added 4 remaining Saildrone insitu datasets.
### Changed
- SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
index a31666d..0e6b308 100644
--- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py
+++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
@@ -85,7 +85,7 @@ class DomsEncoder(json.JSONEncoder):
class DomsQueryResults(NexusResults):
def __init__(self, results=None, args=None, bounds=None, count=None, details=None, computeOptions=None,
- executionId=None, status_code=200):
+ executionId=None, status_code=200, page_num=None, page_size=None):
NexusResults.__init__(self, results=results, meta=None, stats=None, computeOptions=computeOptions,
status_code=status_code)
self.__args = args
@@ -94,6 +94,13 @@ class DomsQueryResults(NexusResults):
self.__details = details
self.__executionId = str(executionId)
+ # details defaults to None; fall back to an empty dict so the paging keys below can always be set
+ if self.__details is None:
+ self.__details = {}
+ # Add page num and size to details block
+ self.__details['pageNum'] = page_num
+ self.__details['pageSize'] = page_size
+
def toJson(self):
bounds = self.__bounds.toMap() if self.__bounds is not None else {}
return json.dumps(
@@ -276,6 +280,9 @@ class DomsCSVFormatter:
{"Global Attribute": "date_created", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)},
{"Global Attribute": "URI_Matchup", "Value": "https://doms.jpl.nasa.gov/domsresults?id=" + executionId + "&output=CSV"}, # TODO how to replace with actual req URL
+
+ {"Global Attribute": "CDMS_page_num", "Value": details["pageNum"]},
+ {"Global Attribute": "CDMS_page_size", "Value": details["pageSize"]},
]
writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys()))
@@ -326,6 +333,8 @@ class DomsNetCDFFormatter:
dataset.CDMS_primary = params["primary"]
dataset.CDMS_time_to_complete = details["timeToComplete"]
dataset.CDMS_time_to_complete_units = "seconds"
+ dataset.CDMS_page_num = details["pageNum"]
+ dataset.CDMS_page_size = details["pageSize"]
insituDatasets = params["matchup"]
insituLinks = set()
diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py
index c3b95b0..f03c1ca 100644
--- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py
+++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py
@@ -35,6 +35,8 @@ class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
def calc(self, computeOptions, **args):
execution_id = computeOptions.get_argument("id", None)
+ page_num = computeOptions.get_int_arg('pageNum', default=1)
+ page_size = computeOptions.get_int_arg('pageSize', default=1000)
try:
execution_id = uuid.UUID(execution_id)
@@ -44,7 +46,7 @@ class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
simple_results = computeOptions.get_boolean_arg("simpleResults", default=False)
with ResultsStorage.ResultsRetrieval(self.config) as storage:
- params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results)
+ params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results, page_num=page_num, page_size=page_size)
return BaseDomsHandler.DomsQueryResults(results=data, args=params, details=stats, bounds=None, count=len(data),
- computeOptions=None, executionId=execution_id)
+ computeOptions=None, executionId=execution_id, page_num=page_num, page_size=page_size)
diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py
index 7a9a48d..98409d1 100644
--- a/analysis/webservice/algorithms/doms/ResultsStorage.py
+++ b/analysis/webservice/algorithms/doms/ResultsStorage.py
@@ -26,7 +26,7 @@ import pkg_resources
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy
-from cassandra.query import BatchStatement
+from cassandra.query import BatchStatement, SimpleStatement
from pytz import UTC
from webservice.algorithms.doms.BaseDomsHandler import DomsEncoder
from webservice.webmodel import NexusProcessingException
@@ -274,17 +274,17 @@ class ResultsRetrieval(AbstractResultsContainer):
def __init__(self, config=None):
AbstractResultsContainer.__init__(self, config)
- def retrieveResults(self, execution_id, trim_data=False):
+ def retrieveResults(self, execution_id, trim_data=False, page_num=1, page_size=1000):
if isinstance(execution_id, str):
execution_id = uuid.UUID(execution_id)
params = self.retrieveParams(execution_id)
stats = self.__retrieveStats(execution_id)
- data = self.__retrieveData(execution_id, trim_data=trim_data)
+ data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size)
return params, stats, data
- def __retrieveData(self, id, trim_data=False):
- dataMap = self.__retrievePrimaryData(id, trim_data=trim_data)
+ def __retrieveData(self, id, trim_data=False, page_num=1, page_size=1000):
+ dataMap = self.__retrievePrimaryData(id, trim_data=trim_data, page_num=page_num, page_size=page_size)
self.__enrichPrimaryDataWithMatches(id, dataMap, trim_data=trim_data)
data = [dataMap[name] for name in dataMap]
return data
@@ -302,12 +302,12 @@ class ResultsRetrieval(AbstractResultsContainer):
else:
print(row)
- def __retrievePrimaryData(self, id, trim_data=False):
- cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true"
- rows = self._session.execute(cql, (id,))
+ def __retrievePrimaryData(self, id, trim_data=False, page_num=1, page_size=1000):
+ cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true limit %s"
+ rows = self._session.execute(cql, [id, page_num * page_size])  # LIMIT covers pages 1..page_num
dataMap = {}
- for row in rows:
+ for row in rows[(page_num-1)*page_size:page_num*page_size]:  # keep only the requested page
entry = self.__rowToDataEntry(row, trim_data=trim_data)
dataMap[row.value_id] = entry
return dataMap
diff --git a/data-access/requirements.txt b/data-access/requirements.txt
index 5127018..d2ffd3f 100644
--- a/data-access/requirements.txt
+++ b/data-access/requirements.txt
@@ -20,3 +20,4 @@ urllib3==1.26.2
requests
nexusproto
Shapely
+numpy==1.24.3