You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by sk...@apache.org on 2023/06/28 21:38:27 UTC
[incubator-sdap-nexus] branch SDAP-467 created (now d7fc917)
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a change to branch SDAP-467
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
at d7fc917 pagination
This branch includes the following new commits:
new d7fc917 pagination
The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email. Revisions
listed as "add" were already present in the repository and have only
been added to this reference.
[incubator-sdap-nexus] 01/01: pagination
Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a commit to branch SDAP-467
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit d7fc917ffa01fe14e314142cd45635f6b1fa4bca
Author: skorper <st...@gmail.com>
AuthorDate: Wed Jun 28 14:38:16 2023 -0700
pagination
---
CHANGELOG.md | 1 +
analysis/webservice/algorithms/doms/BaseDomsHandler.py | 11 ++++++++++-
.../webservice/algorithms/doms/ResultsRetrieval.py | 6 ++++--
analysis/webservice/algorithms/doms/ResultsStorage.py | 18 +++++++++---------
data-access/requirements.txt | 1 +
5 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0fad2a2..0496e78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased
### Added
+- SDAP-467: Added pagination to cdmsresults endpoint
- SDAP-461: Added 4 remaining Saildrone insitu datasets.
### Changed
- SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
index a31666d..0e6b308 100644
--- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py
+++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
@@ -85,7 +85,7 @@ class DomsEncoder(json.JSONEncoder):
class DomsQueryResults(NexusResults):
def __init__(self, results=None, args=None, bounds=None, count=None, details=None, computeOptions=None,
- executionId=None, status_code=200):
+ executionId=None, status_code=200, page_num=None, page_size=None):
NexusResults.__init__(self, results=results, meta=None, stats=None, computeOptions=computeOptions,
status_code=status_code)
self.__args = args
@@ -94,6 +94,10 @@ class DomsQueryResults(NexusResults):
self.__details = details
self.__executionId = str(executionId)
+ # Add page num and size to details block
+ self.__details['pageNum'] = page_num
+ self.__details['pageSize'] = page_size
+
def toJson(self):
bounds = self.__bounds.toMap() if self.__bounds is not None else {}
return json.dumps(
@@ -276,6 +280,9 @@ class DomsCSVFormatter:
{"Global Attribute": "date_created", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)},
{"Global Attribute": "URI_Matchup", "Value": "https://doms.jpl.nasa.gov/domsresults?id=" + executionId + "&output=CSV"}, # TODO how to replace with actual req URL
+
+ {"Global Attribute": "CDMS_page_num", "Value": details["pageNum"]},
+ {"Global Attribute": "CDMS_page_size", "Value": details["pageSize"]},
]
writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys()))
@@ -326,6 +333,8 @@ class DomsNetCDFFormatter:
dataset.CDMS_primary = params["primary"]
dataset.CDMS_time_to_complete = details["timeToComplete"]
dataset.CDMS_time_to_complete_units = "seconds"
+ dataset.CDMS_page_num = details["pageNum"]
+ dataset.CDMS_page_size = details["pageSize"]
insituDatasets = params["matchup"]
insituLinks = set()
diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py
index c3b95b0..f03c1ca 100644
--- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py
+++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py
@@ -35,6 +35,8 @@ class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
def calc(self, computeOptions, **args):
execution_id = computeOptions.get_argument("id", None)
+ page_num = computeOptions.get_int_arg('pageNum', default=1)
+ page_size = computeOptions.get_int_arg('pageSize', default=1000)
try:
execution_id = uuid.UUID(execution_id)
@@ -44,7 +46,7 @@ class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
simple_results = computeOptions.get_boolean_arg("simpleResults", default=False)
with ResultsStorage.ResultsRetrieval(self.config) as storage:
- params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results)
+ params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results, page_num=page_num, page_size=page_size)
return BaseDomsHandler.DomsQueryResults(results=data, args=params, details=stats, bounds=None, count=len(data),
- computeOptions=None, executionId=execution_id)
+ computeOptions=None, executionId=execution_id, page_num=page_num, page_size=page_size)
diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py
index 7a9a48d..98409d1 100644
--- a/analysis/webservice/algorithms/doms/ResultsStorage.py
+++ b/analysis/webservice/algorithms/doms/ResultsStorage.py
@@ -26,7 +26,7 @@ import pkg_resources
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy
-from cassandra.query import BatchStatement
+from cassandra.query import BatchStatement, SimpleStatement
from pytz import UTC
from webservice.algorithms.doms.BaseDomsHandler import DomsEncoder
from webservice.webmodel import NexusProcessingException
@@ -274,17 +274,17 @@ class ResultsRetrieval(AbstractResultsContainer):
def __init__(self, config=None):
AbstractResultsContainer.__init__(self, config)
- def retrieveResults(self, execution_id, trim_data=False):
+ def retrieveResults(self, execution_id, trim_data=False, page_num=1, page_size=1000):
if isinstance(execution_id, str):
execution_id = uuid.UUID(execution_id)
params = self.retrieveParams(execution_id)
stats = self.__retrieveStats(execution_id)
- data = self.__retrieveData(execution_id, trim_data=trim_data)
+ data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size)
return params, stats, data
- def __retrieveData(self, id, trim_data=False):
- dataMap = self.__retrievePrimaryData(id, trim_data=trim_data)
+ def __retrieveData(self, id, trim_data=False, page_num=1, page_size=1000):
+ dataMap = self.__retrievePrimaryData(id, trim_data=trim_data, page_num=page_num, page_size=page_size)
self.__enrichPrimaryDataWithMatches(id, dataMap, trim_data=trim_data)
data = [dataMap[name] for name in dataMap]
return data
@@ -302,12 +302,12 @@ class ResultsRetrieval(AbstractResultsContainer):
else:
print(row)
- def __retrievePrimaryData(self, id, trim_data=False):
- cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true"
- rows = self._session.execute(cql, (id,))
+ def __retrievePrimaryData(self, id, trim_data=False, page_num=1, page_size=1000):
+ cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true limit %s"
+ rows = self._session.execute(cql, [id, page_num * page_size])
dataMap = {}
- for row in rows:
+ for row in rows[(page_num-1)*page_size:page_num*page_size]:
entry = self.__rowToDataEntry(row, trim_data=trim_data)
dataMap[row.value_id] = entry
return dataMap
diff --git a/data-access/requirements.txt b/data-access/requirements.txt
index 5127018..d2ffd3f 100644
--- a/data-access/requirements.txt
+++ b/data-access/requirements.txt
@@ -20,3 +20,4 @@ urllib3==1.26.2
requests
nexusproto
Shapely
+numpy==1.24.3