You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by sk...@apache.org on 2023/06/28 21:38:28 UTC
[incubator-sdap-nexus] 01/01: pagination

This is an automated email from the ASF dual-hosted git repository.

skperez pushed a commit to branch SDAP-467
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit d7fc917ffa01fe14e314142cd45635f6b1fa4bca
Author: skorper <st...@gmail.com>
AuthorDate: Wed Jun 28 14:38:16 2023 -0700

    pagination
---
 CHANGELOG.md                                           |  1 +
 analysis/webservice/algorithms/doms/BaseDomsHandler.py | 11 ++++++++++-
 .../webservice/algorithms/doms/ResultsRetrieval.py     |  6 ++++--
 analysis/webservice/algorithms/doms/ResultsStorage.py  | 18 +++++++++---------
 data-access/requirements.txt                           |  1 +
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0fad2a2..0496e78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 ### Added
+- SDAP-467: Added pagination to cdmsresults endpoint
 - SDAP-461: Added 4 remaining Saildrone insitu datasets.
 ### Changed
 - SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
index a31666d..0e6b308 100644
--- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py
+++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
@@ -85,7 +85,7 @@ class DomsEncoder(json.JSONEncoder):
 
 class DomsQueryResults(NexusResults):
     def __init__(self, results=None, args=None, bounds=None, count=None, details=None, computeOptions=None,
-                 executionId=None, status_code=200):
+                 executionId=None, status_code=200, page_num=None, page_size=None):
         NexusResults.__init__(self, results=results, meta=None, stats=None, computeOptions=computeOptions,
                               status_code=status_code)
         self.__args = args
@@ -94,6 +94,10 @@ class DomsQueryResults(NexusResults):
         self.__details = details
         self.__executionId = str(executionId)
 
+        # Add page num and size to details block
+        self.__details['pageNum'] = page_num
+        self.__details['pageSize'] = page_size
+
     def toJson(self):
         bounds = self.__bounds.toMap() if self.__bounds is not None else {}
         return json.dumps(
@@ -276,6 +280,9 @@ class DomsCSVFormatter:
             {"Global Attribute": "date_created", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)},
 
             {"Global Attribute": "URI_Matchup", "Value": "https://doms.jpl.nasa.gov/domsresults?id=" + executionId + "&output=CSV"}, # TODO how to replace with actual req URL
+
+            {"Global Attribute": "CDMS_page_num", "Value": details["pageNum"]},
+            {"Global Attribute": "CDMS_page_size", "Value": details["pageSize"]},
         ]
 
         writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys()))
@@ -326,6 +333,8 @@ class DomsNetCDFFormatter:
         dataset.CDMS_primary = params["primary"]
         dataset.CDMS_time_to_complete = details["timeToComplete"]
         dataset.CDMS_time_to_complete_units = "seconds"
+        dataset.CDMS_page_num = details["pageNum"]
+        dataset.CDMS_page_size = details["pageSize"]
 
         insituDatasets = params["matchup"]
         insituLinks = set()
diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py
index c3b95b0..f03c1ca 100644
--- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py
+++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py
@@ -35,6 +35,8 @@ class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
 
     def calc(self, computeOptions, **args):
         execution_id = computeOptions.get_argument("id", None)
+        page_num = computeOptions.get_int_arg('pageNum', default=1)
+        page_size = computeOptions.get_int_arg('pageSize', default=1000)
 
         try:
             execution_id = uuid.UUID(execution_id)
@@ -44,7 +46,7 @@ class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
         simple_results = computeOptions.get_boolean_arg("simpleResults", default=False)
 
         with ResultsStorage.ResultsRetrieval(self.config) as storage:
-            params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results)
+            params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results, page_num=page_num, page_size=page_size)
 
         return BaseDomsHandler.DomsQueryResults(results=data, args=params, details=stats, bounds=None, count=len(data),
-                                                computeOptions=None, executionId=execution_id)
+                                                computeOptions=None, executionId=execution_id, page_num=page_num, page_size=page_size)
diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py
index 7a9a48d..98409d1 100644
--- a/analysis/webservice/algorithms/doms/ResultsStorage.py
+++ b/analysis/webservice/algorithms/doms/ResultsStorage.py
@@ -26,7 +26,7 @@ import pkg_resources
 from cassandra.auth import PlainTextAuthProvider
 from cassandra.cluster import Cluster
 from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy
-from cassandra.query import BatchStatement
+from cassandra.query import BatchStatement, SimpleStatement
 from pytz import UTC
 from webservice.algorithms.doms.BaseDomsHandler import DomsEncoder
 from webservice.webmodel import NexusProcessingException
@@ -274,17 +274,17 @@ class ResultsRetrieval(AbstractResultsContainer):
     def __init__(self, config=None):
         AbstractResultsContainer.__init__(self, config)
 
-    def retrieveResults(self, execution_id, trim_data=False):
+    def retrieveResults(self, execution_id, trim_data=False, page_num=1, page_size=1000):
         if isinstance(execution_id, str):
             execution_id = uuid.UUID(execution_id)
 
         params = self.retrieveParams(execution_id)
         stats = self.__retrieveStats(execution_id)
-        data = self.__retrieveData(execution_id, trim_data=trim_data)
+        data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size)
         return params, stats, data
 
-    def __retrieveData(self, id, trim_data=False):
-        dataMap = self.__retrievePrimaryData(id, trim_data=trim_data)
+    def __retrieveData(self, id, trim_data=False, page_num=1, page_size=1000):
+        dataMap = self.__retrievePrimaryData(id, trim_data=trim_data, page_num=page_num, page_size=page_size)
         self.__enrichPrimaryDataWithMatches(id, dataMap, trim_data=trim_data)
         data = [dataMap[name] for name in dataMap]
         return data
@@ -302,12 +302,12 @@ class ResultsRetrieval(AbstractResultsContainer):
             else:
                 print(row)
 
-    def __retrievePrimaryData(self, id, trim_data=False):
-        cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true"
-        rows = self._session.execute(cql, (id,))
+    def __retrievePrimaryData(self, id, trim_data=False, page_num=2, page_size=10):
+        cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true limit %s"
+        rows = self._session.execute(cql, [id, page_num * page_size])
 
         dataMap = {}
-        for row in rows:
+        for row in rows[(page_num-1)*page_size:page_num*page_size]:
             entry = self.__rowToDataEntry(row, trim_data=trim_data)
             dataMap[row.value_id] = entry
         return dataMap
diff --git a/data-access/requirements.txt b/data-access/requirements.txt
index 5127018..d2ffd3f 100644
--- a/data-access/requirements.txt
+++ b/data-access/requirements.txt
@@ -20,3 +20,4 @@ urllib3==1.26.2
 requests
 nexusproto
 Shapely
+numpy==1.24.3