You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by sk...@apache.org on 2023/06/28 21:38:27 UTC

[incubator-sdap-nexus] branch SDAP-467 created (now d7fc917)

This is an automated email from the ASF dual-hosted git repository.

skperez pushed a change to branch SDAP-467
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git


      at d7fc917  pagination

This branch includes the following new commits:

     new d7fc917  pagination

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-sdap-nexus] 01/01: pagination

Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

skperez pushed a commit to branch SDAP-467
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit d7fc917ffa01fe14e314142cd45635f6b1fa4bca
Author: skorper <st...@gmail.com>
AuthorDate: Wed Jun 28 14:38:16 2023 -0700

    pagination
---
 CHANGELOG.md                                           |  1 +
 analysis/webservice/algorithms/doms/BaseDomsHandler.py | 11 ++++++++++-
 .../webservice/algorithms/doms/ResultsRetrieval.py     |  6 ++++--
 analysis/webservice/algorithms/doms/ResultsStorage.py  | 18 +++++++++---------
 data-access/requirements.txt                           |  1 +
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0fad2a2..0496e78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 ### Added
+- SDAP-467: Added pagination to cdmsresults endpoint
 - SDAP-461: Added 4 remaining Saildrone insitu datasets.
 ### Changed
 - SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
index a31666d..0e6b308 100644
--- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py
+++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
@@ -85,7 +85,7 @@ class DomsEncoder(json.JSONEncoder):
 
 class DomsQueryResults(NexusResults):
     def __init__(self, results=None, args=None, bounds=None, count=None, details=None, computeOptions=None,
-                 executionId=None, status_code=200):
+                 executionId=None, status_code=200, page_num=None, page_size=None):
         NexusResults.__init__(self, results=results, meta=None, stats=None, computeOptions=computeOptions,
                               status_code=status_code)
         self.__args = args
@@ -94,6 +94,10 @@ class DomsQueryResults(NexusResults):
         self.__details = details
         self.__executionId = str(executionId)
 
+        # Add page num and size to details block
+        self.__details['pageNum'] = page_num
+        self.__details['pageSize'] = page_size
+
     def toJson(self):
         bounds = self.__bounds.toMap() if self.__bounds is not None else {}
         return json.dumps(
@@ -276,6 +280,9 @@ class DomsCSVFormatter:
             {"Global Attribute": "date_created", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)},
 
             {"Global Attribute": "URI_Matchup", "Value": "https://doms.jpl.nasa.gov/domsresults?id=" + executionId + "&output=CSV"}, # TODO how to replace with actual req URL
+
+            {"Global Attribute": "CDMS_page_num", "Value": details["pageNum"]},
+            {"Global Attribute": "CDMS_page_size", "Value": details["pageSize"]},
         ]
 
         writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys()))
@@ -326,6 +333,8 @@ class DomsNetCDFFormatter:
         dataset.CDMS_primary = params["primary"]
         dataset.CDMS_time_to_complete = details["timeToComplete"]
         dataset.CDMS_time_to_complete_units = "seconds"
+        dataset.CDMS_page_num = details["pageNum"]
+        dataset.CDMS_page_size = details["pageSize"]
 
         insituDatasets = params["matchup"]
         insituLinks = set()
diff --git a/analysis/webservice/algorithms/doms/ResultsRetrieval.py b/analysis/webservice/algorithms/doms/ResultsRetrieval.py
index c3b95b0..f03c1ca 100644
--- a/analysis/webservice/algorithms/doms/ResultsRetrieval.py
+++ b/analysis/webservice/algorithms/doms/ResultsRetrieval.py
@@ -35,6 +35,8 @@ class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
 
     def calc(self, computeOptions, **args):
         execution_id = computeOptions.get_argument("id", None)
+        page_num = computeOptions.get_int_arg('pageNum', default=1)
+        page_size = computeOptions.get_int_arg('pageSize', default=1000)
 
         try:
             execution_id = uuid.UUID(execution_id)
@@ -44,7 +46,7 @@ class DomsResultsRetrievalHandler(BaseDomsHandler.BaseDomsQueryCalcHandler):
         simple_results = computeOptions.get_boolean_arg("simpleResults", default=False)
 
         with ResultsStorage.ResultsRetrieval(self.config) as storage:
-            params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results)
+            params, stats, data = storage.retrieveResults(execution_id, trim_data=simple_results, page_num=page_num, page_size=page_size)
 
         return BaseDomsHandler.DomsQueryResults(results=data, args=params, details=stats, bounds=None, count=len(data),
-                                                computeOptions=None, executionId=execution_id)
+                                                computeOptions=None, executionId=execution_id, page_num=page_num, page_size=page_size)
diff --git a/analysis/webservice/algorithms/doms/ResultsStorage.py b/analysis/webservice/algorithms/doms/ResultsStorage.py
index 7a9a48d..98409d1 100644
--- a/analysis/webservice/algorithms/doms/ResultsStorage.py
+++ b/analysis/webservice/algorithms/doms/ResultsStorage.py
@@ -26,7 +26,7 @@ import pkg_resources
 from cassandra.auth import PlainTextAuthProvider
 from cassandra.cluster import Cluster
 from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy
-from cassandra.query import BatchStatement
+from cassandra.query import BatchStatement, SimpleStatement
 from pytz import UTC
 from webservice.algorithms.doms.BaseDomsHandler import DomsEncoder
 from webservice.webmodel import NexusProcessingException
@@ -274,17 +274,17 @@ class ResultsRetrieval(AbstractResultsContainer):
     def __init__(self, config=None):
         AbstractResultsContainer.__init__(self, config)
 
-    def retrieveResults(self, execution_id, trim_data=False):
+    def retrieveResults(self, execution_id, trim_data=False, page_num=1, page_size=1000):
         if isinstance(execution_id, str):
             execution_id = uuid.UUID(execution_id)
 
         params = self.retrieveParams(execution_id)
         stats = self.__retrieveStats(execution_id)
-        data = self.__retrieveData(execution_id, trim_data=trim_data)
+        data = self.__retrieveData(execution_id, trim_data=trim_data, page_num=page_num, page_size=page_size)
         return params, stats, data
 
-    def __retrieveData(self, id, trim_data=False):
-        dataMap = self.__retrievePrimaryData(id, trim_data=trim_data)
+    def __retrieveData(self, id, trim_data=False, page_num=1, page_size=1000):
+        dataMap = self.__retrievePrimaryData(id, trim_data=trim_data, page_num=page_num, page_size=page_size)
         self.__enrichPrimaryDataWithMatches(id, dataMap, trim_data=trim_data)
         data = [dataMap[name] for name in dataMap]
         return data
@@ -302,12 +302,12 @@ class ResultsRetrieval(AbstractResultsContainer):
             else:
                 print(row)
 
-    def __retrievePrimaryData(self, id, trim_data=False):
-        cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true"
-        rows = self._session.execute(cql, (id,))
+    def __retrievePrimaryData(self, id, trim_data=False, page_num=2, page_size=10):
+        cql = "SELECT * FROM doms_data where execution_id = %s and is_primary = true limit %s"
+        rows = self._session.execute(cql, [id, page_num * page_size])
 
         dataMap = {}
-        for row in rows:
+        for row in rows[(page_num-1)*page_size:page_num*page_size]:
             entry = self.__rowToDataEntry(row, trim_data=trim_data)
             dataMap[row.value_id] = entry
         return dataMap
diff --git a/data-access/requirements.txt b/data-access/requirements.txt
index 5127018..d2ffd3f 100644
--- a/data-access/requirements.txt
+++ b/data-access/requirements.txt
@@ -20,3 +20,4 @@ urllib3==1.26.2
 requests
 nexusproto
 Shapely
+numpy==1.24.3