You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by sk...@apache.org on 2023/08/16 21:38:57 UTC

[incubator-sdap-nexus] branch SDAP-481 created (now 3856934)

This is an automated email from the ASF dual-hosted git repository.

skperez pushed a change to branch SDAP-481
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git


      at 3856934  Add support for netcdf compression

This branch includes the following new commits:

     new 96c66a9  Merge branch 'SDAP-455', remote-tracking branch 'origin' into SDAP-481
     new 3856934  Add support for netcdf compression

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-sdap-nexus] 02/02: Add support for netcdf compression

Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

skperez pushed a commit to branch SDAP-481
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit 3856934980ff652166f9622ced330d05c6127913
Author: skorper <st...@gmail.com>
AuthorDate: Wed Aug 16 14:38:48 2023 -0700

    Add support for netcdf compression
---
 CHANGELOG.md                                       |  1 +
 analysis/conda-requirements.txt                    |  2 +-
 .../webservice/algorithms/doms/BaseDomsHandler.py  | 40 +++++++++++++++++-----
 3 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 620d65f..6c19c56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - SDAP-467: Added pagination to cdmsresults endpoint
 - SDAP-461: Added 4 remaining Saildrone insitu datasets.
 - SDAP-473: Added support for matchup job prioritization
+- SDAP-481: Added support for NetCDF compression
 ### Changed
 - SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
   - **NOTE:** Deploying these changes to an existing SDAP deployment will require modifying the Cassandra database with stored results. There is a script to do so at `/tools/update-doms-data-schema/update.py`
diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt
index e27bdea..c092350 100644
--- a/analysis/conda-requirements.txt
+++ b/analysis/conda-requirements.txt
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 
-netcdf4==1.5.5.1
+netcdf4==1.6.4
 basemap==1.2.2
 scipy==1.6.0
 pyspark==3.2.1
diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
index 84c9163..84627c1 100644
--- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py
+++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
@@ -297,6 +297,8 @@ class DomsCSVFormatter:
 
 
 class DomsNetCDFFormatter:
+    compression = 'zlib'
+    comp_level = 5
     @staticmethod
     def create(executionId, results, params, details):
 
@@ -362,17 +364,20 @@ class DomsNetCDFFormatter:
 
         #Create Satellite group, variables, and attributes
         satelliteGroup = dataset.createGroup(satellite_group_name)
-        satelliteWriter = DomsNetCDFValueWriter(satelliteGroup, params["parameter"])
+        satelliteWriter = DomsNetCDFValueWriter(satelliteGroup, DomsNetCDFFormatter.compression, DomsNetCDFFormatter.comp_level)
 
         # Create InSitu group, variables, and attributes
         insituGroup = dataset.createGroup(insitu_group_name)
-        insituWriter = DomsNetCDFValueWriter(insituGroup, params["parameter"])
+        insituWriter = DomsNetCDFValueWriter(insituGroup, DomsNetCDFFormatter.compression, DomsNetCDFFormatter.comp_level)
 
         # Add data to Insitu and Satellite groups, generate array of match ID pairs
         matches = DomsNetCDFFormatter.__writeResults(results, satelliteWriter, insituWriter)
         dataset.createDimension("MatchedRecords", size=None)
         dataset.createDimension("MatchedGroups", size=2)
-        matchArray = dataset.createVariable("matchIDs", "f4", ("MatchedRecords", "MatchedGroups"))
+        matchArray = dataset.createVariable(
+            'matchIDs', 'f4', ('MatchedRecords', 'MatchedGroups'),
+            compression=DomsNetCDFFormatter.compression, complevel=DomsNetCDFFormatter.comp_level
+        )
         matchArray[:] = matches
 
         dataset.close()
@@ -441,7 +446,7 @@ class DomsNetCDFFormatter:
 
 
 class DomsNetCDFValueWriter:
-    def __init__(self, group, matchup_parameter):
+    def __init__(self, group, compression=None, comp_level=None):
         group.createDimension("dim", size=None)
         self.group = group
 
@@ -454,6 +459,9 @@ class DomsNetCDFValueWriter:
         self.secondary_group_name = "SecondaryData"
         self.data_map = defaultdict(list)
 
+        self.compression = compression
+        self.comp_level = comp_level
+
     def addData(self, result_item):
         """
         Populate DomsNetCDFValueWriter fields from matchup results dict
@@ -491,9 +499,18 @@ class DomsNetCDFValueWriter:
         #
         # Create variables, enrich with attributes, and add data
         #
-        lonVar = self.group.createVariable('lon', 'f4', ('dim',), fill_value=-32767.0)
-        latVar = self.group.createVariable('lat', 'f4', ('dim',), fill_value=-32767.0)
-        timeVar = self.group.createVariable('time', 'f4', ('dim',), fill_value=-32767.0)
+        lonVar = self.group.createVariable(
+            'lon', 'f4', ('dim',), fill_value=-32767.0,
+            compression=self.compression, complevel=self.comp_level
+        )
+        latVar = self.group.createVariable(
+            'lat', 'f4', ('dim',), fill_value=-32767.0,
+            compression=self.compression, complevel=self.comp_level
+        )
+        timeVar = self.group.createVariable(
+            'time', 'f4', ('dim',), fill_value=-32767.0,
+            compression=self.compression, complevel=self.comp_level
+        )
 
         self.__enrichLon(lonVar, min(self.lon), max(self.lon))
         self.__enrichLat(latVar, min(self.lat), max(self.lat))
@@ -505,7 +522,10 @@ class DomsNetCDFValueWriter:
 
         # Add depth variable, if present
         if self.depth and any(self.depth):
-            depthVar = self.group.createVariable('depth', 'f4', ('dim',), fill_value=-32767.0)
+            depthVar = self.group.createVariable(
+                'depth', 'f4', ('dim',), fill_value=-32767.0,
+                compression=self.compression, complevel=self.comp_level
+            )
             self.__enrichDepth(depthVar, self.__calcMin(self.depth), max(self.depth))
             depthVar[:] = self.depth
 
@@ -533,7 +553,9 @@ class DomsNetCDFValueWriter:
                 cf_name = variable[1]
 
                 data_variable = self.group.createVariable(
-                    cf_name if cf_name is not None and cf_name != '' else name, 'f4', ('dim',), fill_value=-32767.0)
+                    cf_name if cf_name is not None and cf_name != '' else name, 'f4', ('dim',),
+                    fill_value=-32767.0, compression=self.compression, complevel=self.comp_level
+                )
                 # Find min/max for data variables. It is possible for 'None' to
                 # be in this list, so filter those out when doing the calculation.
                 min_data = np.nanmin(variables[variable])


[incubator-sdap-nexus] 01/02: Merge branch 'SDAP-455', remote-tracking branch 'origin' into SDAP-481

Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

skperez pushed a commit to branch SDAP-481
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit 96c66a99fbdcee8abc95ba1c3398bc47e8462a8e
Merge: e3c08b9 27dfc9f
Author: skorper <st...@gmail.com>
AuthorDate: Mon Aug 14 17:11:02 2023 -0700

    Merge branch 'SDAP-455', remote-tracking branch 'origin' into SDAP-481

 CHANGELOG.md                                       |   5 +-
 analysis/setup.py                                  |   3 +-
 .../webservice/algorithms/doms/BaseDomsHandler.py  |  19 +-
 .../webservice/algorithms/doms/DatasetListQuery.py |  14 +-
 .../algorithms/doms/DomsInitialization.py          |   3 +-
 .../webservice/algorithms/doms/ExecutionCancel.py  |  83 +++++++++
 .../webservice/algorithms/doms/ExecutionStatus.py  |  67 ++++++++
 .../webservice/algorithms/doms/ResultsRetrieval.py |   6 +-
 .../webservice/algorithms/doms/ResultsStorage.py   |  78 ++++++---
 analysis/webservice/algorithms/doms/__init__.py    |   2 +
 analysis/webservice/algorithms_spark/Matchup.py    | 191 ++++++++++++++-------
 .../NexusCalcSparkTornadoHandler.py                |  15 +-
 analysis/webservice/apidocs/openapi.yml            |  52 ++++++
 analysis/webservice/config/scheduler.xml           |  10 ++
 .../app_builders/HandlerArgsBuilder.py             |   1 +
 .../app_builders/SparkContextBuilder.py            |   9 +-
 .../request/handlers/NexusRequestHandler.py        |  22 ++-
 .../webservice/webmodel/NexusExecutionResults.py   | 150 ++++++++++++++++
 data-access/requirements.txt                       |   2 +
 19 files changed, 628 insertions(+), 104 deletions(-)