You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by sk...@apache.org on 2023/08/16 21:38:59 UTC
[incubator-sdap-nexus] 02/02: Add support for netcdf compression
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a commit to branch SDAP-481
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit 3856934980ff652166f9622ced330d05c6127913
Author: skorper <st...@gmail.com>
AuthorDate: Wed Aug 16 14:38:48 2023 -0700
Add support for netcdf compression
---
CHANGELOG.md | 1 +
analysis/conda-requirements.txt | 2 +-
.../webservice/algorithms/doms/BaseDomsHandler.py | 40 +++++++++++++++++-----
3 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 620d65f..6c19c56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- SDAP-467: Added pagination to cdmsresults endpoint
- SDAP-461: Added 4 remaining Saildrone insitu datasets.
- SDAP-473: Added support for matchup job prioritization
+- SDAP-481: Added support for NetCDF compression
### Changed
- SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
- **NOTE:** Deploying these changes to an existing SDAP deployment will require modifying the Cassandra database with stored results. There is a script to do so at `/tools/update-doms-data-schema/update.py`
diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt
index e27bdea..c092350 100644
--- a/analysis/conda-requirements.txt
+++ b/analysis/conda-requirements.txt
@@ -14,7 +14,7 @@
# limitations under the License.
-netcdf4==1.5.5.1
+netcdf4==1.6.4
basemap==1.2.2
scipy==1.6.0
pyspark==3.2.1
diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
index 84c9163..84627c1 100644
--- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py
+++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
@@ -297,6 +297,8 @@ class DomsCSVFormatter:
class DomsNetCDFFormatter:
+ compression = 'zlib'
+ comp_level = 5
@staticmethod
def create(executionId, results, params, details):
@@ -362,17 +364,20 @@ class DomsNetCDFFormatter:
#Create Satellite group, variables, and attributes
satelliteGroup = dataset.createGroup(satellite_group_name)
- satelliteWriter = DomsNetCDFValueWriter(satelliteGroup, params["parameter"])
+ satelliteWriter = DomsNetCDFValueWriter(satelliteGroup, DomsNetCDFFormatter.compression, DomsNetCDFFormatter.comp_level)
# Create InSitu group, variables, and attributes
insituGroup = dataset.createGroup(insitu_group_name)
- insituWriter = DomsNetCDFValueWriter(insituGroup, params["parameter"])
+ insituWriter = DomsNetCDFValueWriter(insituGroup, DomsNetCDFFormatter.compression, DomsNetCDFFormatter.comp_level)
# Add data to Insitu and Satellite groups, generate array of match ID pairs
matches = DomsNetCDFFormatter.__writeResults(results, satelliteWriter, insituWriter)
dataset.createDimension("MatchedRecords", size=None)
dataset.createDimension("MatchedGroups", size=2)
- matchArray = dataset.createVariable("matchIDs", "f4", ("MatchedRecords", "MatchedGroups"))
+ matchArray = dataset.createVariable(
+ 'matchIDs', 'f4', ('MatchedRecords', 'MatchedGroups'),
+ compression=DomsNetCDFFormatter.compression, complevel=DomsNetCDFFormatter.comp_level
+ )
matchArray[:] = matches
dataset.close()
@@ -441,7 +446,7 @@ class DomsNetCDFFormatter:
class DomsNetCDFValueWriter:
- def __init__(self, group, matchup_parameter):
+ def __init__(self, group, compression=None, comp_level=None):
group.createDimension("dim", size=None)
self.group = group
@@ -454,6 +459,9 @@ class DomsNetCDFValueWriter:
self.secondary_group_name = "SecondaryData"
self.data_map = defaultdict(list)
+ self.compression = compression
+ self.comp_level = comp_level
+
def addData(self, result_item):
"""
Populate DomsNetCDFValueWriter fields from matchup results dict
@@ -491,9 +499,18 @@ class DomsNetCDFValueWriter:
#
# Create variables, enrich with attributes, and add data
#
- lonVar = self.group.createVariable('lon', 'f4', ('dim',), fill_value=-32767.0)
- latVar = self.group.createVariable('lat', 'f4', ('dim',), fill_value=-32767.0)
- timeVar = self.group.createVariable('time', 'f4', ('dim',), fill_value=-32767.0)
+ lonVar = self.group.createVariable(
+ 'lon', 'f4', ('dim',), fill_value=-32767.0,
+ compression=self.compression, complevel=self.comp_level
+ )
+ latVar = self.group.createVariable(
+ 'lat', 'f4', ('dim',), fill_value=-32767.0,
+ compression=self.compression, complevel=self.comp_level
+ )
+ timeVar = self.group.createVariable(
+ 'time', 'f4', ('dim',), fill_value=-32767.0,
+ compression=self.compression, complevel=self.comp_level
+ )
self.__enrichLon(lonVar, min(self.lon), max(self.lon))
self.__enrichLat(latVar, min(self.lat), max(self.lat))
@@ -505,7 +522,10 @@ class DomsNetCDFValueWriter:
# Add depth variable, if present
if self.depth and any(self.depth):
- depthVar = self.group.createVariable('depth', 'f4', ('dim',), fill_value=-32767.0)
+ depthVar = self.group.createVariable(
+ 'depth', 'f4', ('dim',), fill_value=-32767.0,
+ compression=self.compression, complevel=self.comp_level
+ )
self.__enrichDepth(depthVar, self.__calcMin(self.depth), max(self.depth))
depthVar[:] = self.depth
@@ -533,7 +553,9 @@ class DomsNetCDFValueWriter:
cf_name = variable[1]
data_variable = self.group.createVariable(
- cf_name if cf_name is not None and cf_name != '' else name, 'f4', ('dim',), fill_value=-32767.0)
+ cf_name if cf_name is not None and cf_name != '' else name, 'f4', ('dim',),
+ fill_value=-32767.0, compression=self.compression, complevel=self.comp_level
+ )
# Find min/max for data variables. It is possible for 'None' to
# be in this list, so filter those out when doing the calculation.
min_data = np.nanmin(variables[variable])