You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by sk...@apache.org on 2023/08/16 21:38:57 UTC
[incubator-sdap-nexus] branch SDAP-481 created (now 3856934)
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a change to branch SDAP-481
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
at 3856934 Add support for netcdf compression
This branch includes the following new commits:
new 96c66a9 Merge branch 'SDAP-455', remote-tracking branch 'origin' into SDAP-481
new 3856934 Add support for netcdf compression
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
[incubator-sdap-nexus] 02/02: Add support for netcdf compression
Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a commit to branch SDAP-481
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit 3856934980ff652166f9622ced330d05c6127913
Author: skorper <st...@gmail.com>
AuthorDate: Wed Aug 16 14:38:48 2023 -0700
Add support for netcdf compression
---
CHANGELOG.md | 1 +
analysis/conda-requirements.txt | 2 +-
.../webservice/algorithms/doms/BaseDomsHandler.py | 40 +++++++++++++++++-----
3 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 620d65f..6c19c56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- SDAP-467: Added pagination to cdmsresults endpoint
- SDAP-461: Added 4 remaining Saildrone insitu datasets.
- SDAP-473: Added support for matchup job prioritization
+- SDAP-481: Added support for NetCDF compression
### Changed
- SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
- **NOTE:** Deploying these changes to an existing SDAP deployment will require modifying the Cassandra database with stored results. There is a script to do so at `/tools/update-doms-data-schema/update.py`
diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt
index e27bdea..c092350 100644
--- a/analysis/conda-requirements.txt
+++ b/analysis/conda-requirements.txt
@@ -14,7 +14,7 @@
# limitations under the License.
-netcdf4==1.5.5.1
+netcdf4==1.6.4
basemap==1.2.2
scipy==1.6.0
pyspark==3.2.1
diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
index 84c9163..84627c1 100644
--- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py
+++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
@@ -297,6 +297,8 @@ class DomsCSVFormatter:
class DomsNetCDFFormatter:
+ compression = 'zlib'
+ comp_level = 5
@staticmethod
def create(executionId, results, params, details):
@@ -362,17 +364,20 @@ class DomsNetCDFFormatter:
#Create Satellite group, variables, and attributes
satelliteGroup = dataset.createGroup(satellite_group_name)
- satelliteWriter = DomsNetCDFValueWriter(satelliteGroup, params["parameter"])
+ satelliteWriter = DomsNetCDFValueWriter(satelliteGroup, DomsNetCDFFormatter.compression, DomsNetCDFFormatter.comp_level)
# Create InSitu group, variables, and attributes
insituGroup = dataset.createGroup(insitu_group_name)
- insituWriter = DomsNetCDFValueWriter(insituGroup, params["parameter"])
+ insituWriter = DomsNetCDFValueWriter(insituGroup, DomsNetCDFFormatter.compression, DomsNetCDFFormatter.comp_level)
# Add data to Insitu and Satellite groups, generate array of match ID pairs
matches = DomsNetCDFFormatter.__writeResults(results, satelliteWriter, insituWriter)
dataset.createDimension("MatchedRecords", size=None)
dataset.createDimension("MatchedGroups", size=2)
- matchArray = dataset.createVariable("matchIDs", "f4", ("MatchedRecords", "MatchedGroups"))
+ matchArray = dataset.createVariable(
+ 'matchIDs', 'f4', ('MatchedRecords', 'MatchedGroups'),
+ compression=DomsNetCDFFormatter.compression, complevel=DomsNetCDFFormatter.comp_level
+ )
matchArray[:] = matches
dataset.close()
@@ -441,7 +446,7 @@ class DomsNetCDFFormatter:
class DomsNetCDFValueWriter:
- def __init__(self, group, matchup_parameter):
+ def __init__(self, group, compression=None, comp_level=None):
group.createDimension("dim", size=None)
self.group = group
@@ -454,6 +459,9 @@ class DomsNetCDFValueWriter:
self.secondary_group_name = "SecondaryData"
self.data_map = defaultdict(list)
+ self.compression = compression
+ self.comp_level = comp_level
+
def addData(self, result_item):
"""
Populate DomsNetCDFValueWriter fields from matchup results dict
@@ -491,9 +499,18 @@ class DomsNetCDFValueWriter:
#
# Create variables, enrich with attributes, and add data
#
- lonVar = self.group.createVariable('lon', 'f4', ('dim',), fill_value=-32767.0)
- latVar = self.group.createVariable('lat', 'f4', ('dim',), fill_value=-32767.0)
- timeVar = self.group.createVariable('time', 'f4', ('dim',), fill_value=-32767.0)
+ lonVar = self.group.createVariable(
+ 'lon', 'f4', ('dim',), fill_value=-32767.0,
+ compression=self.compression, complevel=self.comp_level
+ )
+ latVar = self.group.createVariable(
+ 'lat', 'f4', ('dim',), fill_value=-32767.0,
+ compression=self.compression, complevel=self.comp_level
+ )
+ timeVar = self.group.createVariable(
+ 'time', 'f4', ('dim',), fill_value=-32767.0,
+ compression=self.compression, complevel=self.comp_level
+ )
self.__enrichLon(lonVar, min(self.lon), max(self.lon))
self.__enrichLat(latVar, min(self.lat), max(self.lat))
@@ -505,7 +522,10 @@ class DomsNetCDFValueWriter:
# Add depth variable, if present
if self.depth and any(self.depth):
- depthVar = self.group.createVariable('depth', 'f4', ('dim',), fill_value=-32767.0)
+ depthVar = self.group.createVariable(
+ 'depth', 'f4', ('dim',), fill_value=-32767.0,
+ compression=self.compression, complevel=self.comp_level
+ )
self.__enrichDepth(depthVar, self.__calcMin(self.depth), max(self.depth))
depthVar[:] = self.depth
@@ -533,7 +553,9 @@ class DomsNetCDFValueWriter:
cf_name = variable[1]
data_variable = self.group.createVariable(
- cf_name if cf_name is not None and cf_name != '' else name, 'f4', ('dim',), fill_value=-32767.0)
+ cf_name if cf_name is not None and cf_name != '' else name, 'f4', ('dim',),
+ fill_value=-32767.0, compression=self.compression, complevel=self.comp_level
+ )
# Find min/max for data variables. It is possible for 'None' to
# be in this list, so filter those out when doing the calculation.
min_data = np.nanmin(variables[variable])
[incubator-sdap-nexus] 01/02: Merge branch 'SDAP-455', remote-tracking branch 'origin' into SDAP-481
Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a commit to branch SDAP-481
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit 96c66a99fbdcee8abc95ba1c3398bc47e8462a8e
Merge: e3c08b9 27dfc9f
Author: skorper <st...@gmail.com>
AuthorDate: Mon Aug 14 17:11:02 2023 -0700
Merge branch 'SDAP-455', remote-tracking branch 'origin' into SDAP-481
CHANGELOG.md | 5 +-
analysis/setup.py | 3 +-
.../webservice/algorithms/doms/BaseDomsHandler.py | 19 +-
.../webservice/algorithms/doms/DatasetListQuery.py | 14 +-
.../algorithms/doms/DomsInitialization.py | 3 +-
.../webservice/algorithms/doms/ExecutionCancel.py | 83 +++++++++
.../webservice/algorithms/doms/ExecutionStatus.py | 67 ++++++++
.../webservice/algorithms/doms/ResultsRetrieval.py | 6 +-
.../webservice/algorithms/doms/ResultsStorage.py | 78 ++++++---
analysis/webservice/algorithms/doms/__init__.py | 2 +
analysis/webservice/algorithms_spark/Matchup.py | 191 ++++++++++++++-------
.../NexusCalcSparkTornadoHandler.py | 15 +-
analysis/webservice/apidocs/openapi.yml | 52 ++++++
analysis/webservice/config/scheduler.xml | 10 ++
.../app_builders/HandlerArgsBuilder.py | 1 +
.../app_builders/SparkContextBuilder.py | 9 +-
.../request/handlers/NexusRequestHandler.py | 22 ++-
.../webservice/webmodel/NexusExecutionResults.py | 150 ++++++++++++++++
data-access/requirements.txt | 2 +
19 files changed, 628 insertions(+), 104 deletions(-)