You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by ea...@apache.org on 2020/09/15 16:46:30 UTC
[incubator-sdap-nexus] 02/04: revert doms
This is an automated email from the ASF dual-hosted git repository.
eamonford pushed a commit to branch bug_fixes
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit d562fa80ca66673e58eb86d9fb7429d08337ab0a
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Mon Aug 10 12:02:32 2020 -0700
revert doms
---
.gitignore | 1 +
analysis/setup.py | 3 +-
analysis/webservice/algorithms_spark/__init__.py | 6 +
analysis/webservice/config/web.ini | 2 +-
data-access/nexustiles/dao/CassandraProxy.py | 3 -
data-access/tests/config/datastores.ini | 9 ++
tools/doms/README.md | 66 +++++++++++
tools/doms/doms_reader.py | 144 +++++++++++++++++++++++
8 files changed, 229 insertions(+), 5 deletions(-)
diff --git a/.gitignore b/.gitignore
index 4e4cf6e..3e29626 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,6 @@
*.code-workspace
*.idea
*.DS_Store
+analysis/webservice/algorithms/doms/domsconfig.ini
data-access/nexustiles/config/datastores.ini
venv/
diff --git a/analysis/setup.py b/analysis/setup.py
index 9a449ce..62a6891 100644
--- a/analysis/setup.py
+++ b/analysis/setup.py
@@ -50,7 +50,8 @@ setuptools.setup(
# 'webservice.nexus_tornado.request.renderers'
#],
package_data={
- 'webservice': ['config/web.ini', 'config/algorithms.ini']
+ 'webservice': ['config/web.ini', 'config/algorithms.ini'],
+ 'webservice.algorithms.doms': ['domsconfig.ini.default']
},
data_files=[
('static', ['static/index.html'])
diff --git a/analysis/webservice/algorithms_spark/__init__.py b/analysis/webservice/algorithms_spark/__init__.py
index a25c8d5..d6ed83f 100644
--- a/analysis/webservice/algorithms_spark/__init__.py
+++ b/analysis/webservice/algorithms_spark/__init__.py
@@ -20,6 +20,7 @@ import ClimMapSpark
import CorrMapSpark
import DailyDifferenceAverageSpark
import HofMoellerSpark
+import Matchup
import MaximaMinimaSpark
import NexusCalcSparkHandler
import TimeAvgMapSpark
@@ -46,6 +47,11 @@ if module_exists("pyspark"):
pass
try:
+ import Matchup
+ except ImportError:
+ pass
+
+ try:
import TimeAvgMapSpark
except ImportError:
pass
diff --git a/analysis/webservice/config/web.ini b/analysis/webservice/config/web.ini
index a1ecb2c..2644ade 100644
--- a/analysis/webservice/config/web.ini
+++ b/analysis/webservice/config/web.ini
@@ -14,4 +14,4 @@ static_enabled=true
static_dir=static
[modules]
-module_dirs=webservice.algorithms,webservice.algorithms_spark
\ No newline at end of file
+module_dirs=webservice.algorithms,webservice.algorithms_spark,webservice.algorithms.doms
\ No newline at end of file
diff --git a/data-access/nexustiles/dao/CassandraProxy.py b/data-access/nexustiles/dao/CassandraProxy.py
index 54a849b..a8a4e6e 100644
--- a/data-access/nexustiles/dao/CassandraProxy.py
+++ b/data-access/nexustiles/dao/CassandraProxy.py
@@ -161,9 +161,6 @@ class CassandraProxy(object):
self.__cass_protocol_version = config.getint("cassandra", "protocol_version")
self.__cass_dc_policy = config.get("cassandra", "dc_policy")
- logger.info("Setting cassandra host to " + self.__cass_url)
- logger.info("Setting cassandra username to " + self.__cass_username)
-
try:
self.__cass_port = config.getint("cassandra", "port")
except NoOptionError:
diff --git a/data-access/tests/config/datastores.ini b/data-access/tests/config/datastores.ini
new file mode 100644
index 0000000..194760c
--- /dev/null
+++ b/data-access/tests/config/datastores.ini
@@ -0,0 +1,9 @@
+[cassandra]
+host=127.0.0.1
+keyspace=nexustiles
+local_datacenter=datacenter1
+protocol_version=3
+
+[solr]
+host=localhost:8983
+core=nexustiles
\ No newline at end of file
diff --git a/tools/doms/README.md b/tools/doms/README.md
new file mode 100644
index 0000000..c49fa4a
--- /dev/null
+++ b/tools/doms/README.md
@@ -0,0 +1,66 @@
+# doms_reader.py
+The functions in doms_reader.py read a DOMS netCDF file into memory, assemble a list of matches of satellite and in situ data, and optionally output the matches to a CSV file. Each matched pair contains one satellite data record and one in situ data record.
+
+The DOMS netCDF files hold satellite data and in situ data in different groups (`SatelliteData` and `InsituData`). The `matchIDs` netCDF variable contains pairs of IDs (matches) which reference a satellite data record and an in situ data record in their respective groups. These records have a many-to-many relationship; one satellite record may match to many in situ records, and one in situ record may match to many satellite records. The `assemble_matches` function assembles the individual matched pairs from these records into a list of matches.
+
+## Requirements
+This tool was developed and tested with Python 2.7.5 and 3.7.0a0.
+Imported packages:
+* argparse
+* netCDF4
+* sys
+* datetime
+* csv
+* collections
+* logging
+
+
+## Functions
+### Function: `assemble_matches(filename)`
+Read a DOMS netCDF file into memory and return a list of matches from the file.
+
+#### Parameters
+- `filename` (str): the DOMS netCDF file name.
+
+#### Returns
+- `matches` (list): List of matches.
+
+Each list element in `matches` is a dictionary organized as follows:
+ For match `m`, netCDF group `GROUP` ('SatelliteData' or 'InsituData'), and netCDF group variable `VARIABLE`:
+
+`matches[m][GROUP]['matchID']`: netCDF `MatchedRecords` dimension ID for the match
+`matches[m][GROUP]['GROUPID']`: GROUP netCDF `dim` dimension ID for the record
+`matches[m][GROUP][VARIABLE]`: variable value
+
+For example, to access the timestamps of the satellite data and the in situ data of the first match in the list, along with the `MatchedRecords` dimension ID and the groups' `dim` dimension ID:
+```python
+matches[0]['SatelliteData']['time']
+matches[0]['InsituData']['time']
+matches[0]['SatelliteData']['matchID']
+matches[0]['SatelliteData']['SatelliteDataID']
+matches[0]['InsituData']['InsituDataID']
+```
+
+
+### Function: `matches_to_csv(matches, csvfile)`
+Write the DOMS matches to a CSV file. Include a header of column names which are based on the group and variable names from the netCDF file.
+
+#### Parameters:
+- `matches` (list): the list of dictionaries containing the DOMS matches as returned from the `assemble_matches` function.
+- `csvfile` (str): the name of the CSV output file.
+
+## Usage
+For example, to read some DOMS netCDF file called `doms_file.nc`:
+### Command line
+The main function for `doms_reader.py` takes one `filename` parameter (`doms_file.nc` argument in this example) for the DOMS netCDF file to read, calls the `assemble_matches` function, then calls the `matches_to_csv` function to write the matches to a CSV file `doms_matches.csv`.
+```
+python doms_reader.py doms_file.nc
+```
+```
+python3 doms_reader.py doms_file.nc
+```
+### Importing `assemble_matches`
+```python
+from doms_reader import assemble_matches
+matches = assemble_matches('doms_file.nc')
+```
diff --git a/tools/doms/doms_reader.py b/tools/doms/doms_reader.py
new file mode 100644
index 0000000..c8229c4
--- /dev/null
+++ b/tools/doms/doms_reader.py
@@ -0,0 +1,144 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+from netCDF4 import Dataset, num2date
+import sys
+import datetime
+import csv
+from collections import OrderedDict
+import logging
+
+LOGGER = logging.getLogger("doms_reader")
+
+def assemble_matches(filename):
+ """
+ Read a DOMS netCDF file and return a list of matches.
+
+ Parameters
+ ----------
+ filename : str
+ The DOMS netCDF file name.
+
+ Returns
+ -------
+ matches : list
+ List of matches. Each list element is a dictionary.
+ For match m, netCDF group GROUP (SatelliteData or InsituData), and
+ group variable VARIABLE:
+ matches[m][GROUP]['matchID']: MatchedRecords dimension ID for the match
+ matches[m][GROUP]['GROUPID']: GROUP dim dimension ID for the record
+ matches[m][GROUP][VARIABLE]: variable value
+ """
+
+ try:
+ # Open the netCDF file
+ with Dataset(filename, 'r') as doms_nc:
+ # Check that the number of groups is consistent w/ the MatchedGroups
+ # dimension
+ assert len(doms_nc.groups) == doms_nc.dimensions['MatchedGroups'].size,\
+ ("Number of groups isn't the same as MatchedGroups dimension.")
+
+ matches = []
+ matched_records = doms_nc.dimensions['MatchedRecords'].size
+
+ # Loop through the match IDs to assemble matches
+ for match in range(0, matched_records):
+ match_dict = OrderedDict()
+ # Grab the data from each platform (group) in the match
+ for group_num, group in enumerate(doms_nc.groups):
+ match_dict[group] = OrderedDict()
+ match_dict[group]['matchID'] = match
+ ID = doms_nc.variables['matchIDs'][match][group_num]
+ match_dict[group][group + 'ID'] = ID
+ for var in doms_nc.groups[group].variables.keys():
+ match_dict[group][var] = doms_nc.groups[group][var][ID]
+
+ # Create a UTC datetime field from timestamp
+ dt = num2date(match_dict[group]['time'],
+ doms_nc.groups[group]['time'].units)
+ match_dict[group]['datetime'] = dt
+ LOGGER.info(match_dict)
+ matches.append(match_dict)
+
+ return matches
+ except (OSError, IOError) as err:
+ LOGGER.exception("Error reading netCDF file " + filename)
+ raise err
+
+def matches_to_csv(matches, csvfile):
+ """
+ Write the DOMS matches to a CSV file. Include a header of column names
+ which are based on the group and variable names from the netCDF file.
+
+ Parameters
+ ----------
+ matches : list
+ The list of dictionaries containing the DOMS matches as returned from
+ assemble_matches.
+ csvfile : str
+ The name of the CSV output file.
+ """
+ # Create a header for the CSV. Column names are GROUP_VARIABLE or
+ # GROUP_GROUPID.
+ header = []
+ for key, value in matches[0].items():
+ for otherkey in value.keys():
+ header.append(key + "_" + otherkey)
+
+ try:
+ # Write the CSV file
+ with open(csvfile, 'w') as output_file:
+ csv_writer = csv.writer(output_file)
+ csv_writer.writerow(header)
+ for match in matches:
+ row = []
+ for group, data in match.items():
+ for value in data.values():
+ row.append(value)
+ csv_writer.writerow(row)
+ except (OSError, IOError) as err:
+ LOGGER.exception("Error writing CSV file " + csvfile)
+ raise err
+
+if __name__ == '__main__':
+ """
+ Execution:
+ python doms_reader.py filename
+ OR
+ python3 doms_reader.py filename
+ """
+ logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s',
+ level=logging.INFO,
+ datefmt='%Y-%m-%d %H:%M:%S')
+
+ p = argparse.ArgumentParser()
+ p.add_argument('filename', help='DOMS netCDF file to read')
+ args = p.parse_args()
+
+ doms_matches = assemble_matches(args.filename)
+
+ matches_to_csv(doms_matches, 'doms_matches.csv')
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file