You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by ea...@apache.org on 2020/09/15 16:46:30 UTC

[incubator-sdap-nexus] 02/04: revert doms

This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a commit to branch bug_fixes
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit d562fa80ca66673e58eb86d9fb7429d08337ab0a
Author: Eamon Ford <ea...@gmail.com>
AuthorDate: Mon Aug 10 12:02:32 2020 -0700

    revert doms
---
 .gitignore                                       |   1 +
 analysis/setup.py                                |   3 +-
 analysis/webservice/algorithms_spark/__init__.py |   6 +
 analysis/webservice/config/web.ini               |   2 +-
 data-access/nexustiles/dao/CassandraProxy.py     |   3 -
 data-access/tests/config/datastores.ini          |   9 ++
 tools/doms/README.md                             |  66 +++++++++++
 tools/doms/doms_reader.py                        | 144 +++++++++++++++++++++++
 8 files changed, 229 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 4e4cf6e..3e29626 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,6 @@
 *.code-workspace
 *.idea
 *.DS_Store
+analysis/webservice/algorithms/doms/domsconfig.ini
 data-access/nexustiles/config/datastores.ini
 venv/
diff --git a/analysis/setup.py b/analysis/setup.py
index 9a449ce..62a6891 100644
--- a/analysis/setup.py
+++ b/analysis/setup.py
@@ -50,7 +50,8 @@ setuptools.setup(
     #    'webservice.nexus_tornado.request.renderers'
     #],
     package_data={
-        'webservice': ['config/web.ini', 'config/algorithms.ini']
+        'webservice': ['config/web.ini', 'config/algorithms.ini'],
+        'webservice.algorithms.doms': ['domsconfig.ini.default']
     },
     data_files=[
         ('static', ['static/index.html'])
diff --git a/analysis/webservice/algorithms_spark/__init__.py b/analysis/webservice/algorithms_spark/__init__.py
index a25c8d5..d6ed83f 100644
--- a/analysis/webservice/algorithms_spark/__init__.py
+++ b/analysis/webservice/algorithms_spark/__init__.py
@@ -20,6 +20,7 @@ import ClimMapSpark
 import CorrMapSpark
 import DailyDifferenceAverageSpark
 import HofMoellerSpark
+import Matchup
 import MaximaMinimaSpark
 import NexusCalcSparkHandler
 import TimeAvgMapSpark
@@ -46,6 +47,11 @@ if module_exists("pyspark"):
         pass
 
     try:
+        import Matchup
+    except ImportError:
+        pass
+
+    try:
         import TimeAvgMapSpark
     except ImportError:
         pass
diff --git a/analysis/webservice/config/web.ini b/analysis/webservice/config/web.ini
index a1ecb2c..2644ade 100644
--- a/analysis/webservice/config/web.ini
+++ b/analysis/webservice/config/web.ini
@@ -14,4 +14,4 @@ static_enabled=true
 static_dir=static
 
 [modules]
-module_dirs=webservice.algorithms,webservice.algorithms_spark
\ No newline at end of file
+module_dirs=webservice.algorithms,webservice.algorithms_spark,webservice.algorithms.doms
\ No newline at end of file
diff --git a/data-access/nexustiles/dao/CassandraProxy.py b/data-access/nexustiles/dao/CassandraProxy.py
index 54a849b..a8a4e6e 100644
--- a/data-access/nexustiles/dao/CassandraProxy.py
+++ b/data-access/nexustiles/dao/CassandraProxy.py
@@ -161,9 +161,6 @@ class CassandraProxy(object):
         self.__cass_protocol_version = config.getint("cassandra", "protocol_version")
         self.__cass_dc_policy = config.get("cassandra", "dc_policy")
 
-        logger.info("Setting cassandra host to " + self.__cass_url)
-        logger.info("Setting cassandra username to " + self.__cass_username)
-
         try:
             self.__cass_port = config.getint("cassandra", "port")
         except NoOptionError:
diff --git a/data-access/tests/config/datastores.ini b/data-access/tests/config/datastores.ini
new file mode 100644
index 0000000..194760c
--- /dev/null
+++ b/data-access/tests/config/datastores.ini
@@ -0,0 +1,9 @@
+[cassandra]
+host=127.0.0.1
+keyspace=nexustiles
+local_datacenter=datacenter1
+protocol_version=3
+
+[solr]
+host=localhost:8983
+core=nexustiles
\ No newline at end of file
diff --git a/tools/doms/README.md b/tools/doms/README.md
new file mode 100644
index 0000000..c49fa4a
--- /dev/null
+++ b/tools/doms/README.md
@@ -0,0 +1,66 @@
+# doms_reader.py
+The functions in doms_reader.py read a DOMS netCDF file into memory, assemble a list of matches of satellite and in situ data, and optionally output the matches to a CSV file. Each matched pair contains one satellite data record and one in situ data record.
+
+The DOMS netCDF files hold satellite data and in situ data in different groups (`SatelliteData` and `InsituData`). The `matchIDs` netCDF variable contains pairs of IDs (matches) which reference a satellite data record and an in situ data record in their respective groups. These records have a many-to-many relationship; one satellite record may match to many in situ records, and one in situ record may match to many satellite records. The `assemble_matches` function assembles the individual data records into matched pairs, using each ID pair in the `matchIDs` variable to look up the corresponding record in its group.
+
+## Requirements
+This tool was developed and tested with Python 2.7.5 and 3.7.0a0.
+Imported packages:
+* argparse
+* netCDF4
+* sys
+* datetime
+* csv
+* collections
+* logging
+    
+
+## Functions
+### Function: `assemble_matches(filename)`
+Read a DOMS netCDF file into memory and return a list of matches from the file.
+
+#### Parameters 
+- `filename` (str): the DOMS netCDF file name.
+    
+#### Returns
+- `matches` (list): List of matches. 
+
+Each list element in `matches` is a dictionary organized as follows:
+    For match `m`, netCDF group `GROUP` ('SatelliteData' or 'InsituData'), and netCDF group variable `VARIABLE`:
+
+- `matches[m][GROUP]['matchID']`: netCDF `MatchedRecords` dimension ID for the match
+- `matches[m][GROUP]['GROUPID']`: GROUP netCDF `dim` dimension ID for the record (the key is the group name followed by `ID`, e.g. `SatelliteDataID`)
+- `matches[m][GROUP][VARIABLE]`: variable value
+
+For example, to access the timestamps of the satellite data and the in situ data of the first match in the list, along with the `MatchedRecords` dimension ID and the groups' `dim` dimension ID:
+```python
+matches[0]['SatelliteData']['time']
+matches[0]['InsituData']['time']
+matches[0]['SatelliteData']['matchID']
+matches[0]['SatelliteData']['SatelliteDataID']
+matches[0]['InsituData']['InsituDataID']
+```
+
+        
+### Function: `matches_to_csv(matches, csvfile)`
+Write the DOMS matches to a CSV file. Include a header of column names which are based on the group and variable names from the netCDF file.
+    
+#### Parameters
+- `matches` (list): the list of dictionaries containing the DOMS matches as returned from the `assemble_matches` function.
+- `csvfile` (str): the name of the CSV output file.
+
+## Usage
+For example, to read some DOMS netCDF file called `doms_file.nc`:
+### Command line
+When run as a script, `doms_reader.py` takes one `filename` argument (`doms_file.nc` in this example) naming the DOMS netCDF file to read, calls the `assemble_matches` function, then calls the `matches_to_csv` function to write the matches to the CSV file `doms_matches.csv` in the current working directory.
+```
+python doms_reader.py doms_file.nc
+```
+```
+python3 doms_reader.py doms_file.nc
+```
+### Importing `assemble_matches`
+```python
+from doms_reader import assemble_matches
+matches = assemble_matches('doms_file.nc')
+```
diff --git a/tools/doms/doms_reader.py b/tools/doms/doms_reader.py
new file mode 100644
index 0000000..c8229c4
--- /dev/null
+++ b/tools/doms/doms_reader.py
@@ -0,0 +1,144 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+from netCDF4 import Dataset, num2date
+import sys
+import datetime
+import csv
+from collections import OrderedDict
+import logging
+
+LOGGER = logging.getLogger("doms_reader")
+
+def assemble_matches(filename):
+    """
+    Read a DOMS netCDF file and return a list of matches.
+    
+    Parameters
+    ----------
+    filename : str
+        The DOMS netCDF file name.
+    
+    Returns
+    -------
+    matches : list
+        List of matches. Each list element is a dictionary.
+        For match m, netCDF group GROUP (SatelliteData or InsituData), and
+        group variable VARIABLE:
+        matches[m][GROUP]['matchID']: MatchedRecords dimension ID for the match
+        matches[m][GROUP]['GROUPID']: GROUP dim dimension ID for the record
+        matches[m][GROUP][VARIABLE]: variable value 
+    """
+    
+    try:
+        # Open the netCDF file
+        with Dataset(filename, 'r') as doms_nc:
+            # Check that the number of groups is consistent w/ the MatchedGroups
+            # dimension
+            assert len(doms_nc.groups) == doms_nc.dimensions['MatchedGroups'].size,\
+                ("Number of groups isn't the same as MatchedGroups dimension.")
+            
+            matches = []
+            matched_records = doms_nc.dimensions['MatchedRecords'].size
+            
+            # Loop through the match IDs to assemble matches
+            for match in range(0, matched_records):
+                match_dict = OrderedDict()
+                # Grab the data from each platform (group) in the match
+                for group_num, group in enumerate(doms_nc.groups):
+                    match_dict[group] = OrderedDict()
+                    match_dict[group]['matchID'] = match
+                    ID = doms_nc.variables['matchIDs'][match][group_num]
+                    match_dict[group][group + 'ID'] = ID
+                    for var in doms_nc.groups[group].variables.keys():
+                        match_dict[group][var] = doms_nc.groups[group][var][ID]
+                    
+                    # Create a UTC datetime field from timestamp
+                    dt = num2date(match_dict[group]['time'],
+                                  doms_nc.groups[group]['time'].units)
+                    match_dict[group]['datetime'] = dt
+                LOGGER.info(match_dict)
+                matches.append(match_dict)
+            
+            return matches
+    except (OSError, IOError) as err:
+        LOGGER.exception("Error reading netCDF file " + filename)
+        raise err
+    
+def matches_to_csv(matches, csvfile):
+    """
+    Write the DOMS matches to a CSV file. Include a header of column names
+    which are based on the group and variable names from the netCDF file.
+    
+    Parameters
+    ----------
+    matches : list
+        The list of dictionaries containing the DOMS matches as returned from
+        assemble_matches.      
+    csvfile : str
+        The name of the CSV output file.
+    """
+    # Create a header for the CSV. Column names are GROUP_VARIABLE or
+    # GROUP_GROUPID.
+    header = []
+    for key, value in matches[0].items():
+        for otherkey in value.keys():
+            header.append(key + "_" + otherkey)
+    
+    try:
+        # Write the CSV file
+        with open(csvfile, 'w') as output_file:
+            csv_writer = csv.writer(output_file)
+            csv_writer.writerow(header)
+            for match in matches:
+                row = []
+                for group, data in match.items():
+                    for value in data.values():
+                        row.append(value)
+                csv_writer.writerow(row)
+    except (OSError, IOError) as err:
+        LOGGER.exception("Error writing CSV file " + csvfile)
+        raise err
+
+if __name__ == '__main__':
+    """
+    Execution:
+        python doms_reader.py filename
+        OR
+        python3 doms_reader.py filename
+    """
+    logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s',
+                    level=logging.INFO,
+                    datefmt='%Y-%m-%d %H:%M:%S')
+
+    p = argparse.ArgumentParser()
+    p.add_argument('filename', help='DOMS netCDF file to read')
+    args = p.parse_args()
+
+    doms_matches = assemble_matches(args.filename)
+
+    matches_to_csv(doms_matches, 'doms_matches.csv')
+    
+    
+    
+    
+    
+    
+    
+    
+
+    
+    
\ No newline at end of file