You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by tl...@apache.org on 2022/06/08 17:40:42 UTC
[incubator-sdap-nexus] branch sdap-388-proxy updated: proxy with redirect feature tested on laptop
This is an automated email from the ASF dual-hosted git repository.
tloubrieu pushed a commit to branch sdap-388-proxy
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
The following commit(s) were added to refs/heads/sdap-388-proxy by this push:
new 8a1ef76 proxy with redirect feature tested on laptop
8a1ef76 is described below
commit 8a1ef7680016bbb7ac2cbe92b41f1f51ac053924
Author: Thomas Loubrieu <lo...@jpl.nasa.gov>
AuthorDate: Wed Jun 8 19:40:31 2022 +0200
proxy with redirect feature tested on laptop
---
analysis/README.md | 22 +++-
analysis/conda-requirements.txt | 4 +-
analysis/setup.py | 2 +-
analysis/tests/redirect/__init__.py | 0
analysis/tests/redirect/collections-config.yaml | 20 +++
analysis/tests/webapp_test.py | 14 +++
analysis/webservice/algorithms/DataSeriesList.py | 19 ++-
.../algorithms/doms/DomsInitialization.py | 42 ++++---
.../app_builders/HandlerArgsBuilder.py | 41 +++++++
.../nexus_tornado/app_builders/NexusAppBuilder.py | 75 ++++++++++++
.../app_builders/RedirectAppBuilder.py | 16 +++
.../app_builders/SparkContextBuilder.py | 17 +++
.../nexus_tornado/app_builders/__init__.py | 2 +
analysis/webservice/redirect/RedirectHandler.py | 23 +++-
.../webservice/redirect/RemoteCollectionMatcher.py | 37 +++---
analysis/webservice/webapp.py | 135 ++++++---------------
data-access/nexustiles/dao/CassandraProxy.py | 15 ++-
data-access/nexustiles/dao/SolrProxy.py | 2 +
docker/nexus-webapp/Dockerfile | 2 +-
helm/templates/ingress.yml | 45 ++++---
helm/templates/webapp.yml | 6 +-
helm/values.yaml | 6 +-
22 files changed, 374 insertions(+), 171 deletions(-)
diff --git a/analysis/README.md b/analysis/README.md
index a55841b..f34c8f8 100644
--- a/analysis/README.md
+++ b/analysis/README.md
@@ -10,7 +10,8 @@ Python module that exposes NEXUS analytical capabilities via a HTTP webservice.
1. Setup a separate conda env or activate an existing one
````
- conda create --name nexus-analysis python=2.7.17
+ conda create --name nexus-analysis python=3.8
+ conda create -n nexus-analysis-38 --no-default-packages python=3.8
conda activate nexus-analysis
````
@@ -18,9 +19,7 @@ Python module that exposes NEXUS analytical capabilities via a HTTP webservice.
````
cd analysis
- conda install pyspark
conda install -c conda-forge --file conda-requirements.txt
- #conda install numpy matplotlib mpld3 scipy netCDF4 basemap gdal pyproj=1.9.5.1 libnetcdf=4.3.3.1
````
3. Update the configuration for solr and cassandra
@@ -46,7 +45,18 @@ BUT be carefull to remove them when you build the docker image. Otherwise they w
python setup.py install
````
-5. Set environment variables (examples):
+5. Launch other requirements, through helm:
+
+ ````
+ cd ../helm/
+ kubectl create namespace sdap
+ kubectl create configmap collections-config --from-file=../analysis/tests/redirect/ -n sdap
+ helm install nexus . -n sdap
+ kubectl port-forward service/nexus-cassandra 9042:9042 -n sdap
+
+ ````
+
+7. Set environment variables (examples):
```
PYTHONUNBUFFERED=1
@@ -54,10 +64,12 @@ BUT be carefull to remove them when you build the docker image. Otherwise they w
JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_241.jdk/Contents/Home
```
-5. Launch unit tests
+8. Launch unit tests
pip install pytest
+ pip install -e .
pytest
+9.
5. Launch `python webservice/webapp.py` in command line or run it from the IDE.
diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt
index 775cf29..3190b44 100644
--- a/analysis/conda-requirements.txt
+++ b/analysis/conda-requirements.txt
@@ -1,6 +1,7 @@
netcdf4==1.5.5.1
basemap==1.2.2
scipy==1.6.0
+pyspark==3.2.1
pytz==2021.1
utm==0.6.0
shapely==1.7.1
@@ -14,5 +15,6 @@ pyyaml==6.0
geos==3.8.1
gdal==3.2.1
mock==4.0.3
-singledispatch==3.4.0.3
+importlib_metadata==4.11.4
+#singledispatch==3.4.0.3
diff --git a/analysis/setup.py b/analysis/setup.py
index 3eced72..663d9bd 100644
--- a/analysis/setup.py
+++ b/analysis/setup.py
@@ -57,7 +57,7 @@ setuptools.setup(
('static', ['static/index.html'])
],
platforms='any',
- python_requires='~=3.7',
+ python_requires='~=3.8',
classifiers=[
'Development Status :: 1 - Pre-Alpha',
'Intended Audience :: Developers',
diff --git a/analysis/tests/redirect/__init__.py b/analysis/tests/redirect/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/analysis/tests/redirect/collections-config.yaml b/analysis/tests/redirect/collections-config.yaml
new file mode 100644
index 0000000..2e26d3b
--- /dev/null
+++ b/analysis/tests/redirect/collections-config.yaml
@@ -0,0 +1,20 @@
+collections:
+ - id: gmu-pm25
+ path: https://aq-sdap.stcenter.net/nexus/
+ remote-id: pm25
+ - id: ECCO_v4_r4_EVELMASS_latlon
+ path: /data/datasets/ecco-distribution-archive/nexus-ingest/EVELMASSv4r4/*.nc
+ priority: 6
+ forward-processing-priority: 7
+ projection: Grid
+ dimensionNames:
+ latitude: latitude
+ longitude: longitude
+ depth: Z
+ time: time
+ variable: EVELMASS
+ slices:
+ time: 1
+ i: 30
+ j: 30
+ k: 1
\ No newline at end of file
diff --git a/analysis/tests/webapp_test.py b/analysis/tests/webapp_test.py
index 096f5bc..16db747 100644
--- a/analysis/tests/webapp_test.py
+++ b/analysis/tests/webapp_test.py
@@ -2,8 +2,10 @@ import unittest
import pkg_resources
import configparser
import sys
+import os
import logging
import mock
+
from webservice.webapp import inject_args_in_config
logging.basicConfig(
@@ -64,6 +66,18 @@ class MyTestCase(unittest.TestCase):
# nothing should happend we just check that there is no section named after the option
self.assertEqual(False, algorithm_config.has_section('port'))
+ @mock.patch('tornado.options')
+ @mock.patch('tornado.options._Option')
+ def test_sdap_redirection(self):
+ mock_option.name = 'collections_path'
+ mock_option.value.return_value = os.path.join(
+ os.path.dirname(__file__),
+ 'collections_config.yaml'
+ )
+ mock_options._options = {'collections_path': mock_option}
+
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/analysis/webservice/algorithms/DataSeriesList.py b/analysis/webservice/algorithms/DataSeriesList.py
index e9275ed..e79df4e 100644
--- a/analysis/webservice/algorithms/DataSeriesList.py
+++ b/analysis/webservice/algorithms/DataSeriesList.py
@@ -32,6 +32,11 @@ class DataSeriesListCalcHandlerImpl(NexusCalcHandler):
description = "Lists datasets currently available for analysis"
params = {}
+ def __init__(self, tile_service_factory, remote_collections, **kwargs):
+ super().__init__(tile_service_factory, **kwargs)
+ self._remote_collections = remote_collections
+
+
@cached(ttl=(60 * 60 * 1000)) # 1 hour cached
def calc(self, computeOptions, **args):
class SimpleResult(object):
@@ -41,4 +46,16 @@ class DataSeriesListCalcHandlerImpl(NexusCalcHandler):
def toJson(self):
return json.dumps(self.result)
- return SimpleResult(self._get_tile_service().get_dataseries_list())
+ collection_list = self._get_tile_service().get_dataseries_list()
+
+ # add remote collections
+ for collection in self._remote_collections.values():
+ collection_list.append(
+ {
+ "shortName": collection["id"],
+ "remoteUrl": collection["path"],
+ "remoteShortName": collection["remote_id"] if 'remote_id' in collection else collection["id"]
+ }
+ )
+
+ return SimpleResult(collection_list)
diff --git a/analysis/webservice/algorithms/doms/DomsInitialization.py b/analysis/webservice/algorithms/doms/DomsInitialization.py
index a2cf4b1..d1ed5a5 100644
--- a/analysis/webservice/algorithms/doms/DomsInitialization.py
+++ b/analysis/webservice/algorithms/doms/DomsInitialization.py
@@ -21,6 +21,7 @@ import pkg_resources
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
+from cassandra.cluster import NoHostAvailable
from cassandra.policies import (DCAwareRoundRobinPolicy, TokenAwarePolicy,
WhiteListRoundRobinPolicy)
from webservice.NexusHandler import nexus_initializer
@@ -32,8 +33,8 @@ class DomsInitializer:
pass
def init(self, config):
- log = logging.getLogger(__name__)
- log.info("*** STARTING DOMS INITIALIZATION ***")
+ self.log = logging.getLogger(__name__)
+ self.log.info("*** STARTING DOMS INITIALIZATION ***")
domsconfig = configparser.SafeConfigParser()
domsconfig.read(DomsInitializer._get_config_files('domsconfig.ini'))
@@ -52,11 +53,11 @@ class DomsInitializer:
except configparser.NoOptionError:
cassCreateKeyspaceGranted = "True"
- log.info("Cassandra Host(s): %s" % (cassHost))
- log.info("Cassandra Keyspace: %s" % (cassKeyspace))
- log.info("Cassandra Datacenter: %s" % (cassDatacenter))
- log.info("Cassandra Protocol Version: %s" % (cassVersion))
- log.info("Cassandra DC Policy: %s" % (cassPolicy))
+ self.log.info("Cassandra Host(s): %s" % (cassHost))
+ self.log.info("Cassandra Keyspace: %s" % (cassKeyspace))
+ self.log.info("Cassandra Datacenter: %s" % (cassDatacenter))
+ self.log.info("Cassandra Protocol Version: %s" % (cassVersion))
+ self.log.info("Cassandra DC Policy: %s" % (cassPolicy))
if cassPolicy == 'DCAwareRoundRobinPolicy':
dc_policy = DCAwareRoundRobinPolicy(cassDatacenter)
@@ -69,19 +70,24 @@ class DomsInitializer:
else:
auth_provider = None
- with Cluster([host for host in cassHost.split(',')],
- port=int(cassPort),
- load_balancing_policy=token_policy,
- protocol_version=cassVersion,
- auth_provider=auth_provider) as cluster:
- session = cluster.connect()
+ try:
+ with Cluster([host for host in cassHost.split(',')],
+ port=int(cassPort),
+ load_balancing_policy=token_policy,
+ protocol_version=cassVersion,
+ auth_provider=auth_provider) as cluster:
+ session = cluster.connect()
+
+ if cassCreateKeyspaceGranted in ["True", "true"]:
+ self.createKeyspace(session, cassKeyspace)
+ else:
+ session.set_keyspace(cassKeyspace)
+
+ self.createTables(session)
- if cassCreateKeyspaceGranted in ["True", "true"]:
- self.createKeyspace(session, cassKeyspace)
- else:
- session.set_keyspace(cassKeyspace)
+ except NoHostAvailable as e:
+ self.log.error("Unable to connect to Cassandra, Nexus will not be able to access local data ", e)
- self.createTables(session)
def override_config(self, first, second):
for section in second.sections():
diff --git a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py
new file mode 100644
index 0000000..7f6334b
--- /dev/null
+++ b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py
@@ -0,0 +1,41 @@
+import tornado
+import webservice.algorithms_spark.NexusCalcSparkHandler
+from .SparkContextBuilder import SparkContextBuilder
+
+
+class HandlerArgsBuilder:
+ def __init__(self, max_request_threads, tile_service_factory, algorithm_config, remote_collections):
+ self.request_thread_pool = tornado.concurrent.futures.ThreadPoolExecutor(max_request_threads)
+ self.tile_service_factory = tile_service_factory
+ self.algorithm_config = algorithm_config
+ self.remote_collections = remote_collections
+
+ @staticmethod
+ def handler_needs_algorithm_config(class_wrapper):
+ return (
+ class_wrapper == webservice.algorithms_spark.Matchup.Matchup
+ or issubclass(class_wrapper, webservice.algorithms.doms.BaseDomsHandler.BaseDomsQueryCalcHandler)
+ or class_wrapper == webservice.algorithms.doms.ResultsRetrieval.DomsResultsRetrievalHandler
+ or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler
+ )
+ @staticmethod
+ def handler_needs_remote_collections(class_wrapper):
+ return class_wrapper == webservice.algorithms.DataSeriesList.D
+
+ def get_args(self, clazz_wrapper):
+ args = dict(
+ clazz=clazz_wrapper,
+ tile_service_factory=self.tile_service_factory,
+ thread_pool=self. request_thread_pool
+ )
+
+ if issubclass(clazz_wrapper, webservice.algorithms_spark.NexusCalcSparkHandler.NexusCalcSparkHandler):
+ args['sc'] = SparkContextBuilder.get_spark_context()
+
+ if self.handler_needs_algorithm_config(clazz_wrapper):
+ args['config'] = self.algorithm_config
+
+ if clazz_wrapper == webservice.algorithms.DataSeriesList.DataSeriesListCalcHandlerImpl:
+ args['remote_collections'] = self.remote_collections
+
+ return args
diff --git a/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py b/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py
new file mode 100644
index 0000000..20eb335
--- /dev/null
+++ b/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py
@@ -0,0 +1,75 @@
+import logging
+from pathlib import Path
+import importlib
+from functools import partial
+import pkg_resources
+import tornado
+from nexustiles.nexustiles import NexusTileService
+from webservice import NexusHandler
+from webservice.nexus_tornado.request.handlers import NexusRequestHandler
+from .HandlerArgsBuilder import HandlerArgsBuilder
+
+
+class NexusAppBuilder:
+ def __init__(self):
+ self.handlers = []
+ self.log = logging.getLogger(__name__)
+
+ class VersionHandler(tornado.web.RequestHandler):
+ def get(self):
+ self.write(pkg_resources.get_distribution("nexusanalysis").version)
+
+ self.handlers.append((r"/version", VersionHandler))
+
+ self.handlers.append(
+ (r'/apidocs', tornado.web.RedirectHandler, {"url": "/apidocs/"}))
+
+ apidocs_path = Path(__file__).parent.parent.joinpath('apidocs').resolve()
+ self.handlers.append(
+ (
+ r'/apidocs/(.*)', tornado.web.StaticFileHandler,
+ {'path': str(apidocs_path), "default_filename": "index.html"}))
+
+ def set_modules(self, module_dir, algorithm_config, remote_collections, max_request_threads=4):
+ for moduleDir in module_dir:
+ self.log.info("Loading modules from %s" % moduleDir)
+ importlib.import_module(moduleDir)
+
+ self.log.info("Running Nexus Initializers")
+ NexusHandler.executeInitializers(algorithm_config)
+
+ self.log.info("Initializing request ThreadPool to %s" % max_request_threads)
+ tile_service_factory = partial(NexusTileService, False, False, algorithm_config)
+ handler_args_builder = HandlerArgsBuilder(
+ max_request_threads,
+ tile_service_factory,
+ algorithm_config,
+ remote_collections
+ )
+
+ for clazzWrapper in NexusHandler.AVAILABLE_HANDLERS:
+ self.handlers.append(
+ (
+ clazzWrapper.path,
+ NexusRequestHandler,
+ handler_args_builder.get_args(clazzWrapper)
+ )
+ )
+
+ return self
+
+ def enable_static(self, static_dir):
+ self.log.info("Using static root path '%s'" % static_dir)
+ self.handlers.append(
+ (r'/(.*)', tornado.web.StaticFileHandler, {'path': static_dir, "default_filename": "index.html"}))
+
+ return self
+
+ def build(self, host=None, debug=False):
+
+ return tornado.web.Application(
+ self.handlers,
+ default_host=host,
+ debug=debug
+ )
+
diff --git a/analysis/webservice/nexus_tornado/app_builders/RedirectAppBuilder.py b/analysis/webservice/nexus_tornado/app_builders/RedirectAppBuilder.py
new file mode 100644
index 0000000..cae9b43
--- /dev/null
+++ b/analysis/webservice/nexus_tornado/app_builders/RedirectAppBuilder.py
@@ -0,0 +1,16 @@
+from webservice.redirect import RedirectHandler
+from webservice.redirect import RemoteCollectionMatcher
+import tornado
+
+
+class RedirectAppBuilder:
+ def __init__(self, remote_collection_matcher: RemoteCollectionMatcher):
+ redirected_collections = remote_collection_matcher.get_remote_collections()
+ self.redirect_handler = (r'/(.*)', RedirectHandler, {'redirected_collections': redirected_collections})
+
+ def build(self, host=None, debug=False):
+ return tornado.web.Application(
+ [self.redirect_handler],
+ default_host=host,
+ debug=debug
+ )
diff --git a/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py b/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py
new file mode 100644
index 0000000..6c0aa75
--- /dev/null
+++ b/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py
@@ -0,0 +1,17 @@
+
+class SparkContextBuilder:
+ def __init__(self):
+ pass
+
+ spark_context = None
+
+ @classmethod
+ def get_spark_context(cls):
+ if cls.spark_context is None:
+ from pyspark.sql import SparkSession
+
+ spark = SparkSession.builder.appName("nexus-analysis").getOrCreate()
+ cls.spark_context = spark.sparkContext
+
+ return cls.spark_context
+
diff --git a/analysis/webservice/nexus_tornado/app_builders/__init__.py b/analysis/webservice/nexus_tornado/app_builders/__init__.py
new file mode 100644
index 0000000..8276984
--- /dev/null
+++ b/analysis/webservice/nexus_tornado/app_builders/__init__.py
@@ -0,0 +1,2 @@
+from .NexusAppBuilder import NexusAppBuilder
+from .RedirectAppBuilder import RedirectAppBuilder
\ No newline at end of file
diff --git a/analysis/webservice/redirect/RedirectHandler.py b/analysis/webservice/redirect/RedirectHandler.py
index bad0482..7b20178 100644
--- a/analysis/webservice/redirect/RedirectHandler.py
+++ b/analysis/webservice/redirect/RedirectHandler.py
@@ -4,13 +4,28 @@ from webservice.webmodel.RequestParameters import RequestParameters
class RedirectHandler(tornado.web.RequestHandler):
- def initialize(self, redirected_collections):
+ def initialize(self, redirected_collections=None):
self._redirected_collections = redirected_collections
- def get(self, *args, **kwargs):
- ds = self.get_arguments(ds=request.get_argument(RequestParameters.DATASET, None))
+ @tornado.gen.coroutine
+ def get(self, algo):
+ collection_id = self.request.query_arguments[RequestParameters.DATASET][0].decode('utf-8')
+ collection = self._redirected_collections[collection_id]
+ full_url = self.request.full_url()
+
+ #redirect to new URL
+ base_url = full_url[:full_url.find(algo)].rstrip('/')
+ new_base_url = collection['path'].rstrip('/')
+ new_full_url = full_url.replace(base_url, new_base_url)
+
+ # use remote collection id
+ if 'remote_id' in collection:
+ dataset_param = f"ds={collection_id}"
+ new_dataset_param = f"ds={collection['remote_id']}"
+ new_full_url = new_full_url.replace(dataset_param, new_dataset_param)
+
self.redirect(
- self._redirected_collections[ds]['url'],
+ new_full_url,
permanent=True
)
diff --git a/analysis/webservice/redirect/RemoteCollectionMatcher.py b/analysis/webservice/redirect/RemoteCollectionMatcher.py
index f7656ff..d0e6e43 100644
--- a/analysis/webservice/redirect/RemoteCollectionMatcher.py
+++ b/analysis/webservice/redirect/RemoteCollectionMatcher.py
@@ -1,29 +1,38 @@
import yaml
+from tornado.routing import Matcher
from webservice.webmodel.RequestParameters import RequestParameters
+from tornado.httputil import HTTPServerRequest
class RemoteCollectionMatcher(Matcher):
def __init__(self, collections_config: str):
self._collections_config = collections_config
- self._redirected_collections = None
+ self._remote_collections = None
- def get_redirected_collections(self):
- if self._redirected_collections is None:
- self._redirected_collections = self.get_redirected_collections(self._collections_config)
- return self._redicted_collections
+ def get_remote_collections(self):
+ if self._remote_collections is None:
+ self._remote_collections = self._get_remote_collections(self._collections_config)
+ return self._remote_collections
@staticmethod
- def _get_redirected_collections(collections_config: str):
- _redirected_collections = {}
+ def _get_remote_collections(collections_config: str):
+ _remote_collections = {}
with open(collections_config, 'r') as f:
collections_yaml = yaml.load(f, Loader=yaml.FullLoader)
for collection in collections_yaml['collections']:
- if "url" in collection:
- _redirected_collections[collection["id"]] = collection
+ if "path" in collection and collection['path'].startswith('http'):
+ _remote_collections[collection["id"]] = {k.replace('-', '_'): v for k, v in collection.items()}
- return _redirected_collections
+ return _remote_collections
- def match(self, request):
- ds = request.get_argument(RequestParameters.DATASET, None)
- if ds in self._redicted_collections:
- return self._redicted_collection[ds]
\ No newline at end of file
+ def match(self, request: HTTPServerRequest):
+ if RequestParameters.DATASET in request.query_arguments:
+ # the returned values are not used because I did not find how to use them
+ # just return empty dict() works to signify the request matches
+ # TODO do not hardcode utf-8, no time to do better today
+ collection = request.query_arguments[RequestParameters.DATASET][0].decode('utf-8')
+ if collection in self._remote_collections:
+ return dict()
+
+ # when request does not match
+ return None
\ No newline at end of file
diff --git a/analysis/webservice/webapp.py b/analysis/webservice/webapp.py
index f2f42b1..8db6459 100644
--- a/analysis/webservice/webapp.py
+++ b/analysis/webservice/webapp.py
@@ -14,24 +14,18 @@
# limitations under the License.
import configparser
-import importlib
+
import logging
import sys
import os
-from pathlib import Path
-from functools import partial
-import yaml
-import pkg_resources
import tornado.web
+from tornado.routing import Rule, RuleRouter, AnyMatches
from tornado.options import define, options, parse_command_line
-import webservice.algorithms_spark.NexusCalcSparkHandler
-from nexustiles.nexustiles import NexusTileService
-from webservice import NexusHandler
-from webservice.nexus_tornado.request.handlers import NexusRequestHandler
-from webservice.redirect import RedirectHandler
from webservice.redirect import RemoteCollectionMatcher
+from webservice.nexus_tornado.app_builders import NexusAppBuilder
+from webservice.nexus_tornado.app_builders import RedirectAppBuilder
def inject_args_in_config(args, config):
@@ -54,24 +48,25 @@ def inject_args_in_config(args, config):
return config
-if __name__ == "__main__":
+def main():
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
- datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout)
+ datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout
+ )
log = logging.getLogger(__name__)
- webconfig = configparser.RawConfigParser()
- webconfig.read_file(open(os.path.join(os.path.dirname(__file__), "config", "web.ini")))
+ web_config = configparser.RawConfigParser()
+ web_config.read_file(open(os.path.join(os.path.dirname(__file__), "config", "web.ini")))
algorithm_config = configparser.RawConfigParser()
algorithm_config.read_file(open(os.path.join(os.path.dirname(__file__), "config", "algorithms.ini")))
define("debug", default=False, help="run in debug mode")
- define("port", default=webconfig.get("global", "server.socket_port"), help="run on the given port", type=int)
- define("address", default=webconfig.get("global", "server.socket_host"), help="Bind to the given address")
+ define("port", default=web_config.get("global", "server.socket_port"), help="run on the given port", type=int)
+ define("address", default=web_config.get("global", "server.socket_host"), help="Bind to the given address")
define('solr_time_out', default=60,
help='time out for solr requests in seconds, default (60) is ok for most deployments'
' when solr performances are not good this might need to be increased')
@@ -84,97 +79,43 @@ if __name__ == "__main__":
parse_command_line()
algorithm_config = inject_args_in_config(options, algorithm_config)
- moduleDirs = webconfig.get("modules", "module_dirs").split(",")
- for moduleDir in moduleDirs:
- log.info("Loading modules from %s" % moduleDir)
- importlib.import_module(moduleDir)
+ remote_collection_matcher = RemoteCollectionMatcher(options.collections_path)
- staticDir = webconfig.get("static", "static_dir")
- staticEnabled = webconfig.get("static", "static_enabled") == "true"
+ # build nexus app
+ nexus_app_builder = NexusAppBuilder().set_modules(
+ web_config.get("modules", "module_dirs").split(","),
+ algorithm_config,
+ remote_collection_matcher.get_remote_collections()
+ )
- log.info("Initializing on host address '%s'" % options.address)
- log.info("Initializing on port '%s'" % options.port)
- log.info("Starting web server in debug mode: %s" % options.debug)
- if staticEnabled:
- log.info("Using static root path '%s'" % staticDir)
+ if web_config.get("static", "static_enabled") == "true":
+ nexus_app_builder.enable_static(
+ web_config.get("static", "static_dir")
+ )
else:
log.info("Static resources disabled")
- handlers = []
-
- log.info("Running Nexus Initializers")
- NexusHandler.executeInitializers(algorithm_config)
-
- max_request_threads = webconfig.getint("global", "server.max_simultaneous_requests")
- log.info("Initializing request ThreadPool to %s" % max_request_threads)
- request_thread_pool = tornado.concurrent.futures.ThreadPoolExecutor(max_request_threads)
-
- tile_service_factory = partial(NexusTileService, False, False, algorithm_config)
- spark_context = None
- for clazzWrapper in NexusHandler.AVAILABLE_HANDLERS:
- if issubclass(clazzWrapper, webservice.algorithms_spark.NexusCalcSparkHandler.NexusCalcSparkHandler):
- if spark_context is None:
- from pyspark.sql import SparkSession
-
- spark = SparkSession.builder.appName("nexus-analysis").getOrCreate()
- spark_context = spark.sparkContext
-
- args = dict(clazz=clazzWrapper,
- tile_service_factory=tile_service_factory,
- sc=spark_context,
- thread_pool=request_thread_pool)
- if clazzWrapper == webservice.algorithms_spark.Matchup.Matchup or issubclass(clazzWrapper, webservice.algorithms.doms.BaseDomsHandler.BaseDomsQueryCalcHandler):
- args['config'] = algorithm_config
-
- handlers.append((clazzWrapper.path,
- NexusRequestHandler,
- args))
- else:
- args = dict(clazz=clazzWrapper,
- tile_service_factory=tile_service_factory,
- thread_pool=request_thread_pool)
- if clazzWrapper == webservice.algorithms.doms.ResultsRetrieval.DomsResultsRetrievalHandler or clazzWrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler:
- args['config'] = algorithm_config
- handlers.append((clazzWrapper.path,
- NexusRequestHandler,
- args))
-
- class VersionHandler(tornado.web.RequestHandler):
- def get(self):
- self.write(pkg_resources.get_distribution("nexusanalysis").version)
-
- handlers.append((r"/version", VersionHandler))
-
- handlers.append(
- (r'/apidocs', tornado.web.RedirectHandler, {"url": "/apidocs/"}))
-
- apidocs_path = Path(__file__).parent.joinpath('apidocs').resolve()
- handlers.append(
- (r'/apidocs/(.*)', tornado.web.StaticFileHandler, {'path': str(apidocs_path), "default_filename": "index.html"}))
-
- if staticEnabled:
- handlers.append(
- (r'/(.*)', tornado.web.StaticFileHandler, {'path': staticDir, "default_filename": "index.html"}))
-
- app = tornado.web.Application(
- handlers,
- default_host=options.address,
- debug=options.debug
- )
+ local_sdap_app = nexus_app_builder.build(host=options.address, debug=options.debug)
- remote_collection_matcher = RemoteCollectionMatcher(options.collections_path)
- redirect_handler = (r'/(.*)', RedirectHandler(), remote_collection_matcher.get_redirected_collections())
- redirect_app = tornado.web.Application(
- [redirect_handler],
- default_host=options.address,
+ # build redirect app
+ remote_sdap_app = RedirectAppBuilder(remote_collection_matcher).build(
+ host=options.address,
debug=options.debug)
router = RuleRouter([
- Rule(remote_collection_matcher, redirect_app),
- Rule(AnyMatches(), app)
- ])
+ Rule(remote_collection_matcher, remote_sdap_app),
+ Rule(AnyMatches(), local_sdap_app)
+ ]
+ )
- server = HTTPServer(router)
+ log.info("Initializing on host address '%s'" % options.address)
+ log.info("Initializing on port '%s'" % options.port)
+ log.info("Starting web server in debug mode: %s" % options.debug)
+ server = tornado.web.HTTPServer(router)
server.listen(options.port)
log.info("Starting HTTP listener...")
tornado.ioloop.IOLoop.current().start()
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/data-access/nexustiles/dao/CassandraProxy.py b/data-access/nexustiles/dao/CassandraProxy.py
index e410b99..4ce684a 100644
--- a/data-access/nexustiles/dao/CassandraProxy.py
+++ b/data-access/nexustiles/dao/CassandraProxy.py
@@ -21,6 +21,7 @@ import nexusproto.DataTile_pb2 as nexusproto
import numpy as np
from cassandra.auth import PlainTextAuthProvider
from cassandra.cqlengine import columns, connection, CQLEngineException
+from cassandra.cluster import NoHostAvailable
from cassandra.cqlengine.models import Model
from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy, WhiteListRoundRobinPolicy
from multiprocessing.synchronize import Lock
@@ -296,11 +297,15 @@ class CassandraProxy(object):
auth_provider = PlainTextAuthProvider(username=self.__cass_username, password=self.__cass_password)
else:
auth_provider = None
-
- connection.setup([host for host in self.__cass_url.split(',')], self.__cass_keyspace,
- protocol_version=self.__cass_protocol_version, load_balancing_policy=token_policy,
- port=self.__cass_port,
- auth_provider=auth_provider)
+ try:
+ connection.setup(
+ [host for host in self.__cass_url.split(',')], self.__cass_keyspace,
+ protocol_version=self.__cass_protocol_version, load_balancing_policy=token_policy,
+ port=self.__cass_port,
+ auth_provider=auth_provider
+ )
+ except NoHostAvailable as e:
+ logger.error("Cassandra is not accessible, SDAP will not server local datasets", e)
def fetch_nexus_tiles(self, *tile_ids):
tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if
diff --git a/data-access/nexustiles/dao/SolrProxy.py b/data-access/nexustiles/dao/SolrProxy.py
index 39eed37..9b16533 100644
--- a/data-access/nexustiles/dao/SolrProxy.py
+++ b/data-access/nexustiles/dao/SolrProxy.py
@@ -182,6 +182,7 @@ class SolrProxy(object):
"facet.limit": "-1"
}
+
response = self.do_query_raw(*(search, None, None, False, None), **params)
l = []
for g, v in zip(*[iter(response.facets["facet_fields"]["dataset_s"])]*2):
@@ -640,6 +641,7 @@ class SolrProxy(object):
return response
+
def do_query_all(self, *args, **params):
results = []
diff --git a/docker/nexus-webapp/Dockerfile b/docker/nexus-webapp/Dockerfile
index e2b4187..fc58e79 100644
--- a/docker/nexus-webapp/Dockerfile
+++ b/docker/nexus-webapp/Dockerfile
@@ -89,7 +89,7 @@ RUN python3 setup.py install
WORKDIR /incubator-sdap-nexus/analysis
RUN python3 setup.py install
-RUN pip3 install importlib-metadata
+
WORKDIR /incubator-sdap-nexus/tools/deletebyquery
RUN pip3 install cassandra-driver==3.20.1 --install-option="--no-cython"
diff --git a/helm/templates/ingress.yml b/helm/templates/ingress.yml
index cbc72f1..8360b37 100644
--- a/helm/templates/ingress.yml
+++ b/helm/templates/ingress.yml
@@ -1,4 +1,4 @@
-apiVersion: extensions/v1beta1
+apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: nexus-webapp
@@ -14,20 +14,29 @@ spec:
rules:
- http:
paths:
- {{ if .Values.rootWebpage.enabled }}
- - path: /
- backend:
- serviceName: root-webpage
- servicePort: http
- {{ end }}
- - path: /nexus/?(.*)
- backend:
- serviceName: nexus-webapp
- servicePort: webapp
- {{ if ne .Values.onEarthProxyIP "" }}
- - path: /onearth/?(.*)
- backend:
- serviceName: onearth
- servicePort: 80
- {{ end }}
-
\ No newline at end of file
+ {{ if .Values.rootWebpage.enabled }}
+ - path: /
+ pathType: ImplementationSpecific
+ backend:
+ service:
+ name: root-webpage
+ port:
+ number: 80
+ {{ end }}
+ - path: /nexus/?(.*)
+ pathType: ImplementationSpecific
+ backend:
+ service:
+ name: nexus-webapp
+ port:
+ number: 8083
+ {{ if ne .Values.onEarthProxyIP "" }}
+ - path: /onearth/?(.*)
+ pathType: ImplementationSpecific
+ backend:
+ service:
+ name: onearth
+ port:
+ number: 80
+ {{ end }}
+
diff --git a/helm/templates/webapp.yml b/helm/templates/webapp.yml
index 5203bed..9417a88 100644
--- a/helm/templates/webapp.yml
+++ b/helm/templates/webapp.yml
@@ -16,12 +16,12 @@ spec:
- --cassandra-username={{ include "nexus.credentials.cassandra.username" . }}
- --cassandra-password={{ include "nexus.credentials.cassandra.password" . }}
- --solr-host={{ include "nexus.urls.solr" . }}
- - --collections-path={{ include "nexus.collectionsConfig.mountPath" . }}/collections.yml
+ - --collections-path={{ include "nexus.collectionsConfig.mountPath" . }}/collections-config.yaml
sparkVersion: "3.1.1"
volumes:
- name: collections-config-volume
configMap:
- name: { { include "nexus.collectionsConfig.configmapName" . } }
+ name: {{ include "nexus.collectionsConfig.configmapName" . }}
restartPolicy:
type: OnFailure
onFailureRetries: 10
@@ -31,7 +31,7 @@ spec:
driver:
volumeMounts:
- name: collections-config-volume
- mountPath: { { include "nexus.collectionsConfig.mountPath" . } }
+ mountPath: {{ include "nexus.collectionsConfig.mountPath" . }}
{{ .Values.webapp.distributed.driver | toYaml | indent 4 }}
labels:
version: 3.1.1
diff --git a/helm/values.yaml b/helm/values.yaml
index 35f72a7..afb43b4 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -80,11 +80,11 @@ ingestion:
## ref: https://github.com/apache/incubator-sdap-ingester/tree/dev/collection_manager#the-collections-configuration-file
## Either localDir should be set, or the git options, but not both.
collections:
- createCrd: true
+ createCrd: false
## Name of a ConfigMap containing the Collections Config YAML.
## Leave this blank if Git is enabled below.
- configMap:
+ configMap: collections-config
## Load the Collections Config file from a git repository.
git:
@@ -121,7 +121,7 @@ solr:
image:
repository: nexusjpl/solr
tag: 8.11.1
- replicaCount: 3
+ replicaCount: 1
authentication:
enabled: false
volumeClaimTemplates: