You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by tl...@apache.org on 2022/06/08 17:40:42 UTC

[incubator-sdap-nexus] branch sdap-388-proxy updated: proxy with redirect feature tested on laptop

This is an automated email from the ASF dual-hosted git repository.

tloubrieu pushed a commit to branch sdap-388-proxy
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git


The following commit(s) were added to refs/heads/sdap-388-proxy by this push:
     new 8a1ef76  proxy with redirect feature tested on laptop
8a1ef76 is described below

commit 8a1ef7680016bbb7ac2cbe92b41f1f51ac053924
Author: Thomas Loubrieu <lo...@jpl.nasa.gov>
AuthorDate: Wed Jun 8 19:40:31 2022 +0200

    proxy with redirect feature tested on laptop
---
 analysis/README.md                                 |  22 +++-
 analysis/conda-requirements.txt                    |   4 +-
 analysis/setup.py                                  |   2 +-
 analysis/tests/redirect/__init__.py                |   0
 analysis/tests/redirect/collections-config.yaml    |  20 +++
 analysis/tests/webapp_test.py                      |  14 +++
 analysis/webservice/algorithms/DataSeriesList.py   |  19 ++-
 .../algorithms/doms/DomsInitialization.py          |  42 ++++---
 .../app_builders/HandlerArgsBuilder.py             |  41 +++++++
 .../nexus_tornado/app_builders/NexusAppBuilder.py  |  75 ++++++++++++
 .../app_builders/RedirectAppBuilder.py             |  16 +++
 .../app_builders/SparkContextBuilder.py            |  17 +++
 .../nexus_tornado/app_builders/__init__.py         |   2 +
 analysis/webservice/redirect/RedirectHandler.py    |  23 +++-
 .../webservice/redirect/RemoteCollectionMatcher.py |  37 +++---
 analysis/webservice/webapp.py                      | 135 ++++++---------------
 data-access/nexustiles/dao/CassandraProxy.py       |  15 ++-
 data-access/nexustiles/dao/SolrProxy.py            |   2 +
 docker/nexus-webapp/Dockerfile                     |   2 +-
 helm/templates/ingress.yml                         |  45 ++++---
 helm/templates/webapp.yml                          |   6 +-
 helm/values.yaml                                   |   6 +-
 22 files changed, 374 insertions(+), 171 deletions(-)

diff --git a/analysis/README.md b/analysis/README.md
index a55841b..f34c8f8 100644
--- a/analysis/README.md
+++ b/analysis/README.md
@@ -10,7 +10,8 @@ Python module that exposes NEXUS analytical capabilities via a HTTP webservice.
 1. Setup a separate conda env or activate an existing one
 
     ````
-    conda create --name nexus-analysis python=2.7.17
+    conda create --name nexus-analysis python=3.8
+    conda create -n nexus-analysis-38 --no-default-packages python=3.8
     conda activate nexus-analysis
     ````
 
@@ -18,9 +19,7 @@ Python module that exposes NEXUS analytical capabilities via a HTTP webservice.
 
     ````
     cd analysis
-    conda install pyspark
     conda install -c conda-forge --file conda-requirements.txt
-    #conda install numpy matplotlib mpld3 scipy netCDF4 basemap gdal pyproj=1.9.5.1 libnetcdf=4.3.3.1
     ````
 
 3. Update the configuration for solr and cassandra
@@ -46,7 +45,18 @@ BUT be carefull to remove them when you build the docker image. Otherwise they w
     python setup.py install
     ````
 
-5. Set environment variables (examples):
+5. Launch other requirements, through helm:
+
+   ````
+   cd ../helm/
+   kubectl create namespace sdap 
+   kubectl create configmap collections-config --from-file=../analysis/tests/redirect/ -n sdap
+   helm install nexus . -n sdap
+   kubectl port-forward service/nexus-cassandra 9042:9042 -n sdap
+
+   ````
+
+7. Set environment variables (examples):
 
     ```
     PYTHONUNBUFFERED=1
@@ -54,10 +64,12 @@ BUT be carefull to remove them when you build the docker image. Otherwise they w
     JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_241.jdk/Contents/Home
      ```
 
-5. Launch unit tests
+8. Launch unit tests
 
     pip install pytest
+    pip install -e .
     pytest
+9. 
     
 
 5. Launch `python webservice/webapp.py` in command line or run it from the IDE.
diff --git a/analysis/conda-requirements.txt b/analysis/conda-requirements.txt
index 775cf29..3190b44 100644
--- a/analysis/conda-requirements.txt
+++ b/analysis/conda-requirements.txt
@@ -1,6 +1,7 @@
 netcdf4==1.5.5.1
 basemap==1.2.2
 scipy==1.6.0
+pyspark==3.2.1
 pytz==2021.1
 utm==0.6.0
 shapely==1.7.1
@@ -14,5 +15,6 @@ pyyaml==6.0
 geos==3.8.1
 gdal==3.2.1
 mock==4.0.3
-singledispatch==3.4.0.3
+importlib_metadata==4.11.4
+#singledispatch==3.4.0.3
 
diff --git a/analysis/setup.py b/analysis/setup.py
index 3eced72..663d9bd 100644
--- a/analysis/setup.py
+++ b/analysis/setup.py
@@ -57,7 +57,7 @@ setuptools.setup(
         ('static', ['static/index.html'])
     ],
     platforms='any',
-    python_requires='~=3.7',
+    python_requires='~=3.8',
     classifiers=[
         'Development Status :: 1 - Pre-Alpha',
         'Intended Audience :: Developers',
diff --git a/analysis/tests/redirect/__init__.py b/analysis/tests/redirect/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/analysis/tests/redirect/collections-config.yaml b/analysis/tests/redirect/collections-config.yaml
new file mode 100644
index 0000000..2e26d3b
--- /dev/null
+++ b/analysis/tests/redirect/collections-config.yaml
@@ -0,0 +1,20 @@
+collections:
+  - id: gmu-pm25
+    path: https://aq-sdap.stcenter.net/nexus/
+    remote-id: pm25
+  - id: ECCO_v4_r4_EVELMASS_latlon
+    path: /data/datasets/ecco-distribution-archive/nexus-ingest/EVELMASSv4r4/*.nc
+    priority: 6
+    forward-processing-priority: 7
+    projection: Grid
+    dimensionNames:
+      latitude: latitude
+      longitude: longitude
+      depth: Z
+      time: time
+      variable: EVELMASS
+    slices:
+      time: 1
+      i: 30
+      j: 30
+      k: 1
\ No newline at end of file
diff --git a/analysis/tests/webapp_test.py b/analysis/tests/webapp_test.py
index 096f5bc..16db747 100644
--- a/analysis/tests/webapp_test.py
+++ b/analysis/tests/webapp_test.py
@@ -2,8 +2,10 @@ import unittest
 import pkg_resources
 import configparser
 import sys
+import os
 import logging
 import mock
+
 from webservice.webapp import inject_args_in_config
 
 logging.basicConfig(
@@ -64,6 +66,18 @@ class MyTestCase(unittest.TestCase):
         # nothing should happend we just check that there is no section named after the option
         self.assertEqual(False, algorithm_config.has_section('port'))
 
+    @mock.patch('tornado.options')
+    @mock.patch('tornado.options._Option')
+    def test_sdap_redirection(self):
+        mock_option.name = 'collections_path'
+        mock_option.value.return_value = os.path.join(
+            os.path.dirname(__file__),
+            'collections_config.yaml'
+        )
+        mock_options._options = {'collections_path': mock_option}
+
+
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/analysis/webservice/algorithms/DataSeriesList.py b/analysis/webservice/algorithms/DataSeriesList.py
index e9275ed..e79df4e 100644
--- a/analysis/webservice/algorithms/DataSeriesList.py
+++ b/analysis/webservice/algorithms/DataSeriesList.py
@@ -32,6 +32,11 @@ class DataSeriesListCalcHandlerImpl(NexusCalcHandler):
     description = "Lists datasets currently available for analysis"
     params = {}
 
+    def __init__(self, tile_service_factory, remote_collections, **kwargs):
+        super().__init__(tile_service_factory, **kwargs)
+        self._remote_collections = remote_collections
+
+
     @cached(ttl=(60 * 60 * 1000))  # 1 hour cached
     def calc(self, computeOptions, **args):
         class SimpleResult(object):
@@ -41,4 +46,16 @@ class DataSeriesListCalcHandlerImpl(NexusCalcHandler):
             def toJson(self):
                 return json.dumps(self.result)
 
-        return SimpleResult(self._get_tile_service().get_dataseries_list())
+        collection_list = self._get_tile_service().get_dataseries_list()
+
+        # add remote collections
+        for collection in self._remote_collections.values():
+            collection_list.append(
+                {
+                    "shortName": collection["id"],
+                    "remoteUrl": collection["path"],
+                    "remoteShortName": collection["remote_id"] if 'remote_id' in collection else collection["id"]
+                }
+            )
+
+        return SimpleResult(collection_list)
diff --git a/analysis/webservice/algorithms/doms/DomsInitialization.py b/analysis/webservice/algorithms/doms/DomsInitialization.py
index a2cf4b1..d1ed5a5 100644
--- a/analysis/webservice/algorithms/doms/DomsInitialization.py
+++ b/analysis/webservice/algorithms/doms/DomsInitialization.py
@@ -21,6 +21,7 @@ import pkg_resources
 
 from cassandra.auth import PlainTextAuthProvider
 from cassandra.cluster import Cluster
+from cassandra.cluster import NoHostAvailable
 from cassandra.policies import (DCAwareRoundRobinPolicy, TokenAwarePolicy,
                                 WhiteListRoundRobinPolicy)
 from webservice.NexusHandler import nexus_initializer
@@ -32,8 +33,8 @@ class DomsInitializer:
         pass
 
     def init(self, config):
-        log = logging.getLogger(__name__)
-        log.info("*** STARTING DOMS INITIALIZATION ***")
+        self.log = logging.getLogger(__name__)
+        self.log.info("*** STARTING DOMS INITIALIZATION ***")
 
         domsconfig = configparser.SafeConfigParser()
         domsconfig.read(DomsInitializer._get_config_files('domsconfig.ini'))
@@ -52,11 +53,11 @@ class DomsInitializer:
         except configparser.NoOptionError:
             cassCreateKeyspaceGranted = "True"
 
-        log.info("Cassandra Host(s): %s" % (cassHost))
-        log.info("Cassandra Keyspace: %s" % (cassKeyspace))
-        log.info("Cassandra Datacenter: %s" % (cassDatacenter))
-        log.info("Cassandra Protocol Version: %s" % (cassVersion))
-        log.info("Cassandra DC Policy: %s" % (cassPolicy))
+        self.log.info("Cassandra Host(s): %s" % (cassHost))
+        self.log.info("Cassandra Keyspace: %s" % (cassKeyspace))
+        self.log.info("Cassandra Datacenter: %s" % (cassDatacenter))
+        self.log.info("Cassandra Protocol Version: %s" % (cassVersion))
+        self.log.info("Cassandra DC Policy: %s" % (cassPolicy))
 
         if cassPolicy == 'DCAwareRoundRobinPolicy':
             dc_policy = DCAwareRoundRobinPolicy(cassDatacenter)
@@ -69,19 +70,24 @@ class DomsInitializer:
         else:
             auth_provider = None
 
-        with Cluster([host for host in cassHost.split(',')],
-                     port=int(cassPort),
-                     load_balancing_policy=token_policy,
-                     protocol_version=cassVersion,
-                     auth_provider=auth_provider) as cluster:
-            session = cluster.connect()
+        try:
+            with Cluster([host for host in cassHost.split(',')],
+                         port=int(cassPort),
+                         load_balancing_policy=token_policy,
+                         protocol_version=cassVersion,
+                         auth_provider=auth_provider) as cluster:
+                session = cluster.connect()
+
+                if cassCreateKeyspaceGranted in ["True", "true"]:
+                    self.createKeyspace(session, cassKeyspace)
+                else:
+                    session.set_keyspace(cassKeyspace)
+
+                self.createTables(session)
 
-            if cassCreateKeyspaceGranted in ["True", "true"]:
-                self.createKeyspace(session, cassKeyspace)
-            else:
-                session.set_keyspace(cassKeyspace)
+        except NoHostAvailable as e:
+            self.log.error("Unable to connect to Cassandra, Nexus will not be able to access local data ", e)
 
-            self.createTables(session)
 
     def override_config(self, first, second):
         for section in second.sections():
diff --git a/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py
new file mode 100644
index 0000000..7f6334b
--- /dev/null
+++ b/analysis/webservice/nexus_tornado/app_builders/HandlerArgsBuilder.py
@@ -0,0 +1,41 @@
+import tornado
+import webservice.algorithms_spark.NexusCalcSparkHandler
+from .SparkContextBuilder import SparkContextBuilder
+
+
+class HandlerArgsBuilder:
+    def __init__(self, max_request_threads, tile_service_factory, algorithm_config, remote_collections):
+        self.request_thread_pool = tornado.concurrent.futures.ThreadPoolExecutor(max_request_threads)
+        self.tile_service_factory = tile_service_factory
+        self.algorithm_config = algorithm_config
+        self.remote_collections = remote_collections
+
+    @staticmethod
+    def handler_needs_algorithm_config(class_wrapper):
+        return (
+                class_wrapper == webservice.algorithms_spark.Matchup.Matchup
+                or issubclass(class_wrapper, webservice.algorithms.doms.BaseDomsHandler.BaseDomsQueryCalcHandler)
+                or class_wrapper == webservice.algorithms.doms.ResultsRetrieval.DomsResultsRetrievalHandler
+                or class_wrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler
+        )
+    @staticmethod
+    def handler_needs_remote_collections(class_wrapper):
+        return class_wrapper == webservice.algorithms.DataSeriesList.D
+
+    def get_args(self, clazz_wrapper):
+        args = dict(
+            clazz=clazz_wrapper,
+            tile_service_factory=self.tile_service_factory,
+            thread_pool=self. request_thread_pool
+        )
+
+        if issubclass(clazz_wrapper, webservice.algorithms_spark.NexusCalcSparkHandler.NexusCalcSparkHandler):
+            args['sc'] = SparkContextBuilder.get_spark_context()
+
+        if self.handler_needs_algorithm_config(clazz_wrapper):
+            args['config'] = self.algorithm_config
+
+        if clazz_wrapper == webservice.algorithms.DataSeriesList.DataSeriesListCalcHandlerImpl:
+            args['remote_collections'] = self.remote_collections
+
+        return args
diff --git a/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py b/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py
new file mode 100644
index 0000000..20eb335
--- /dev/null
+++ b/analysis/webservice/nexus_tornado/app_builders/NexusAppBuilder.py
@@ -0,0 +1,75 @@
+import logging
+from pathlib import Path
+import importlib
+from functools import partial
+import pkg_resources
+import tornado
+from nexustiles.nexustiles import NexusTileService
+from webservice import NexusHandler
+from webservice.nexus_tornado.request.handlers import NexusRequestHandler
+from .HandlerArgsBuilder import HandlerArgsBuilder
+
+
+class NexusAppBuilder:
+    def __init__(self):
+        self.handlers = []
+        self.log = logging.getLogger(__name__)
+
+        class VersionHandler(tornado.web.RequestHandler):
+            def get(self):
+                self.write(pkg_resources.get_distribution("nexusanalysis").version)
+
+        self.handlers.append((r"/version", VersionHandler))
+
+        self.handlers.append(
+            (r'/apidocs', tornado.web.RedirectHandler, {"url": "/apidocs/"}))
+
+        apidocs_path = Path(__file__).parent.parent.joinpath('apidocs').resolve()
+        self.handlers.append(
+            (
+                r'/apidocs/(.*)', tornado.web.StaticFileHandler,
+                {'path': str(apidocs_path), "default_filename": "index.html"}))
+
+    def set_modules(self, module_dir, algorithm_config, remote_collections, max_request_threads=4):
+        for moduleDir in module_dir:
+            self.log.info("Loading modules from %s" % moduleDir)
+            importlib.import_module(moduleDir)
+
+        self.log.info("Running Nexus Initializers")
+        NexusHandler.executeInitializers(algorithm_config)
+
+        self.log.info("Initializing request ThreadPool to %s" % max_request_threads)
+        tile_service_factory = partial(NexusTileService, False, False, algorithm_config)
+        handler_args_builder = HandlerArgsBuilder(
+            max_request_threads,
+            tile_service_factory,
+            algorithm_config,
+            remote_collections
+        )
+
+        for clazzWrapper in NexusHandler.AVAILABLE_HANDLERS:
+            self.handlers.append(
+                (
+                    clazzWrapper.path,
+                    NexusRequestHandler,
+                    handler_args_builder.get_args(clazzWrapper)
+                )
+            )
+
+        return self
+
+    def enable_static(self, static_dir):
+        self.log.info("Using static root path '%s'" % static_dir)
+        self.handlers.append(
+                (r'/(.*)', tornado.web.StaticFileHandler, {'path': static_dir, "default_filename": "index.html"}))
+
+        return self
+
+    def build(self, host=None, debug=False):
+
+        return tornado.web.Application(
+            self.handlers,
+            default_host=host,
+            debug=debug
+        )
+
diff --git a/analysis/webservice/nexus_tornado/app_builders/RedirectAppBuilder.py b/analysis/webservice/nexus_tornado/app_builders/RedirectAppBuilder.py
new file mode 100644
index 0000000..cae9b43
--- /dev/null
+++ b/analysis/webservice/nexus_tornado/app_builders/RedirectAppBuilder.py
@@ -0,0 +1,16 @@
+from webservice.redirect import RedirectHandler
+from webservice.redirect import RemoteCollectionMatcher
+import tornado
+
+
+class RedirectAppBuilder:
+    def __init__(self, remote_collection_matcher: RemoteCollectionMatcher):
+        redirected_collections = remote_collection_matcher.get_remote_collections()
+        self.redirect_handler = (r'/(.*)', RedirectHandler, {'redirected_collections': redirected_collections})
+
+    def build(self, host=None, debug=False):
+        return tornado.web.Application(
+            [self.redirect_handler],
+            default_host=host,
+            debug=debug
+        )
diff --git a/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py b/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py
new file mode 100644
index 0000000..6c0aa75
--- /dev/null
+++ b/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py
@@ -0,0 +1,17 @@
+
+class SparkContextBuilder:
+    def __init__(self):
+        pass
+
+    spark_context = None
+
+    @classmethod
+    def get_spark_context(cls):
+        if cls.spark_context is None:
+            from pyspark.sql import SparkSession
+
+            spark = SparkSession.builder.appName("nexus-analysis").getOrCreate()
+            cls.spark_context = spark.sparkContext
+
+        return cls.spark_context
+
diff --git a/analysis/webservice/nexus_tornado/app_builders/__init__.py b/analysis/webservice/nexus_tornado/app_builders/__init__.py
new file mode 100644
index 0000000..8276984
--- /dev/null
+++ b/analysis/webservice/nexus_tornado/app_builders/__init__.py
@@ -0,0 +1,2 @@
+from .NexusAppBuilder import NexusAppBuilder
+from .RedirectAppBuilder import RedirectAppBuilder
\ No newline at end of file
diff --git a/analysis/webservice/redirect/RedirectHandler.py b/analysis/webservice/redirect/RedirectHandler.py
index bad0482..7b20178 100644
--- a/analysis/webservice/redirect/RedirectHandler.py
+++ b/analysis/webservice/redirect/RedirectHandler.py
@@ -4,13 +4,28 @@ from webservice.webmodel.RequestParameters import RequestParameters
 
 class RedirectHandler(tornado.web.RequestHandler):
 
-    def initialize(self, redirected_collections):
+    def initialize(self, redirected_collections=None):
         self._redirected_collections = redirected_collections
 
-    def get(self, *args, **kwargs):
-        ds = self.get_arguments(ds=request.get_argument(RequestParameters.DATASET, None))
+    @tornado.gen.coroutine
+    def get(self, algo):
+        collection_id = self.request.query_arguments[RequestParameters.DATASET][0].decode('utf-8')
+        collection = self._redirected_collections[collection_id]
+        full_url = self.request.full_url()
+
+        #redirect to new URL
+        base_url = full_url[:full_url.find(algo)].rstrip('/')
+        new_base_url = collection['path'].rstrip('/')
+        new_full_url = full_url.replace(base_url, new_base_url)
+
+        # use remote collection id
+        if 'remote_id' in collection:
+            dataset_param = f"ds={collection_id}"
+            new_dataset_param = f"ds={collection['remote_id']}"
+            new_full_url = new_full_url.replace(dataset_param, new_dataset_param)
+
         self.redirect(
-            self._redirected_collections[ds]['url'],
+            new_full_url,
             permanent=True
         )
 
diff --git a/analysis/webservice/redirect/RemoteCollectionMatcher.py b/analysis/webservice/redirect/RemoteCollectionMatcher.py
index f7656ff..d0e6e43 100644
--- a/analysis/webservice/redirect/RemoteCollectionMatcher.py
+++ b/analysis/webservice/redirect/RemoteCollectionMatcher.py
@@ -1,29 +1,38 @@
 import yaml
+from tornado.routing import Matcher
 from webservice.webmodel.RequestParameters import RequestParameters
+from tornado.httputil import HTTPServerRequest
 
 
 class RemoteCollectionMatcher(Matcher):
     def __init__(self, collections_config: str):
         self._collections_config = collections_config
-        self._redirected_collections = None
+        self._remote_collections = None
 
-    def get_redirected_collections(self):
-        if self._redirected_collections is None:
-            self._redirected_collections = self.get_redirected_collections(self._collections_config)
-        return self._redicted_collections
+    def get_remote_collections(self):
+        if self._remote_collections is None:
+            self._remote_collections = self._get_remote_collections(self._collections_config)
+        return self._remote_collections
 
     @staticmethod
-    def _get_redirected_collections(collections_config: str):
-        _redirected_collections = {}
+    def _get_remote_collections(collections_config: str):
+        _remote_collections = {}
         with open(collections_config, 'r') as f:
             collections_yaml = yaml.load(f, Loader=yaml.FullLoader)
             for collection in collections_yaml['collections']:
-                if "url" in collection:
-                    _redirected_collections[collection["id"]] = collection
+                if "path" in collection and collection['path'].startswith('http'):
+                    _remote_collections[collection["id"]] = {k.replace('-', '_'): v for k, v in collection.items()}
 
-        return _redirected_collections
+        return _remote_collections
 
-    def match(self, request):
-        ds = request.get_argument(RequestParameters.DATASET, None)
-        if ds in self._redicted_collections:
-            return self._redicted_collection[ds]
\ No newline at end of file
+    def match(self, request: HTTPServerRequest):
+        if RequestParameters.DATASET in request.query_arguments:
+            # the returned values are not used because I did not find how to use them
+            # just return empty dict() works to signify the request matches
+            # TODO do not hardcode utf-8, no time to do better today
+            collection = request.query_arguments[RequestParameters.DATASET][0].decode('utf-8')
+            if collection in self._remote_collections:
+                return dict()
+
+        # when request does not match
+        return None
\ No newline at end of file
diff --git a/analysis/webservice/webapp.py b/analysis/webservice/webapp.py
index f2f42b1..8db6459 100644
--- a/analysis/webservice/webapp.py
+++ b/analysis/webservice/webapp.py
@@ -14,24 +14,18 @@
 # limitations under the License.
 
 import configparser
-import importlib
+
 import logging
 import sys
 import os
-from pathlib import Path
-from functools import partial
-import yaml
 
-import pkg_resources
 import tornado.web
+from tornado.routing import Rule, RuleRouter, AnyMatches
 from tornado.options import define, options, parse_command_line
 
-import webservice.algorithms_spark.NexusCalcSparkHandler
-from nexustiles.nexustiles import NexusTileService
-from webservice import NexusHandler
-from webservice.nexus_tornado.request.handlers import NexusRequestHandler
-from webservice.redirect import RedirectHandler
 from webservice.redirect import RemoteCollectionMatcher
+from webservice.nexus_tornado.app_builders import NexusAppBuilder
+from webservice.nexus_tornado.app_builders import RedirectAppBuilder
 
 
 def inject_args_in_config(args, config):
@@ -54,24 +48,25 @@ def inject_args_in_config(args, config):
     return config
 
 
-if __name__ == "__main__":
+def main():
 
     logging.basicConfig(
         level=logging.DEBUG,
         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-        datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout)
+        datefmt="%Y-%m-%dT%H:%M:%S", stream=sys.stdout
+    )
 
     log = logging.getLogger(__name__)
 
-    webconfig = configparser.RawConfigParser()
-    webconfig.read_file(open(os.path.join(os.path.dirname(__file__), "config", "web.ini")))
+    web_config = configparser.RawConfigParser()
+    web_config.read_file(open(os.path.join(os.path.dirname(__file__), "config", "web.ini")))
 
     algorithm_config = configparser.RawConfigParser()
     algorithm_config.read_file(open(os.path.join(os.path.dirname(__file__), "config", "algorithms.ini")))
 
     define("debug", default=False, help="run in debug mode")
-    define("port", default=webconfig.get("global", "server.socket_port"), help="run on the given port", type=int)
-    define("address", default=webconfig.get("global", "server.socket_host"), help="Bind to the given address")
+    define("port", default=web_config.get("global", "server.socket_port"), help="run on the given port", type=int)
+    define("address", default=web_config.get("global", "server.socket_host"), help="Bind to the given address")
     define('solr_time_out', default=60,
            help='time out for solr requests in seconds, default (60) is ok for most deployments'
                 ' when solr performances are not good this might need to be increased')
@@ -84,97 +79,43 @@ if __name__ == "__main__":
     parse_command_line()
     algorithm_config = inject_args_in_config(options, algorithm_config)
 
-    moduleDirs = webconfig.get("modules", "module_dirs").split(",")
-    for moduleDir in moduleDirs:
-        log.info("Loading modules from %s" % moduleDir)
-        importlib.import_module(moduleDir)
+    remote_collection_matcher = RemoteCollectionMatcher(options.collections_path)
 
-    staticDir = webconfig.get("static", "static_dir")
-    staticEnabled = webconfig.get("static", "static_enabled") == "true"
+    # build nexus app
+    nexus_app_builder = NexusAppBuilder().set_modules(
+        web_config.get("modules", "module_dirs").split(","),
+        algorithm_config,
+        remote_collection_matcher.get_remote_collections()
+    )
 
-    log.info("Initializing on host address '%s'" % options.address)
-    log.info("Initializing on port '%s'" % options.port)
-    log.info("Starting web server in debug mode: %s" % options.debug)
-    if staticEnabled:
-        log.info("Using static root path '%s'" % staticDir)
+    if web_config.get("static", "static_enabled") == "true":
+        nexus_app_builder.enable_static(
+            web_config.get("static", "static_dir")
+        )
     else:
         log.info("Static resources disabled")
 
-    handlers = []
-
-    log.info("Running Nexus Initializers")
-    NexusHandler.executeInitializers(algorithm_config)
-
-    max_request_threads = webconfig.getint("global", "server.max_simultaneous_requests")
-    log.info("Initializing request ThreadPool to %s" % max_request_threads)
-    request_thread_pool = tornado.concurrent.futures.ThreadPoolExecutor(max_request_threads)
-
-    tile_service_factory = partial(NexusTileService, False, False, algorithm_config)
-    spark_context = None
-    for clazzWrapper in NexusHandler.AVAILABLE_HANDLERS:
-        if issubclass(clazzWrapper, webservice.algorithms_spark.NexusCalcSparkHandler.NexusCalcSparkHandler):
-            if spark_context is None:
-                from pyspark.sql import SparkSession
-
-                spark = SparkSession.builder.appName("nexus-analysis").getOrCreate()
-                spark_context = spark.sparkContext
-
-            args = dict(clazz=clazzWrapper,
-                        tile_service_factory=tile_service_factory,
-                        sc=spark_context,
-                        thread_pool=request_thread_pool)
-            if clazzWrapper == webservice.algorithms_spark.Matchup.Matchup or issubclass(clazzWrapper, webservice.algorithms.doms.BaseDomsHandler.BaseDomsQueryCalcHandler):
-                args['config'] = algorithm_config
-
-            handlers.append((clazzWrapper.path,
-                             NexusRequestHandler,
-                             args))
-        else:
-            args = dict(clazz=clazzWrapper,
-                        tile_service_factory=tile_service_factory,
-                        thread_pool=request_thread_pool)
-            if clazzWrapper == webservice.algorithms.doms.ResultsRetrieval.DomsResultsRetrievalHandler or clazzWrapper == webservice.algorithms.doms.ResultsPlotQuery.DomsResultsPlotHandler:
-                args['config'] = algorithm_config
-            handlers.append((clazzWrapper.path,
-                             NexusRequestHandler,
-                             args))
-
-    class VersionHandler(tornado.web.RequestHandler):
-        def get(self):
-            self.write(pkg_resources.get_distribution("nexusanalysis").version)
-
-    handlers.append((r"/version", VersionHandler))
-
-    handlers.append(
-        (r'/apidocs', tornado.web.RedirectHandler, {"url": "/apidocs/"}))
-
-    apidocs_path = Path(__file__).parent.joinpath('apidocs').resolve()
-    handlers.append(
-        (r'/apidocs/(.*)', tornado.web.StaticFileHandler, {'path': str(apidocs_path), "default_filename": "index.html"}))
-
-    if staticEnabled:
-        handlers.append(
-            (r'/(.*)', tornado.web.StaticFileHandler, {'path': staticDir, "default_filename": "index.html"}))
-
-    app = tornado.web.Application(
-        handlers,
-        default_host=options.address,
-        debug=options.debug
-    )
+    local_sdap_app = nexus_app_builder.build(host=options.address, debug=options.debug)
 
-    remote_collection_matcher = RemoteCollectionMatcher(options.collections_path)
-    redirect_handler = (r'/(.*)', RedirectHandler(), remote_collection_matcher.get_redirected_collections())
-    redirect_app = tornado.web.Application(
-        [redirect_handler],
-        default_host=options.address,
+    # build redirect app
+    remote_sdap_app = RedirectAppBuilder(remote_collection_matcher).build(
+        host=options.address,
         debug=options.debug)
 
     router = RuleRouter([
-        Rule(remote_collection_matcher, redirect_app),
-        Rule(AnyMatches(), app)
-    ])
+        Rule(remote_collection_matcher, remote_sdap_app),
+        Rule(AnyMatches(), local_sdap_app)
+        ]
+    )
 
-    server = HTTPServer(router)
+    log.info("Initializing on host address '%s'" % options.address)
+    log.info("Initializing on port '%s'" % options.port)
+    log.info("Starting web server in debug mode: %s" % options.debug)
+    server = tornado.web.HTTPServer(router)
     server.listen(options.port)
     log.info("Starting HTTP listener...")
     tornado.ioloop.IOLoop.current().start()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/data-access/nexustiles/dao/CassandraProxy.py b/data-access/nexustiles/dao/CassandraProxy.py
index e410b99..4ce684a 100644
--- a/data-access/nexustiles/dao/CassandraProxy.py
+++ b/data-access/nexustiles/dao/CassandraProxy.py
@@ -21,6 +21,7 @@ import nexusproto.DataTile_pb2 as nexusproto
 import numpy as np
 from cassandra.auth import PlainTextAuthProvider
 from cassandra.cqlengine import columns, connection, CQLEngineException
+from cassandra.cluster import NoHostAvailable
 from cassandra.cqlengine.models import Model
 from cassandra.policies import TokenAwarePolicy, DCAwareRoundRobinPolicy, WhiteListRoundRobinPolicy
 from multiprocessing.synchronize import Lock
@@ -296,11 +297,15 @@ class CassandraProxy(object):
             auth_provider = PlainTextAuthProvider(username=self.__cass_username, password=self.__cass_password)
         else:
             auth_provider = None
-
-        connection.setup([host for host in self.__cass_url.split(',')], self.__cass_keyspace,
-                         protocol_version=self.__cass_protocol_version, load_balancing_policy=token_policy,
-                         port=self.__cass_port,
-                         auth_provider=auth_provider)
+        try:
+            connection.setup(
+                [host for host in self.__cass_url.split(',')], self.__cass_keyspace,
+                protocol_version=self.__cass_protocol_version, load_balancing_policy=token_policy,
+                port=self.__cass_port,
+                auth_provider=auth_provider
+            )
+        except NoHostAvailable as e:
+            logger.error("Cassandra is not accessible, SDAP will not server local datasets", e)
 
     def fetch_nexus_tiles(self, *tile_ids):
         tile_ids = [uuid.UUID(str(tile_id)) for tile_id in tile_ids if
diff --git a/data-access/nexustiles/dao/SolrProxy.py b/data-access/nexustiles/dao/SolrProxy.py
index 39eed37..9b16533 100644
--- a/data-access/nexustiles/dao/SolrProxy.py
+++ b/data-access/nexustiles/dao/SolrProxy.py
@@ -182,6 +182,7 @@ class SolrProxy(object):
             "facet.limit": "-1"
         }
 
+
         response = self.do_query_raw(*(search, None, None, False, None), **params)
         l = []
         for g, v in zip(*[iter(response.facets["facet_fields"]["dataset_s"])]*2):
@@ -640,6 +641,7 @@ class SolrProxy(object):
 
         return response
 
+
     def do_query_all(self, *args, **params):
 
         results = []
diff --git a/docker/nexus-webapp/Dockerfile b/docker/nexus-webapp/Dockerfile
index e2b4187..fc58e79 100644
--- a/docker/nexus-webapp/Dockerfile
+++ b/docker/nexus-webapp/Dockerfile
@@ -89,7 +89,7 @@ RUN python3 setup.py install
 
 WORKDIR /incubator-sdap-nexus/analysis
 RUN python3 setup.py install
-RUN pip3 install importlib-metadata
+
 
 WORKDIR /incubator-sdap-nexus/tools/deletebyquery
 RUN pip3 install cassandra-driver==3.20.1 --install-option="--no-cython"
diff --git a/helm/templates/ingress.yml b/helm/templates/ingress.yml
index cbc72f1..8360b37 100644
--- a/helm/templates/ingress.yml
+++ b/helm/templates/ingress.yml
@@ -1,4 +1,4 @@
-apiVersion: extensions/v1beta1
+apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   name: nexus-webapp
@@ -14,20 +14,29 @@ spec:
   rules:
   - http:
       paths:
-      {{ if .Values.rootWebpage.enabled }}
-      - path: /
-        backend:
-          serviceName: root-webpage
-          servicePort: http
-      {{ end }}
-      - path: /nexus/?(.*)
-        backend:
-          serviceName: nexus-webapp
-          servicePort: webapp
-      {{ if ne .Values.onEarthProxyIP "" }}
-      - path: /onearth/?(.*)
-        backend:
-          serviceName: onearth
-          servicePort: 80
-      {{ end }}
-      
\ No newline at end of file
+        {{ if .Values.rootWebpage.enabled }}
+        - path: /
+          pathType: ImplementationSpecific
+          backend:
+            service:
+              name: root-webpage
+              port:
+                number: 80
+        {{ end }}
+        - path: /nexus/?(.*)
+          pathType: ImplementationSpecific
+          backend:
+            service:
+              name: nexus-webapp
+              port:
+                number: 8083
+        {{ if ne .Values.onEarthProxyIP "" }}
+        - path: /onearth/?(.*)
+          pathType: ImplementationSpecific
+          backend:
+            service:
+              name: onearth
+              port:
+                number: 80
+        {{ end }}
+
diff --git a/helm/templates/webapp.yml b/helm/templates/webapp.yml
index 5203bed..9417a88 100644
--- a/helm/templates/webapp.yml
+++ b/helm/templates/webapp.yml
@@ -16,12 +16,12 @@ spec:
     - --cassandra-username={{ include "nexus.credentials.cassandra.username" . }}
     - --cassandra-password={{ include "nexus.credentials.cassandra.password" . }}
     - --solr-host={{ include "nexus.urls.solr" . }}
-    - --collections-path={{ include "nexus.collectionsConfig.mountPath" . }}/collections.yml
+    - --collections-path={{ include "nexus.collectionsConfig.mountPath" . }}/collections-config.yaml
   sparkVersion: "3.1.1"
   volumes:
     - name: collections-config-volume
       configMap:
-        name: { { include "nexus.collectionsConfig.configmapName" . } }
+        name: {{ include "nexus.collectionsConfig.configmapName" . }}
   restartPolicy:
     type: OnFailure
     onFailureRetries: 10
@@ -31,7 +31,7 @@ spec:
   driver:
     volumeMounts:
       - name: collections-config-volume
-          mountPath: { { include "nexus.collectionsConfig.mountPath" . } }
+        mountPath: {{ include "nexus.collectionsConfig.mountPath" . }}
 {{ .Values.webapp.distributed.driver | toYaml | indent 4 }}
     labels:
       version: 3.1.1
diff --git a/helm/values.yaml b/helm/values.yaml
index 35f72a7..afb43b4 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -80,11 +80,11 @@ ingestion:
   ## ref: https://github.com/apache/incubator-sdap-ingester/tree/dev/collection_manager#the-collections-configuration-file
   ## Either localDir should be set, or the git options, but not both.
   collections:
-    createCrd: true
+    createCrd: false
 
     ## Name of a ConfigMap containing the Collections Config YAML.
     ## Leave this blank if Git is enabled below.
-    configMap:
+    configMap: collections-config
 
     ## Load the Collections Config file from a git repository.
     git:
@@ -121,7 +121,7 @@ solr:
   image:
     repository: nexusjpl/solr
     tag: 8.11.1
-  replicaCount: 3
+  replicaCount: 1
   authentication:
     enabled: false
   volumeClaimTemplates: