Posted to commits@sdap.apache.org by sk...@apache.org on 2023/06/30 02:43:46 UTC

[incubator-sdap-nexus] branch SDAP-473 created (now a9a9d1e)

This is an automated email from the ASF dual-hosted git repository.

skperez pushed a change to branch SDAP-473
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git


      at a9a9d1e  job prioritization

This branch includes the following new commits:

     new 8abe072  Update openapi spec
     new d4e3100  Merge branch 'SDAP-455' into SDAP-467
     new 8bb47d3  Update changelog
     new a9a9d1e  job prioritization

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-sdap-nexus] 04/04: job prioritization

Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

skperez pushed a commit to branch SDAP-473
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit a9a9d1e63746c5ca54c4bda4f18a1879e8ed3e5e
Author: skorper <st...@gmail.com>
AuthorDate: Thu Jun 29 19:43:36 2023 -0700

    job prioritization
---
 analysis/setup.py                                  | 24 ++++++++++++----------
 analysis/webservice/algorithms_spark/Matchup.py    | 10 +++++++++
 analysis/webservice/config/scheduler.xml           | 10 +++++++++
 .../app_builders/SparkContextBuilder.py            |  9 +++++++-
 4 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/analysis/setup.py b/analysis/setup.py
index 99cd707..c09fe4a 100644
--- a/analysis/setup.py
+++ b/analysis/setup.py
@@ -17,18 +17,19 @@
 import setuptools
 from subprocess import check_call, CalledProcessError
 
-with open('../VERSION.txt', 'r') as f:
-    __version__ = f.read()
+# with open('../VERSION.txt', 'r') as f:
+#     __version__ = f.read()
+__version__ = '1.1.0a3'
 
 
-try:
-    check_call(['mamba', 'install', '-y', '-c', 'conda-forge', '--file', 'conda-requirements.txt'])
-except (CalledProcessError, IOError) as e:
-    print('Failed install with mamba; falling back to conda')
-    try:
-        check_call(['conda', 'install', '-y', '-c', 'conda-forge', '--file', 'conda-requirements.txt'])
-    except (CalledProcessError, IOError) as e:
-        raise EnvironmentError("Error installing conda packages", e)
+# try:
+#     check_call(['mamba', 'install', '-y', '-c', 'conda-forge', '--file', 'conda-requirements.txt'])
+# except (CalledProcessError, IOError) as e:
+#     print('Failed install with mamba; falling back to conda')
+#     try:
+#         check_call(['conda', 'install', '-y', '-c', 'conda-forge', '--file', 'conda-requirements.txt'])
+#     except (CalledProcessError, IOError) as e:
+#         raise EnvironmentError("Error installing conda packages", e)
 
 
 setuptools.setup(
@@ -60,7 +61,8 @@ setuptools.setup(
             'config/algorithms.ini',
             'apidocs/index.html',
             'apidocs/openapi.yml',
-            'apidocs/dataset-populate.js'
+            'apidocs/dataset-populate.js',
+            'config/scheduler.xml'
         ],
         'webservice.algorithms.doms': ['domsconfig.ini.default'],
     },
diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py
index 4fb40cf..30d8261 100644
--- a/analysis/webservice/algorithms_spark/Matchup.py
+++ b/analysis/webservice/algorithms_spark/Matchup.py
@@ -44,6 +44,8 @@ from webservice.webmodel.NexusExecutionResults import ExecutionStatus
 EPOCH = timezone('UTC').localize(datetime(1970, 1, 1))
 ISO_8601 = '%Y-%m-%dT%H:%M:%S%z'
 
+LARGE_JOB_THRESHOLD = 4000
+
 
 class Schema:
     def __init__(self):
@@ -234,6 +236,11 @@ class Matchup(NexusCalcSparkTornadoHandler):
                depth_min, depth_max, time_tolerance, radius_tolerance, \
                platforms, match_once, result_size_limit, prioritize_distance
 
+    def get_job_pool(self, tile_ids):
+        if len(tile_ids) > LARGE_JOB_THRESHOLD:
+            return 'large'
+        return 'small'
+
     def async_calc(self, execution_id, tile_ids, bounding_polygon, primary_ds_name,
                    secondary_ds_names, parameter_s, start_time, end_time, depth_min,
                    depth_max, time_tolerance, radius_tolerance, platforms, match_once,
@@ -241,8 +248,11 @@ class Matchup(NexusCalcSparkTornadoHandler):
         # Call spark_matchup
         self.log.debug("Calling Spark Driver")
 
+        job_priority = self.get_job_pool(tile_ids)
+
         try:
             self._sc.setJobGroup(execution_id, execution_id)
+            self._sc.setLocalProperty('spark.scheduler.pool', job_priority)
             spark_result = spark_matchup_driver(
                 tile_ids, wkt.dumps(bounding_polygon),
                 primary_ds_name,
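
For context, a minimal standalone sketch of the pool-routing idea above (not part of this commit): the threshold and pool names mirror the diff, while the SparkSession and the 5000-tile workload are made up for illustration. Named pools only take effect once spark.scheduler.mode is set to FAIR, which the SparkContextBuilder change below configures.

    from pyspark.sql import SparkSession

    LARGE_JOB_THRESHOLD = 4000  # same cutoff as in Matchup.py above

    def get_job_pool(tile_ids):
        # matchups touching more than 4000 tiles go to the low-priority 'large' pool
        return 'large' if len(tile_ids) > LARGE_JOB_THRESHOLD else 'small'

    spark = SparkSession.builder.appName('pool-demo').getOrCreate()
    sc = spark.sparkContext

    tile_ids = list(range(5000))                        # hypothetical workload
    sc.setLocalProperty('spark.scheduler.pool', get_job_pool(tile_ids))
    sc.parallelize(tile_ids).count()                    # this job is scheduled in the 'large' pool
    sc.setLocalProperty('spark.scheduler.pool', None)   # later jobs fall back to the default pool
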
diff --git a/analysis/webservice/config/scheduler.xml b/analysis/webservice/config/scheduler.xml
new file mode 100644
index 0000000..3016017
--- /dev/null
+++ b/analysis/webservice/config/scheduler.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<allocations>
+  <pool name="small">
+    <weight>1000</weight>
+    <minShare>1</minShare>
+  </pool>
+  <pool name="large">
+    <weight>1</weight>
+  </pool>
+</allocations>
\ No newline at end of file
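
With Spark's fair scheduler, a pool's weight sets its share of the cluster relative to other pools, so the 1000:1 weighting above strongly favors small matchup jobs whenever both pools have work queued; minShare 1 additionally guarantees the 'small' pool at least one core. A hypothetical variant (for illustration only, not part of this commit) spelling out the optional per-pool schedulingMode and an explicit minShare for 'large':

    <?xml version="1.0"?>
    <!-- hypothetical variant for illustration; the committed file is shown in the diff above -->
    <allocations>
      <pool name="small">
        <schedulingMode>FAIR</schedulingMode>
        <weight>1000</weight>
        <minShare>1</minShare>
      </pool>
      <pool name="large">
        <schedulingMode>FIFO</schedulingMode>
        <weight>1</weight>
        <minShare>0</minShare>
      </pool>
    </allocations>
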
diff --git a/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py b/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py
index ee3fd2f..5daf279 100644
--- a/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py
+++ b/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import pkg_resources
 
 
 class SparkContextBuilder:
@@ -25,7 +26,13 @@ class SparkContextBuilder:
         if cls.spark_context is None:
             from pyspark.sql import SparkSession
 
-            spark = SparkSession.builder.appName("nexus-analysis").getOrCreate()
+            scheduler_path = pkg_resources.resource_filename('webservice', "config/scheduler.xml")
+
+            spark = SparkSession.builder.appName("nexus-analysis").config(
+                "spark.scheduler.allocation.file", scheduler_path
+            ).config(
+                "spark.scheduler.mode", "FAIR"
+            ).getOrCreate()
             cls.spark_context = spark.sparkContext
 
         return cls.spark_context
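
A standalone sketch of the configuration the builder above produces, assuming pyspark and the installed webservice package are available; it spins up a session with the packaged allocation file and reads the scheduler settings back to confirm they took effect.

    import pkg_resources
    from pyspark.sql import SparkSession

    # Resolve the packaged scheduler.xml the same way SparkContextBuilder does
    scheduler_path = pkg_resources.resource_filename('webservice', 'config/scheduler.xml')

    spark = (SparkSession.builder
             .appName('nexus-analysis')
             .config('spark.scheduler.allocation.file', scheduler_path)
             .config('spark.scheduler.mode', 'FAIR')
             .getOrCreate())

    conf = spark.sparkContext.getConf()
    print(conf.get('spark.scheduler.mode'))             # FAIR
    print(conf.get('spark.scheduler.allocation.file'))  # .../webservice/config/scheduler.xml
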


[incubator-sdap-nexus] 03/04: Update changelog

Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

skperez pushed a commit to branch SDAP-473
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit 8bb47d30e014b578ae429c6ac85485674f262ded
Author: skorper <st...@gmail.com>
AuthorDate: Thu Jun 29 08:56:14 2023 -0700

    Update changelog
---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 344cb40..c857124 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,10 +4,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## Unreleased
+## [Unreleased]
 ### Added
 - SDAP-467: Added pagination to cdmsresults endpoint
 - SDAP-461: Added 4 remaining Saildrone insitu datasets.
+- SDAP-473: Added support for matchup job prioritization
 ### Changed
 - SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
   - **NOTE:** Deploying these changes to an existing SDAP deployment will require modifying the Cassandra database with stored results. There is a script to do so at `/tools/update-doms-data-schema/update.py`


[incubator-sdap-nexus] 01/04: Update openapi spec

Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

skperez pushed a commit to branch SDAP-473
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit 8abe07272b2b280fbb71568c8800a2fe61fb6583
Author: skorper <st...@gmail.com>
AuthorDate: Wed Jun 28 14:49:08 2023 -0700

    Update openapi spec
---
 analysis/webservice/apidocs/openapi.yml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml
index f125577..0ffe669 100644
--- a/analysis/webservice/apidocs/openapi.yml
+++ b/analysis/webservice/apidocs/openapi.yml
@@ -535,6 +535,22 @@ paths:
             type: string
             enum: ['CSV', 'NETCDF', 'JSON']
           example: CSV
+        - in: query
+          name: pageNum
+          description: |
+            Which page of results should be returned to user.
+          required: false
+          schema:
+            type: integer
+            default: 1
+        - in: query
+          name: pageSize
+          description: |
+            How many results should be returned to user in a single page. One entry is equal to one primary matched point.
+          required: false
+          schema:
+            type: integer
+            default: 1000
       responses:
         '200':
           description: Successful operation
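
A rough client-side sketch of the new pagination parameters (not part of this commit): pageNum, pageSize, and their defaults come from the spec change above; the endpoint name comes from the SDAP-467 changelog entry in this thread, while the host and the id parameter value are placeholders.

    import requests

    BASE_URL = 'http://localhost:8083'  # placeholder host for a running SDAP instance
    params = {
        'id': '00000000-0000-0000-0000-000000000000',  # hypothetical matchup execution id
        'output': 'JSON',
        'pageNum': 2,      # second page of results
        'pageSize': 500,   # 500 primary matched points per page
    }
    resp = requests.get(f'{BASE_URL}/cdmsresults', params=params)
    print(resp.status_code)
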


[incubator-sdap-nexus] 02/04: Merge branch 'SDAP-455' into SDAP-467

Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

skperez pushed a commit to branch SDAP-473
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git

commit d4e310027266fcae64121d000b1bc230079a9a4c
Merge: 8abe072 830a234
Author: skorper <st...@gmail.com>
AuthorDate: Wed Jun 28 14:50:07 2023 -0700

    Merge branch 'SDAP-455' into SDAP-467

 .readthedocs.yaml                                  |   5 +
 CHANGELOG.md                                       |   3 +
 .../webservice/algorithms/doms/BaseDomsHandler.py  |   5 +-
 .../webservice/algorithms/doms/ExecutionCancel.py  |   6 +-
 .../webservice/algorithms/doms/ExecutionStatus.py  |   2 -
 .../webservice/algorithms/doms/ResultsStorage.py   |   3 +-
 analysis/webservice/algorithms_spark/Matchup.py    |   9 +-
 analysis/webservice/apidocs/openapi.yml            |  10 +
 .../request/renderers/NexusCSVRenderer.py          |   7 +-
 .../request/renderers/NexusNETCDFRenderer.py       |   7 +-
 analysis/webservice/webmodel/NexusResults.py       |   4 +
 climatology/.gitignore                             |  11 -
 climatology/clim/ClimatologySpark.py               | 469 -----------
 climatology/clim/ClimatologySpark2.py              | 650 ----------------
 climatology/clim/README.md                         |  32 -
 climatology/clim/__init__.py                       |  14 -
 climatology/clim/binsum.f                          |  64 --
 climatology/clim/cache.py                          |  87 ---
 climatology/clim/climatology.py                    | 247 ------
 climatology/clim/climatology1.py                   | 247 ------
 climatology/clim/climatology2.py                   | 467 -----------
 climatology/clim/climatology3Spark.py              | 432 -----------
 climatology/clim/cluster.py                        |  98 ---
 climatology/clim/cluster2.py                       |  98 ---
 climatology/clim/datasets.py                       | 331 --------
 climatology/clim/dparkTest.py                      |  23 -
 climatology/clim/gaussInterp.py                    |  57 --
 climatology/clim/gaussInterp.pyx                   | 145 ----
 climatology/clim/gaussInterp_f.f                   | 219 ------
 climatology/clim/gaussInterp_f.mk                  |   1 -
 climatology/clim/gaussInterp_slow.py               | 144 ----
 climatology/clim/interp.f                          | 302 --------
 climatology/clim/jobClimatology2.py                |  36 -
 climatology/clim/jobTest.py                        |  32 -
 climatology/clim/orig/C/README                     |   6 -
 climatology/clim/orig/C/binsum.c                   | 125 ---
 climatology/clim/orig/C/clouderosion.c             |  33 -
 climatology/clim/orig/C/gaussinterp.readme         | 159 ----
 climatology/clim/orig/C/gaussinterp_C_code.tar     | Bin 51200 -> 0 bytes
 climatology/clim/orig/C/interp.c                   | 448 -----------
 climatology/clim/orig/C/makefile                   |  33 -
 climatology/clim/orig/C/setupinterp.c              | 431 -----------
 .../clim/orig/Fortran/armstrong_interp_code.tar    | Bin 30720 -> 0 bytes
 climatology/clim/orig/Fortran/binsum.f             |  64 --
 climatology/clim/orig/Fortran/interp.f             | 302 --------
 climatology/clim/orig/Fortran/makefile             |  46 --
 climatology/clim/orig/Fortran/passbase.f           |   9 -
 climatology/clim/orig/Fortran/setupinterp.f        | 291 -------
 climatology/clim/pixelStats.py                     | 232 ------
 climatology/clim/plotlib.py                        | 857 ---------------------
 climatology/clim/reroot.py                         |  45 --
 climatology/clim/setup.py                          |  22 -
 climatology/clim/sort.py                           |  57 --
 climatology/clim/sparkTest.py                      |  31 -
 climatology/clim/spatialFilter.py                  |  50 --
 climatology/clim/spatialFilter_f.f                 | 121 ---
 climatology/clim/spatialFilter_f.mk                |   1 -
 climatology/clim/split.py                          | 212 -----
 climatology/clim/test/__init__.py                  |  14 -
 climatology/clim/test/ccmpTest.py                  |  30 -
 climatology/clim/timePartitions.py                 |  46 --
 climatology/clim/util/__init__.py                  |  14 -
 climatology/clim/util/array.py                     | 194 -----
 climatology/clim/util/introspect.py                |  49 --
 climatology/clim/util/plot.py                      | 147 ----
 climatology/clim/util/stats.py                     | 232 ------
 climatology/clim/util/timeJ2000.py                 | 383 ---------
 climatology/clim/util/warn.py                      |  57 --
 climatology/clim/util/wls.py                       | 811 -------------------
 climatology/clim/variables.py                      | 154 ----
 climatology/clim/wls.py                            | 811 -------------------
 climatology/setup.py                               |  23 -
 data-access/requirements.txt                       |   1 +
 docs/requirements.txt                              |   1 +
 74 files changed, 53 insertions(+), 10756 deletions(-)

diff --cc analysis/webservice/apidocs/openapi.yml
index 0ffe669,25d6f24..edd2ae3
--- a/analysis/webservice/apidocs/openapi.yml
+++ b/analysis/webservice/apidocs/openapi.yml
@@@ -535,22 -535,16 +535,32 @@@ paths
              type: string
              enum: ['CSV', 'NETCDF', 'JSON']
            example: CSV
+         - in: query
+           name: filename
+           description: |
+             Name of output file. Only works with CSV and NETCDF 
+             output types. Do not include file extension in this field. 
+             If this value is not provided, the filename will be 
+             `CDMS_[execution_id].[csv|nc]`
+           required: false
+           schema:
+             type: string
 +        - in: query
 +          name: pageNum
 +          description: |
 +            Which page of results should be returned to user.
 +          required: false
 +          schema:
 +            type: integer
 +            default: 1
 +        - in: query
 +          name: pageSize
 +          description: |
 +            How many results should be returned to user in a single page. One entry is equal to one primary matched point.
 +          required: false
 +          schema:
 +            type: integer
 +            default: 1000
        responses:
          '200':
            description: Successful operation
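
And a similar sketch for the filename parameter added in the merged hunk above: per the spec, it only applies to CSV and NETCDF output and should not include an extension; the host, the id value, and the response handling are assumptions.

    import requests

    BASE_URL = 'http://localhost:8083'  # placeholder host
    params = {
        'id': '00000000-0000-0000-0000-000000000000',  # hypothetical execution id
        'output': 'CSV',
        'filename': 'my_matchup_results',  # no extension, per the description in the spec
    }
    resp = requests.get(f'{BASE_URL}/cdmsresults', params=params)
    with open('my_matchup_results.csv', 'wb') as f:
        f.write(resp.content)
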