You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by sk...@apache.org on 2023/06/30 02:43:46 UTC
[incubator-sdap-nexus] branch SDAP-473 created (now a9a9d1e)
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a change to branch SDAP-473
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
at a9a9d1e job prioritization
This branch includes the following new commits:
new 8abe072 Update openapi spec
new d4e3100 Merge branch 'SDAP-455' into SDAP-467
new 8bb47d3 Update changelog
new a9a9d1e job prioritization
The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
[incubator-sdap-nexus] 04/04: job prioritization
Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a commit to branch SDAP-473
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit a9a9d1e63746c5ca54c4bda4f18a1879e8ed3e5e
Author: skorper <st...@gmail.com>
AuthorDate: Thu Jun 29 19:43:36 2023 -0700
job prioritization
---
analysis/setup.py | 24 ++++++++++++----------
analysis/webservice/algorithms_spark/Matchup.py | 10 +++++++++
analysis/webservice/config/scheduler.xml | 10 +++++++++
.../app_builders/SparkContextBuilder.py | 9 +++++++-
4 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/analysis/setup.py b/analysis/setup.py
index 99cd707..c09fe4a 100644
--- a/analysis/setup.py
+++ b/analysis/setup.py
@@ -17,18 +17,19 @@
import setuptools
from subprocess import check_call, CalledProcessError
-with open('../VERSION.txt', 'r') as f:
- __version__ = f.read()
+# with open('../VERSION.txt', 'r') as f:
+# __version__ = f.read()
+__version__ = '1.1.0a3'
-try:
- check_call(['mamba', 'install', '-y', '-c', 'conda-forge', '--file', 'conda-requirements.txt'])
-except (CalledProcessError, IOError) as e:
- print('Failed install with mamba; falling back to conda')
- try:
- check_call(['conda', 'install', '-y', '-c', 'conda-forge', '--file', 'conda-requirements.txt'])
- except (CalledProcessError, IOError) as e:
- raise EnvironmentError("Error installing conda packages", e)
+# try:
+# check_call(['mamba', 'install', '-y', '-c', 'conda-forge', '--file', 'conda-requirements.txt'])
+# except (CalledProcessError, IOError) as e:
+# print('Failed install with mamba; falling back to conda')
+# try:
+# check_call(['conda', 'install', '-y', '-c', 'conda-forge', '--file', 'conda-requirements.txt'])
+# except (CalledProcessError, IOError) as e:
+# raise EnvironmentError("Error installing conda packages", e)
setuptools.setup(
@@ -60,7 +61,8 @@ setuptools.setup(
'config/algorithms.ini',
'apidocs/index.html',
'apidocs/openapi.yml',
- 'apidocs/dataset-populate.js'
+ 'apidocs/dataset-populate.js',
+ 'config/scheduler.xml'
],
'webservice.algorithms.doms': ['domsconfig.ini.default'],
},
diff --git a/analysis/webservice/algorithms_spark/Matchup.py b/analysis/webservice/algorithms_spark/Matchup.py
index 4fb40cf..30d8261 100644
--- a/analysis/webservice/algorithms_spark/Matchup.py
+++ b/analysis/webservice/algorithms_spark/Matchup.py
@@ -44,6 +44,8 @@ from webservice.webmodel.NexusExecutionResults import ExecutionStatus
EPOCH = timezone('UTC').localize(datetime(1970, 1, 1))
ISO_8601 = '%Y-%m-%dT%H:%M:%S%z'
+LARGE_JOB_THRESHOLD = 4000
+
class Schema:
def __init__(self):
@@ -234,6 +236,11 @@ class Matchup(NexusCalcSparkTornadoHandler):
depth_min, depth_max, time_tolerance, radius_tolerance, \
platforms, match_once, result_size_limit, prioritize_distance
+ def get_job_pool(self, tile_ids):
+ if len(tile_ids) > LARGE_JOB_THRESHOLD:
+ return 'large'
+ return 'small'
+
def async_calc(self, execution_id, tile_ids, bounding_polygon, primary_ds_name,
secondary_ds_names, parameter_s, start_time, end_time, depth_min,
depth_max, time_tolerance, radius_tolerance, platforms, match_once,
@@ -241,8 +248,11 @@ class Matchup(NexusCalcSparkTornadoHandler):
# Call spark_matchup
self.log.debug("Calling Spark Driver")
+ job_priority = self.get_job_pool(tile_ids)
+
try:
self._sc.setJobGroup(execution_id, execution_id)
+ self._sc.setLocalProperty('spark.scheduler.pool', job_priority)
spark_result = spark_matchup_driver(
tile_ids, wkt.dumps(bounding_polygon),
primary_ds_name,
diff --git a/analysis/webservice/config/scheduler.xml b/analysis/webservice/config/scheduler.xml
new file mode 100644
index 0000000..3016017
--- /dev/null
+++ b/analysis/webservice/config/scheduler.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<allocations>
+ <pool name="small">
+ <weight>1000</weight>
+ <minShare>1</minShare>
+ </pool>
+ <pool name="large">
+ <weight>1</weight>
+ </pool>
+</allocations>
\ No newline at end of file
diff --git a/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py b/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py
index ee3fd2f..5daf279 100644
--- a/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py
+++ b/analysis/webservice/nexus_tornado/app_builders/SparkContextBuilder.py
@@ -12,6 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+import pkg_resources
class SparkContextBuilder:
@@ -25,7 +26,13 @@ class SparkContextBuilder:
if cls.spark_context is None:
from pyspark.sql import SparkSession
- spark = SparkSession.builder.appName("nexus-analysis").getOrCreate()
+ scheduler_path = pkg_resources.resource_filename('webservice', "config/scheduler.xml")
+
+ spark = SparkSession.builder.appName("nexus-analysis").config(
+ "spark.scheduler.allocation.file", scheduler_path
+ ).config(
+ "spark.scheduler.mode", "FAIR"
+ ).getOrCreate()
cls.spark_context = spark.sparkContext
return cls.spark_context
[incubator-sdap-nexus] 03/04: Update changelog
Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a commit to branch SDAP-473
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit 8bb47d30e014b578ae429c6ac85485674f262ded
Author: skorper <st...@gmail.com>
AuthorDate: Thu Jun 29 08:56:14 2023 -0700
Update changelog
---
CHANGELOG.md | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 344cb40..c857124 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,10 +4,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## Unreleased
+## [Unreleased]
### Added
- SDAP-467: Added pagination to cdmsresults endpoint
- SDAP-461: Added 4 remaining Saildrone insitu datasets.
+- SDAP-473: Added support for matchup job prioritization
### Changed
- SDAP-453: Updated results storage and retrieval to support output JSON from `/cdmsresults` that matches output from `/match_spark`.
- **NOTE:** Deploying these changes to an existing SDAP deployment will require modifying the Cassandra database with stored results. There is a script to do so at `/tools/update-doms-data-schema/update.py`
[incubator-sdap-nexus] 01/04: Update openapi spec
Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a commit to branch SDAP-473
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit 8abe07272b2b280fbb71568c8800a2fe61fb6583
Author: skorper <st...@gmail.com>
AuthorDate: Wed Jun 28 14:49:08 2023 -0700
Update openapi spec
---
analysis/webservice/apidocs/openapi.yml | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/analysis/webservice/apidocs/openapi.yml b/analysis/webservice/apidocs/openapi.yml
index f125577..0ffe669 100644
--- a/analysis/webservice/apidocs/openapi.yml
+++ b/analysis/webservice/apidocs/openapi.yml
@@ -535,6 +535,22 @@ paths:
type: string
enum: ['CSV', 'NETCDF', 'JSON']
example: CSV
+ - in: query
+ name: pageNum
+ description: |
+ Which page of results should be returned to the user.
+ required: false
+ schema:
+ type: integer
+ default: 1
+ - in: query
+ name: pageSize
+ description: |
+ How many results should be returned to the user in a single page. One entry is equal to one primary matched point.
+ required: false
+ schema:
+ type: integer
+ default: 1000
responses:
'200':
description: Successful operation
[incubator-sdap-nexus] 02/04: Merge branch 'SDAP-455' into SDAP-467
Posted by sk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
skperez pushed a commit to branch SDAP-473
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git
commit d4e310027266fcae64121d000b1bc230079a9a4c
Merge: 8abe072 830a234
Author: skorper <st...@gmail.com>
AuthorDate: Wed Jun 28 14:50:07 2023 -0700
Merge branch 'SDAP-455' into SDAP-467
.readthedocs.yaml | 5 +
CHANGELOG.md | 3 +
.../webservice/algorithms/doms/BaseDomsHandler.py | 5 +-
.../webservice/algorithms/doms/ExecutionCancel.py | 6 +-
.../webservice/algorithms/doms/ExecutionStatus.py | 2 -
.../webservice/algorithms/doms/ResultsStorage.py | 3 +-
analysis/webservice/algorithms_spark/Matchup.py | 9 +-
analysis/webservice/apidocs/openapi.yml | 10 +
.../request/renderers/NexusCSVRenderer.py | 7 +-
.../request/renderers/NexusNETCDFRenderer.py | 7 +-
analysis/webservice/webmodel/NexusResults.py | 4 +
climatology/.gitignore | 11 -
climatology/clim/ClimatologySpark.py | 469 -----------
climatology/clim/ClimatologySpark2.py | 650 ----------------
climatology/clim/README.md | 32 -
climatology/clim/__init__.py | 14 -
climatology/clim/binsum.f | 64 --
climatology/clim/cache.py | 87 ---
climatology/clim/climatology.py | 247 ------
climatology/clim/climatology1.py | 247 ------
climatology/clim/climatology2.py | 467 -----------
climatology/clim/climatology3Spark.py | 432 -----------
climatology/clim/cluster.py | 98 ---
climatology/clim/cluster2.py | 98 ---
climatology/clim/datasets.py | 331 --------
climatology/clim/dparkTest.py | 23 -
climatology/clim/gaussInterp.py | 57 --
climatology/clim/gaussInterp.pyx | 145 ----
climatology/clim/gaussInterp_f.f | 219 ------
climatology/clim/gaussInterp_f.mk | 1 -
climatology/clim/gaussInterp_slow.py | 144 ----
climatology/clim/interp.f | 302 --------
climatology/clim/jobClimatology2.py | 36 -
climatology/clim/jobTest.py | 32 -
climatology/clim/orig/C/README | 6 -
climatology/clim/orig/C/binsum.c | 125 ---
climatology/clim/orig/C/clouderosion.c | 33 -
climatology/clim/orig/C/gaussinterp.readme | 159 ----
climatology/clim/orig/C/gaussinterp_C_code.tar | Bin 51200 -> 0 bytes
climatology/clim/orig/C/interp.c | 448 -----------
climatology/clim/orig/C/makefile | 33 -
climatology/clim/orig/C/setupinterp.c | 431 -----------
.../clim/orig/Fortran/armstrong_interp_code.tar | Bin 30720 -> 0 bytes
climatology/clim/orig/Fortran/binsum.f | 64 --
climatology/clim/orig/Fortran/interp.f | 302 --------
climatology/clim/orig/Fortran/makefile | 46 --
climatology/clim/orig/Fortran/passbase.f | 9 -
climatology/clim/orig/Fortran/setupinterp.f | 291 -------
climatology/clim/pixelStats.py | 232 ------
climatology/clim/plotlib.py | 857 ---------------------
climatology/clim/reroot.py | 45 --
climatology/clim/setup.py | 22 -
climatology/clim/sort.py | 57 --
climatology/clim/sparkTest.py | 31 -
climatology/clim/spatialFilter.py | 50 --
climatology/clim/spatialFilter_f.f | 121 ---
climatology/clim/spatialFilter_f.mk | 1 -
climatology/clim/split.py | 212 -----
climatology/clim/test/__init__.py | 14 -
climatology/clim/test/ccmpTest.py | 30 -
climatology/clim/timePartitions.py | 46 --
climatology/clim/util/__init__.py | 14 -
climatology/clim/util/array.py | 194 -----
climatology/clim/util/introspect.py | 49 --
climatology/clim/util/plot.py | 147 ----
climatology/clim/util/stats.py | 232 ------
climatology/clim/util/timeJ2000.py | 383 ---------
climatology/clim/util/warn.py | 57 --
climatology/clim/util/wls.py | 811 -------------------
climatology/clim/variables.py | 154 ----
climatology/clim/wls.py | 811 -------------------
climatology/setup.py | 23 -
data-access/requirements.txt | 1 +
docs/requirements.txt | 1 +
74 files changed, 53 insertions(+), 10756 deletions(-)
diff --cc analysis/webservice/apidocs/openapi.yml
index 0ffe669,25d6f24..edd2ae3
--- a/analysis/webservice/apidocs/openapi.yml
+++ b/analysis/webservice/apidocs/openapi.yml
@@@ -535,22 -535,16 +535,32 @@@ paths
type: string
enum: ['CSV', 'NETCDF', 'JSON']
example: CSV
+ - in: query
+ name: filename
+ description: |
+ Name of output file. Only works with CSV and NETCDF
+ output types. Do not include file extension in this field.
+ If this value is not provided, the filename will be
+ `CDMS_[execution_id].[csv|nc]`
+ required: false
+ schema:
+ type: string
+ - in: query
+ name: pageNum
+ description: |
+ Which page of results should be returned to the user.
+ required: false
+ schema:
+ type: integer
+ default: 1
+ - in: query
+ name: pageSize
+ description: |
+ How many results should be returned to the user in a single page. One entry is equal to one primary matched point.
+ required: false
+ schema:
+ type: integer
+ default: 1000
responses:
'200':
description: Successful operation