You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by GitBox <gi...@apache.org> on 2019/01/15 01:27:06 UTC
[incubator-sdap-nexus] Diff for: [GitHub] jjacob7734 closed pull request
#64: SDAP-173 Fix Hovmoller code reporting missing get_spark_cfg attribute
diff --git a/analysis/.gitignore b/analysis/.gitignore
index 2081316..559285c 100644
--- a/analysis/.gitignore
+++ b/analysis/.gitignore
@@ -1,3 +1,5 @@
+*.nc
+
# Created by .ignore support plugin (hsz.mobi)
### VirtualEnv template
# Virtualenv
@@ -194,4 +196,3 @@ com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
-
diff --git a/analysis/webservice/NexusHandler.py b/analysis/webservice/NexusHandler.py
index eb6373f..fd3cb4b 100644
--- a/analysis/webservice/NexusHandler.py
+++ b/analysis/webservice/NexusHandler.py
@@ -320,8 +320,7 @@ def set_config(self, algorithm_config):
def _setQueryParams(self, ds, bounds, start_time=None, end_time=None,
start_year=None, end_year=None, clim_month=None,
- fill=-9999., spark_master=None, spark_nexecs=None,
- spark_nparts=None):
+ fill=-9999.):
self._ds = ds
self._minLat, self._maxLat, self._minLon, self._maxLon = bounds
self._startTime = start_time
@@ -330,10 +329,7 @@ def _setQueryParams(self, ds, bounds, start_time=None, end_time=None,
self._endYear = end_year
self._climMonth = clim_month
self._fill = fill
- self._spark_master = spark_master
- self._spark_nexecs = spark_nexecs
- self._spark_nparts = spark_nparts
-
+
def _set_info_from_tile_set(self, nexus_tiles):
ntiles = len(nexus_tiles)
self.log.debug('Attempting to extract info from {0} tiles'.\
@@ -578,6 +574,13 @@ def _create_nc_file_latlon2d(self, a, fname, varname, varunits=None,
def _create_nc_file(self, a, fname, varname, **kwargs):
self._create_nc_file_latlon2d(a, fname, varname, **kwargs)
+ def _spark_nparts(self, nparts_requested):
+ max_parallelism = 128
+ num_partitions = min(nparts_requested if nparts_requested > 0
+ else self._sc.defaultParallelism,
+ max_parallelism)
+ return num_partitions
+
def executeInitializers(config):
[wrapper.init(config) for wrapper in AVAILABLE_INITIALIZERS]
diff --git a/analysis/webservice/algorithms/doms/BaseDomsHandler.py b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
index 586624d..c9b8acf 100644
--- a/analysis/webservice/algorithms/doms/BaseDomsHandler.py
+++ b/analysis/webservice/algorithms/doms/BaseDomsHandler.py
@@ -14,9 +14,11 @@
# limitations under the License.
import StringIO
+import os
import csv
import json
from datetime import datetime
+import time
from decimal import Decimal
import numpy as np
@@ -38,6 +40,7 @@
from gdalnumeric import *
from netCDF4 import Dataset
+import netCDF4
import tempfile
@@ -96,7 +99,7 @@ def toCSV(self):
return DomsCSVFormatter.create(self.__executionId, self.results(), self.__args, self.__details)
def toNetCDF(self):
- return DomsNetCDFFormatterAlt.create(self.__executionId, self.results(), self.__args, self.__details)
+ return DomsNetCDFFormatter.create(self.__executionId, self.results(), self.__args, self.__details)
class DomsCSVFormatter:
@@ -109,7 +112,7 @@ def create(executionId, results, params, details):
DomsCSVFormatter.__addDynamicAttrs(csv_mem_file, executionId, results, params, details)
csv.writer(csv_mem_file).writerow([])
- DomsCSVFormatter.__packValues(csv_mem_file, results)
+ DomsCSVFormatter.__packValues(csv_mem_file, results, params)
csv_out = csv_mem_file.getvalue()
finally:
@@ -118,47 +121,60 @@ def create(executionId, results, params, details):
return csv_out
@staticmethod
- def __packValues(csv_mem_file, results):
+ def __packValues(csv_mem_file, results, params):
writer = csv.writer(csv_mem_file)
headers = [
# Primary
- "id", "source", "lon", "lat", "time", "platform", "sea_water_salinity_depth", "sea_water_salinity",
- "sea_water_temperature_depth", "sea_water_temperature", "wind_speed", "wind_direction", "wind_u", "wind_v",
+ "id", "source", "lon (degrees_east)", "lat (degrees_north)", "time", "platform",
+ "sea_surface_salinity (1e-3)", "sea_surface_temperature (degree_C)", "wind_speed (m s-1)", "wind_direction",
+ "wind_u (m s-1)", "wind_v (m s-1)",
# Match
- "id", "source", "lon", "lat", "time", "platform", "sea_water_salinity_depth", "sea_water_salinity",
- "sea_water_temperature_depth", "sea_water_temperature", "wind_speed", "wind_direction", "wind_u", "wind_v"
+ "id", "source", "lon (degrees_east)", "lat (degrees_north)", "time", "platform",
+ "depth (m)", "sea_water_salinity (1e-3)",
+ "sea_water_temperature (degree_C)", "wind_speed (m s-1)",
+ "wind_direction", "wind_u (m s-1)", "wind_v (m s-1)"
]
writer.writerow(headers)
+ #
+ # Only include the depth variable related to the match-up parameter. If the match-up parameter
+ # is not sss or sst then do not include any depth data, just fill values.
+ #
+ if params["parameter"] == "sss":
+ depth = "sea_water_salinity_depth"
+ elif params["parameter"] == "sst":
+ depth = "sea_water_temperature_depth"
+ else:
+ depth = "NO_DEPTH"
+
for primaryValue in results:
for matchup in primaryValue["matches"]:
row = [
# Primary
primaryValue["id"], primaryValue["source"], str(primaryValue["x"]), str(primaryValue["y"]),
primaryValue["time"].strftime(ISO_8601), primaryValue["platform"],
- primaryValue.get("sea_water_salinity_depth", ""), primaryValue.get("sea_water_salinity", ""),
- primaryValue.get("sea_water_temperature_depth", ""), primaryValue.get("sea_water_temperature", ""),
+ primaryValue.get("sea_water_salinity", ""), primaryValue.get("sea_water_temperature", ""),
primaryValue.get("wind_speed", ""), primaryValue.get("wind_direction", ""),
primaryValue.get("wind_u", ""), primaryValue.get("wind_v", ""),
# Matchup
matchup["id"], matchup["source"], matchup["x"], matchup["y"],
matchup["time"].strftime(ISO_8601), matchup["platform"],
- matchup.get("sea_water_salinity_depth", ""), matchup.get("sea_water_salinity", ""),
- matchup.get("sea_water_temperature_depth", ""), matchup.get("sea_water_temperature", ""),
+ matchup.get(depth, ""), matchup.get("sea_water_salinity", ""),
+ matchup.get("sea_water_temperature", ""),
matchup.get("wind_speed", ""), matchup.get("wind_direction", ""),
matchup.get("wind_u", ""), matchup.get("wind_v", ""),
]
-
writer.writerow(row)
@staticmethod
def __addConstants(csvfile):
global_attrs = [
+ {"Global Attribute": "product_version", "Value": "1.0"},
{"Global Attribute": "Conventions", "Value": "CF-1.6, ACDD-1.3"},
{"Global Attribute": "title", "Value": "DOMS satellite-insitu machup output file"},
{"Global Attribute": "history",
@@ -173,7 +189,9 @@ def __addConstants(csvfile):
{"Global Attribute": "keywords_vocabulary",
"Value": "NASA Global Change Master Directory (GCMD) Science Keywords"},
# TODO What should the keywords be?
- {"Global Attribute": "keywords", "Value": ""},
+ {"Global Attribute": "keywords", "Value": "SATELLITES, OCEAN PLATFORMS, SHIPS, BUOYS, MOORINGS, AUVS, ROV, "
+ "NASA/JPL/PODAAC, FSU/COAPS, UCAR/NCAR, SALINITY, "
+ "SEA SURFACE TEMPERATURE, SURFACE WINDS"},
{"Global Attribute": "creator_name", "Value": "NASA PO.DAAC"},
{"Global Attribute": "creator_email", "Value": "podaac@podaac.jpl.nasa.gov"},
{"Global Attribute": "creator_url", "Value": "https://podaac.jpl.nasa.gov/"},
@@ -196,14 +214,20 @@ def __addDynamicAttrs(csvfile, executionId, results, params, details):
for match in primaryValue['matches']:
platforms.add(match['platform'])
+ # insituDatasets = params["matchup"].split(",")
+ insituDatasets = params["matchup"]
+ insituLinks = set()
+ for insitu in insituDatasets:
+ insituLinks.add(config.METADATA_LINKS[insitu])
+
+
global_attrs = [
{"Global Attribute": "Platform", "Value": ', '.join(platforms)},
{"Global Attribute": "time_coverage_start",
"Value": params["startTime"].strftime(ISO_8601)},
{"Global Attribute": "time_coverage_end",
"Value": params["endTime"].strftime(ISO_8601)},
- # TODO I don't think this applies
- # {"Global Attribute": "time_coverage_resolution", "Value": "point"},
+ {"Global Attribute": "time_coverage_resolution", "Value": "point"},
{"Global Attribute": "geospatial_lon_min", "Value": params["bbox"].split(',')[0]},
{"Global Attribute": "geospatial_lat_min", "Value": params["bbox"].split(',')[1]},
@@ -223,31 +247,25 @@ def __addDynamicAttrs(csvfile, executionId, results, params, details):
{"Global Attribute": "DOMS_matchID", "Value": executionId},
{"Global Attribute": "DOMS_TimeWindow", "Value": params["timeTolerance"] / 60 / 60},
{"Global Attribute": "DOMS_TimeWindow_Units", "Value": "hours"},
- {"Global Attribute": "DOMS_depth_min", "Value": params["depthMin"]},
- {"Global Attribute": "DOMS_depth_min_units", "Value": "m"},
- {"Global Attribute": "DOMS_depth_max", "Value": params["depthMax"]},
- {"Global Attribute": "DOMS_depth_max_units", "Value": "m"},
{"Global Attribute": "DOMS_platforms", "Value": params["platforms"]},
{"Global Attribute": "DOMS_SearchRadius", "Value": params["radiusTolerance"]},
{"Global Attribute": "DOMS_SearchRadius_Units", "Value": "m"},
- {"Global Attribute": "DOMS_bounding_box", "Value": params["bbox"]},
+ {"Global Attribute": "DOMS_DatasetMetadata", "Value": ', '.join(insituLinks)},
{"Global Attribute": "DOMS_primary", "Value": params["primary"]},
- {"Global Attribute": "DOMS_match-up", "Value": ",".join(params["matchup"])},
+ {"Global Attribute": "DOMS_match_up", "Value": params["matchup"]},
{"Global Attribute": "DOMS_ParameterPrimary", "Value": params.get("parameter", "")},
{"Global Attribute": "DOMS_time_to_complete", "Value": details["timeToComplete"]},
{"Global Attribute": "DOMS_time_to_complete_units", "Value": "seconds"},
{"Global Attribute": "DOMS_num_matchup_matched", "Value": details["numInSituMatched"]},
{"Global Attribute": "DOMS_num_primary_matched", "Value": details["numGriddedMatched"]},
- {"Global Attribute": "DOMS_num_matchup_checked",
- "Value": details["numInSituChecked"] if details["numInSituChecked"] != 0 else "N/A"},
- {"Global Attribute": "DOMS_num_primary_checked",
- "Value": details["numGriddedChecked"] if details["numGriddedChecked"] != 0 else "N/A"},
{"Global Attribute": "date_modified", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)},
{"Global Attribute": "date_created", "Value": datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)},
+
+ {"Global Attribute": "URI_Matchup", "Value": "http://{webservice}/domsresults?id=" + executionId + "&output=CSV"},
]
writer = csv.DictWriter(csvfile, sorted(next(iter(global_attrs)).keys()))
@@ -258,31 +276,22 @@ def __addDynamicAttrs(csvfile, executionId, results, params, details):
class DomsNetCDFFormatter:
@staticmethod
def create(executionId, results, params, details):
+
t = tempfile.mkstemp(prefix="doms_", suffix=".nc")
tempFileName = t[1]
dataset = Dataset(tempFileName, "w", format="NETCDF4")
+ dataset.DOMS_matchID = executionId
+ DomsNetCDFFormatter.__addNetCDFConstants(dataset)
- dataset.matchID = executionId
- dataset.Matchup_TimeWindow = params["timeTolerance"]
- dataset.Matchup_TimeWindow_Units = "hours"
-
- dataset.time_coverage_start = datetime.fromtimestamp(params["startTime"] / 1000).strftime('%Y%m%d %H:%M:%S')
- dataset.time_coverage_end = datetime.fromtimestamp(params["endTime"] / 1000).strftime('%Y%m%d %H:%M:%S')
- dataset.depth_min = params["depthMin"]
- dataset.depth_max = params["depthMax"]
- dataset.platforms = params["platforms"]
-
- dataset.Matchup_SearchRadius = params["radiusTolerance"]
- dataset.Matchup_SearchRadius_Units = "m"
-
- dataset.bounding_box = params["bbox"]
- dataset.primary = params["primary"]
- dataset.secondary = ",".join(params["matchup"])
-
- dataset.Matchup_ParameterPrimary = params["parameter"] if "parameter" in params else ""
-
+ dataset.date_modified = datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)
+ dataset.date_created = datetime.utcnow().replace(tzinfo=UTC).strftime(ISO_8601)
+ dataset.time_coverage_start = params["startTime"].strftime(ISO_8601)
+ dataset.time_coverage_end = params["endTime"].strftime(ISO_8601)
dataset.time_coverage_resolution = "point"
+ dataset.DOMS_match_up = params["matchup"]
+ dataset.DOMS_num_matchup_matched = details["numInSituMatched"]
+ dataset.DOMS_num_primary_matched = details["numGriddedMatched"]
bbox = geo.BoundingBox(asString=params["bbox"])
dataset.geospatial_lat_max = bbox.north
@@ -293,254 +302,65 @@ def create(executionId, results, params, details):
dataset.geospatial_lon_resolution = "point"
dataset.geospatial_lat_units = "degrees_north"
dataset.geospatial_lon_units = "degrees_east"
- dataset.geospatial_vertical_min = 0.0
- dataset.geospatial_vertical_max = params["radiusTolerance"]
+ dataset.geospatial_vertical_min = float(params["depthMin"])
+ dataset.geospatial_vertical_max = float(params["depthMax"])
dataset.geospatial_vertical_units = "m"
dataset.geospatial_vertical_resolution = "point"
dataset.geospatial_vertical_positive = "down"
- dataset.time_to_complete = details["timeToComplete"]
- dataset.num_insitu_matched = details["numInSituMatched"]
- dataset.num_gridded_checked = details["numGriddedChecked"]
- dataset.num_gridded_matched = details["numGriddedMatched"]
- dataset.num_insitu_checked = details["numInSituChecked"]
+ dataset.DOMS_TimeWindow = params["timeTolerance"] / 60 / 60
+ dataset.DOMS_TimeWindow_Units = "hours"
+ dataset.DOMS_SearchRadius = float(params["radiusTolerance"])
+ dataset.DOMS_SearchRadius_Units = "m"
+ # dataset.URI_Subset = "http://webservice subsetting query request"
+ dataset.URI_Matchup = "http://{webservice}/domsresults?id=" + executionId + "&output=NETCDF"
+ dataset.DOMS_ParameterPrimary = params["parameter"] if "parameter" in params else ""
+ dataset.DOMS_platforms = params["platforms"]
+ dataset.DOMS_primary = params["primary"]
+ dataset.DOMS_time_to_complete = details["timeToComplete"]
+ dataset.DOMS_time_to_complete_units = "seconds"
+
+ insituDatasets = params["matchup"]
+ insituLinks = set()
+ for insitu in insituDatasets:
+ insituLinks.add(config.METADATA_LINKS[insitu])
+ dataset.DOMS_DatasetMetadata = ', '.join(insituLinks)
- dataset.date_modified = datetime.now().strftime('%Y%m%d %H:%M:%S')
- dataset.date_created = datetime.now().strftime('%Y%m%d %H:%M:%S')
-
- DomsNetCDFFormatter.__addNetCDFConstants(dataset)
-
- idList = []
- primaryIdList = []
- DomsNetCDFFormatter.__packDataIntoDimensions(idList, primaryIdList, results)
-
- idDim = dataset.createDimension("id", size=None)
- primaryIdDim = dataset.createDimension("primary_id", size=None)
-
- idVar = dataset.createVariable("id", "i4", ("id",), chunksizes=(2048,))
- primaryIdVar = dataset.createVariable("primary_id", "i4", ("primary_id",), chunksizes=(2048,))
+ platforms = set()
+ for primaryValue in results:
+ platforms.add(primaryValue['platform'])
+ for match in primaryValue['matches']:
+ platforms.add(match['platform'])
+ dataset.platform = ', '.join(platforms)
- idVar[:] = idList
- primaryIdVar[:] = primaryIdList
+ satellite_group_name = "SatelliteData"
+ insitu_group_name = "InsituData"
- DomsNetCDFFormatter.__createDimension(dataset, results, "lat", "f4", "y")
- DomsNetCDFFormatter.__createDimension(dataset, results, "lon", "f4", "x")
+ # Create Satellite group, variables, and attributes
+ satelliteGroup = dataset.createGroup(satellite_group_name)
+ satelliteWriter = DomsNetCDFValueWriter(satelliteGroup, params["parameter"])
- DomsNetCDFFormatter.__createDimension(dataset, results, "sea_water_temperature_depth", "f4",
- "sea_water_temperature_depth")
- DomsNetCDFFormatter.__createDimension(dataset, results, "sea_water_temperature", "f4", "sea_water_temperature")
- DomsNetCDFFormatter.__createDimension(dataset, results, "sea_water_salinity_depth", "f4",
- "sea_water_salinity_depth")
- DomsNetCDFFormatter.__createDimension(dataset, results, "sea_water_salinity", "f4", "sea_water_salinity")
+ # Create InSitu group, variables, and attributes
+ insituGroup = dataset.createGroup(insitu_group_name)
+ insituWriter = DomsNetCDFValueWriter(insituGroup, params["parameter"])
- DomsNetCDFFormatter.__createDimension(dataset, results, "wind_speed", "f4", "wind_speed")
- DomsNetCDFFormatter.__createDimension(dataset, results, "wind_direction", "f4", "wind_direction")
- DomsNetCDFFormatter.__createDimension(dataset, results, "wind_u", "f4", "wind_u")
- DomsNetCDFFormatter.__createDimension(dataset, results, "wind_v", "f4", "wind_v")
+ # Add data to Insitu and Satellite groups, generate array of match ID pairs
+ matches = DomsNetCDFFormatter.__writeResults(results, satelliteWriter, insituWriter)
+ dataset.createDimension("MatchedRecords", size=None)
+ dataset.createDimension("MatchedGroups", size=2)
+ matchArray = dataset.createVariable("matchIDs", "f4", ("MatchedRecords", "MatchedGroups"))
+ matchArray[:] = matches
- DomsNetCDFFormatter.__createDimension(dataset, results, "time", "f4", "time")
dataset.close()
-
f = open(tempFileName, "rb")
data = f.read()
f.close()
os.unlink(tempFileName)
return data
- @staticmethod
- def __packDataIntoDimensions(idVar, primaryIdVar, values, primaryValueId=None):
-
- for value in values:
- id = hash(value["id"])
- idVar.append(id)
- primaryIdVar.append(primaryValueId if primaryValueId is not None else -1)
-
- if "matches" in value and len(value["matches"]) > 0:
- DomsNetCDFFormatter.__packDataIntoDimensions(idVar, primaryIdVar, value["matches"], id)
-
- @staticmethod
- def __packDimensionList(values, field, varList):
- for value in values:
- if field in value:
- varList.append(value[field])
- else:
- varList.append(np.nan)
- if "matches" in value and len(value["matches"]) > 0:
- DomsNetCDFFormatter.__packDimensionList(value["matches"], field, varList)
-
- @staticmethod
- def __createDimension(dataset, values, name, type, arrayField):
- dim = dataset.createDimension(name, size=None)
- var = dataset.createVariable(name, type, (name,), chunksizes=(2048,), fill_value=-32767.0)
-
- varList = []
- DomsNetCDFFormatter.__packDimensionList(values, arrayField, varList)
-
- var[:] = varList
-
- if name == "lon":
- DomsNetCDFFormatter.__enrichLonVariable(var)
- elif name == "lat":
- DomsNetCDFFormatter.__enrichLatVariable(var)
- elif name == "time":
- DomsNetCDFFormatter.__enrichTimeVariable(var)
- elif name == "sea_water_salinity":
- DomsNetCDFFormatter.__enrichSSSVariable(var)
- elif name == "sea_water_salinity_depth":
- DomsNetCDFFormatter.__enrichSSSDepthVariable(var)
- elif name == "sea_water_temperature":
- DomsNetCDFFormatter.__enrichSSTVariable(var)
- elif name == "sea_water_temperature_depth":
- DomsNetCDFFormatter.__enrichSSTDepthVariable(var)
- elif name == "wind_direction":
- DomsNetCDFFormatter.__enrichWindDirectionVariable(var)
- elif name == "wind_speed":
- DomsNetCDFFormatter.__enrichWindSpeedVariable(var)
- elif name == "wind_u":
- DomsNetCDFFormatter.__enrichWindUVariable(var)
- elif name == "wind_v":
- DomsNetCDFFormatter.__enrichWindVVariable(var)
-
- @staticmethod
- def __enrichSSSVariable(var):
- var.long_name = "sea surface salinity"
- var.standard_name = "sea_surface_salinity"
- var.units = "1e-3"
- var.valid_min = 30
- var.valid_max = 40
- var.scale_factor = 1.0
- var.add_offset = 0.0
- var.coordinates = "lon lat time"
- var.grid_mapping = "crs"
- var.comment = ""
- var.cell_methods = ""
- var.metadata_link = ""
-
- @staticmethod
- def __enrichSSSDepthVariable(var):
- var.long_name = "sea surface salinity_depth"
- var.standard_name = "sea_surface_salinity_depth"
- var.units = "m"
- var.scale_factor = 1.0
- var.add_offset = 0.0
- var.coordinates = "lon lat time"
- var.grid_mapping = "crs"
- var.comment = ""
- var.cell_methods = ""
- var.metadata_link = ""
-
- @staticmethod
- def __enrichSSTVariable(var):
- var.long_name = "sea surface temperature"
- var.standard_name = "sea_surface_temperature"
- var.units = "c"
- var.valid_min = -3
- var.valid_max = 50
- var.scale_factor = 1.0
- var.add_offset = 0.0
- var.coordinates = "lon lat time"
- var.grid_mapping = "crs"
- var.comment = ""
- var.cell_methods = ""
- var.metadata_link = ""
-
- @staticmethod
- def __enrichSSTDepthVariable(var):
- var.long_name = "sea surface temperature_depth"
- var.standard_name = "sea_surface_temperature_depth"
- var.units = "m"
- var.scale_factor = 1.0
- var.add_offset = 0.0
- var.coordinates = "lon lat time"
- var.grid_mapping = "crs"
- var.comment = ""
- var.cell_methods = ""
- var.metadata_link = ""
-
- @staticmethod
- def __enrichWindDirectionVariable(var):
- var.long_name = "wind direction"
- var.standard_name = "wind_direction"
- var.units = "degrees"
- var.scale_factor = 1.0
- var.add_offset = 0.0
- var.coordinates = "lon lat time"
- var.grid_mapping = "crs"
- var.comment = ""
- var.cell_methods = ""
- var.metadata_link = ""
-
- @staticmethod
- def __enrichWindSpeedVariable(var):
- var.long_name = "wind speed"
- var.standard_name = "wind_speed"
- var.units = "km/h"
- var.scale_factor = 1.0
- var.add_offset = 0.0
- var.coordinates = "lon lat time"
- var.grid_mapping = "crs"
- var.comment = ""
- var.cell_methods = ""
- var.metadata_link = ""
-
- @staticmethod
- def __enrichWindUVariable(var):
- var.long_name = "wind u"
- var.standard_name = "wind_u"
- var.units = ""
- var.scale_factor = 1.0
- var.add_offset = 0.0
- var.coordinates = "lon lat time"
- var.grid_mapping = "crs"
- var.comment = ""
- var.cell_methods = ""
- var.metadata_link = ""
-
- @staticmethod
- def __enrichWindVVariable(var):
- var.long_name = "wind v"
- var.standard_name = "wind_v"
- var.units = ""
- var.scale_factor = 1.0
- var.add_offset = 0.0
- var.coordinates = "lon lat time"
- var.grid_mapping = "crs"
- var.comment = ""
- var.cell_methods = ""
- var.metadata_link = ""
-
- @staticmethod
- def __enrichTimeVariable(var):
- var.long_name = "Time"
- var.standard_name = "time"
- var.axis = "T"
- var.units = "seconds since 1970-01-01 00:00:00 0:00"
- var.calendar = "standard"
- var.comment = "Nominal time of satellite corresponding to the start of the product time interval"
-
- @staticmethod
- def __enrichLonVariable(var):
- var.long_name = "Longitude"
- var.standard_name = "longitude"
- var.axis = "X"
- var.units = "degrees_east"
- var.valid_min = -180.0
- var.valid_max = 180.0
- var.comment = "Data longitude for in-situ, midpoint beam for satellite measurements."
-
- @staticmethod
- def __enrichLatVariable(var):
- var.long_name = "Latitude"
- var.standard_name = "latitude"
- var.axis = "Y"
- var.units = "degrees_north"
- var.valid_min = -90.0
- var.valid_max = 90.0
- var.comment = "Data latitude for in-situ, midpoint beam for satellite measurements."
-
@staticmethod
def __addNetCDFConstants(dataset):
- dataset.bnds = 2
+ dataset.product_version = "1.0"
dataset.Conventions = "CF-1.6, ACDD-1.3"
dataset.title = "DOMS satellite-insitu machup output file"
dataset.history = "Processing_Version = V1.0, Software_Name = DOMS, Software_Version = 1.03"
@@ -549,176 +369,267 @@ def __addNetCDFConstants(dataset):
dataset.standard_name_vocabulary = "CF Standard Name Table v27", "BODC controlled vocabulary"
dataset.cdm_data_type = "Point/Profile, Swath/Grid"
dataset.processing_level = "4"
- dataset.platform = "Endeavor"
- dataset.instrument = "Endeavor on-board sea-bird SBE 9/11 CTD"
dataset.project = "Distributed Oceanographic Matchup System (DOMS)"
dataset.keywords_vocabulary = "NASA Global Change Master Directory (GCMD) Science Keywords"
- dataset.keywords = "Salinity, Upper Ocean, SPURS, CTD, Endeavor, Atlantic Ocean"
+ dataset.keywords = "SATELLITES, OCEAN PLATFORMS, SHIPS, BUOYS, MOORINGS, AUVS, ROV, NASA/JPL/PODAAC, " \
+ "FSU/COAPS, UCAR/NCAR, SALINITY, SEA SURFACE TEMPERATURE, SURFACE WINDS"
dataset.creator_name = "NASA PO.DAAC"
dataset.creator_email = "podaac@podaac.jpl.nasa.gov"
dataset.creator_url = "https://podaac.jpl.nasa.gov/"
dataset.publisher_name = "NASA PO.DAAC"
dataset.publisher_email = "podaac@podaac.jpl.nasa.gov"
dataset.publisher_url = "https://podaac.jpl.nasa.gov"
- dataset.acknowledgment = "DOMS is a NASA/AIST-funded project. Grant number ####."
-
+ dataset.acknowledgment = "DOMS is a NASA/AIST-funded project. NRA NNH14ZDA001N."
-class DomsNetCDFFormatterAlt:
@staticmethod
- def create(executionId, results, params, details):
- t = tempfile.mkstemp(prefix="doms_", suffix=".nc")
- tempFileName = t[1]
-
- dataset = Dataset(tempFileName, "w", format="NETCDF4")
-
- dataset.matchID = executionId
- dataset.Matchup_TimeWindow = params["timeTolerance"]
- dataset.Matchup_TimeWindow_Units = "hours"
-
- dataset.time_coverage_start = datetime.fromtimestamp(params["startTime"] / 1000).strftime('%Y%m%d %H:%M:%S')
- dataset.time_coverage_end = datetime.fromtimestamp(params["endTime"] / 1000).strftime('%Y%m%d %H:%M:%S')
- dataset.depth_min = params["depthMin"]
- dataset.depth_max = params["depthMax"]
- dataset.platforms = params["platforms"]
-
- dataset.Matchup_SearchRadius = params["radiusTolerance"]
- dataset.Matchup_SearchRadius_Units = "m"
-
- dataset.bounding_box = params["bbox"]
- dataset.primary = params["primary"]
- dataset.secondary = ",".join(params["matchup"])
-
- dataset.Matchup_ParameterPrimary = params["parameter"] if "parameter" in params else ""
-
- dataset.time_coverage_resolution = "point"
-
- bbox = geo.BoundingBox(asString=params["bbox"])
- dataset.geospatial_lat_max = bbox.north
- dataset.geospatial_lat_min = bbox.south
- dataset.geospatial_lon_max = bbox.east
- dataset.geospatial_lon_min = bbox.west
- dataset.geospatial_lat_resolution = "point"
- dataset.geospatial_lon_resolution = "point"
- dataset.geospatial_lat_units = "degrees_north"
- dataset.geospatial_lon_units = "degrees_east"
- dataset.geospatial_vertical_min = 0.0
- dataset.geospatial_vertical_max = params["radiusTolerance"]
- dataset.geospatial_vertical_units = "m"
- dataset.geospatial_vertical_resolution = "point"
- dataset.geospatial_vertical_positive = "down"
+ def __writeResults(results, satelliteWriter, insituWriter):
+ ids = {}
+ matches = []
+ insituIndex = 0
- dataset.time_to_complete = details["timeToComplete"]
- dataset.num_insitu_matched = details["numInSituMatched"]
- dataset.num_gridded_checked = details["numGriddedChecked"]
- dataset.num_gridded_matched = details["numGriddedMatched"]
- dataset.num_insitu_checked = details["numInSituChecked"]
+ #
+ # Loop through all of the results, add each satellite data point to the array
+ #
+ for r in range(0, len(results)):
+ result = results[r]
+ satelliteWriter.addData(result)
- dataset.date_modified = datetime.now().strftime('%Y%m%d %H:%M:%S')
- dataset.date_created = datetime.now().strftime('%Y%m%d %H:%M:%S')
+ # Add each match only if it is not already in the array of in situ points
+ for match in result["matches"]:
+ if match["id"] not in ids:
+ ids[match["id"]] = insituIndex
+ insituIndex += 1
+ insituWriter.addData(match)
- DomsNetCDFFormatterAlt.__addNetCDFConstants(dataset)
+ # Append an index pair of (satellite, in situ) to the array of matches
+ matches.append((r, ids[match["id"]]))
- satelliteGroup = dataset.createGroup("SatelliteData")
- satelliteWriter = DomsNetCDFValueWriter(satelliteGroup)
+ # Add data/write to the netCDF file
+ satelliteWriter.writeGroup()
+ insituWriter.writeGroup()
- insituGroup = dataset.createGroup("InsituData")
- insituWriter = DomsNetCDFValueWriter(insituGroup)
+ return matches
- matches = DomsNetCDFFormatterAlt.__writeResults(results, satelliteWriter, insituWriter)
- satelliteWriter.commit()
- insituWriter.commit()
+class DomsNetCDFValueWriter:
+ def __init__(self, group, matchup_parameter):
+ group.createDimension("dim", size=None)
+ self.group = group
- satDim = dataset.createDimension("satellite_ids", size=None)
- satVar = dataset.createVariable("satellite_ids", "i4", ("satellite_ids",), chunksizes=(2048,),
- fill_value=-32767)
+ self.lat = []
+ self.lon = []
+ self.time = []
+ self.sea_water_salinity = []
+ self.wind_speed = []
+ self.wind_u = []
+ self.wind_v = []
+ self.wind_direction = []
+ self.sea_water_temperature = []
+ self.depth = []
+
+ self.satellite_group_name = "SatelliteData"
+ self.insitu_group_name = "InsituData"
+
+ #
+ # Only include the depth variable related to the match-up parameter. If the match-up parameter is
+ # not sss or sst then do not include any depth data, just fill values.
+ #
+ if matchup_parameter == "sss":
+ self.matchup_depth = "sea_water_salinity_depth"
+ elif matchup_parameter == "sst":
+ self.matchup_depth = "sea_water_temperature_depth"
+ else:
+ self.matchup_depth = "NO_DEPTH"
+
+ def addData(self, value):
+ self.lat.append(value.get("y", None))
+ self.lon.append(value.get("x", None))
+ self.time.append(time.mktime(value.get("time").timetuple()))
+ self.sea_water_salinity.append(value.get("sea_water_salinity", None))
+ self.wind_speed.append(value.get("wind_speed", None))
+ self.wind_u.append(value.get("wind_u", None))
+ self.wind_v.append(value.get("wind_v", None))
+ self.wind_direction.append(value.get("wind_direction", None))
+ self.sea_water_temperature.append(value.get("sea_water_temperature", None))
+ self.depth.append(value.get(self.matchup_depth, None))
+
+ def writeGroup(self):
+ #
+ # Create variables, enrich with attributes, and add data
+ #
+ lonVar = self.group.createVariable("lon", "f4", ("dim",), fill_value=-32767.0)
+ latVar = self.group.createVariable("lat", "f4", ("dim",), fill_value=-32767.0)
+ timeVar = self.group.createVariable("time", "f4", ("dim",), fill_value=-32767.0)
+
+ self.__enrichLon(lonVar, min(self.lon), max(self.lon))
+ self.__enrichLat(latVar, min(self.lat), max(self.lat))
+ self.__enrichTime(timeVar)
+
+ latVar[:] = self.lat
+ lonVar[:] = self.lon
+ timeVar[:] = self.time
+
+ if self.sea_water_salinity.count(None) != len(self.sea_water_salinity):
+ if self.group.name == self.satellite_group_name:
+ sssVar = self.group.createVariable("SeaSurfaceSalinity", "f4", ("dim",), fill_value=-32767.0)
+ self.__enrichSSSMeasurements(sssVar, min(self.sea_water_salinity), max(self.sea_water_salinity))
+ else: # group.name == self.insitu_group_name
+ sssVar = self.group.createVariable("SeaWaterSalinity", "f4", ("dim",), fill_value=-32767.0)
+ self.__enrichSWSMeasurements(sssVar, min(self.sea_water_salinity), max(self.sea_water_salinity))
+ sssVar[:] = self.sea_water_salinity
+
+ if self.wind_speed.count(None) != len(self.wind_speed):
+ windSpeedVar = self.group.createVariable("WindSpeed", "f4", ("dim",), fill_value=-32767.0)
+ self.__enrichWindSpeed(windSpeedVar, self.__calcMin(self.wind_speed), max(self.wind_speed))
+ windSpeedVar[:] = self.wind_speed
+
+ if self.wind_u.count(None) != len(self.wind_u):
+ windUVar = self.group.createVariable("WindU", "f4", ("dim",), fill_value=-32767.0)
+ windUVar[:] = self.wind_u
+ self.__enrichWindU(windUVar, self.__calcMin(self.wind_u), max(self.wind_u))
+
+ if self.wind_v.count(None) != len(self.wind_v):
+ windVVar = self.group.createVariable("WindV", "f4", ("dim",), fill_value=-32767.0)
+ windVVar[:] = self.wind_v
+ self.__enrichWindV(windVVar, self.__calcMin(self.wind_v), max(self.wind_v))
+
+ if self.wind_direction.count(None) != len(self.wind_direction):
+ windDirVar = self.group.createVariable("WindDirection", "f4", ("dim",), fill_value=-32767.0)
+ windDirVar[:] = self.wind_direction
+ self.__enrichWindDir(windDirVar)
+
+ if self.sea_water_temperature.count(None) != len(self.sea_water_temperature):
+ if self.group.name == self.satellite_group_name:
+ tempVar = self.group.createVariable("SeaSurfaceTemp", "f4", ("dim",), fill_value=-32767.0)
+ self.__enrichSurfaceTemp(tempVar, self.__calcMin(self.sea_water_temperature), max(self.sea_water_temperature))
+ else:
+ tempVar = self.group.createVariable("SeaWaterTemp", "f4", ("dim",), fill_value=-32767.0)
+ self.__enrichWaterTemp(tempVar, self.__calcMin(self.sea_water_temperature), max(self.sea_water_temperature))
+ tempVar[:] = self.sea_water_temperature
- satVar[:] = [f[0] for f in matches]
+ if self.group.name == self.insitu_group_name:
+ depthVar = self.group.createVariable("Depth", "f4", ("dim",), fill_value=-32767.0)
- insituDim = dataset.createDimension("insitu_ids", size=None)
- insituVar = dataset.createVariable("insitu_ids", "i4", ("insitu_ids",), chunksizes=(2048,),
- fill_value=-32767)
- insituVar[:] = [f[1] for f in matches]
+ if self.depth.count(None) != len(self.depth):
+ self.__enrichDepth(depthVar, self.__calcMin(self.depth), max(self.depth))
+ depthVar[:] = self.depth
+ else:
+ # If depth has no data, set all values to 0
+ tempDepth = [0 for x in range(len(self.depth))]
+ depthVar[:] = tempDepth
- dataset.close()
+ #
+ # Lists may include 'None' values; to calc min these must be filtered out
+ #
+ @staticmethod
+ def __calcMin(var):
+ return min(x for x in var if x is not None)
- f = open(tempFileName, "rb")
- data = f.read()
- f.close()
- os.unlink(tempFileName)
- return data
+ #
+ # Add attributes to each variable
+ #
@staticmethod
- def __writeResults(results, satelliteWriter, insituWriter):
- ids = {}
- matches = []
+ def __enrichLon(var, var_min, var_max):
+ var.long_name = "Longitude"
+ var.standard_name = "longitude"
+ var.axis = "X"
+ var.units = "degrees_east"
+ var.valid_min = var_min
+ var.valid_max = var_max
- insituIndex = 0
+ @staticmethod
+ def __enrichLat(var, var_min, var_max):
+ var.long_name = "Latitude"
+ var.standard_name = "latitude"
+ var.axis = "Y"
+ var.units = "degrees_north"
+ var.valid_min = var_min
+ var.valid_max = var_max
- for r in range(0, len(results)):
- result = results[r]
- satelliteWriter.write(result)
- for match in result["matches"]:
- if match["id"] not in ids:
- ids[match["id"]] = insituIndex
- insituIndex += 1
- insituWriter.write(match)
+ @staticmethod
+ def __enrichTime(var):
+ var.long_name = "Time"
+ var.standard_name = "time"
+ var.axis = "T"
+ var.units = "seconds since 1970-01-01 00:00:00 0:00"
- matches.append((r, ids[match["id"]]))
+ @staticmethod
+ def __enrichSSSMeasurements(var, var_min, var_max):
+ var.long_name = "Sea surface salinity"
+ var.standard_name = "sea_surface_salinity"
+ var.units = "1e-3"
+ var.valid_min = var_min
+ var.valid_max = var_max
+ var.coordinates = "lon lat time"
- return matches
+ @staticmethod
+ def __enrichSWSMeasurements(var, var_min, var_max):
+ var.long_name = "Sea water salinity"
+ var.standard_name = "sea_water_salinity"
+ var.units = "1e-3"
+ var.valid_min = var_min
+ var.valid_max = var_max
+ var.coordinates = "lon lat depth time"
@staticmethod
- def __addNetCDFConstants(dataset):
- dataset.bnds = 2
- dataset.Conventions = "CF-1.6, ACDD-1.3"
- dataset.title = "DOMS satellite-insitu machup output file"
- dataset.history = "Processing_Version = V1.0, Software_Name = DOMS, Software_Version = 1.03"
- dataset.institution = "JPL, FSU, NCAR"
- dataset.source = "doms.jpl.nasa.gov"
- dataset.standard_name_vocabulary = "CF Standard Name Table v27", "BODC controlled vocabulary"
- dataset.cdm_data_type = "Point/Profile, Swath/Grid"
- dataset.processing_level = "4"
- dataset.platform = "Endeavor"
- dataset.instrument = "Endeavor on-board sea-bird SBE 9/11 CTD"
- dataset.project = "Distributed Oceanographic Matchup System (DOMS)"
- dataset.keywords_vocabulary = "NASA Global Change Master Directory (GCMD) Science Keywords"
- dataset.keywords = "Salinity, Upper Ocean, SPURS, CTD, Endeavor, Atlantic Ocean"
- dataset.creator_name = "NASA PO.DAAC"
- dataset.creator_email = "podaac@podaac.jpl.nasa.gov"
- dataset.creator_url = "https://podaac.jpl.nasa.gov/"
- dataset.publisher_name = "NASA PO.DAAC"
- dataset.publisher_email = "podaac@podaac.jpl.nasa.gov"
- dataset.publisher_url = "https://podaac.jpl.nasa.gov"
- dataset.acknowledgment = "DOMS is a NASA/AIST-funded project. Grant number ####."
+ def __enrichDepth(var, var_min, var_max):
+ var.valid_min = var_min
+ var.valid_max = var_max
+ var.units = "m"
+ var.long_name = "Depth"
+ var.standard_name = "depth"
+ var.axis = "Z"
+ var.positive = "Down"
+ @staticmethod
+ def __enrichWindSpeed(var, var_min, var_max):
+ var.long_name = "Wind speed"
+ var.standard_name = "wind_speed"
+ var.units = "m s-1"
+ var.valid_min = var_min
+ var.valid_max = var_max
+ var.coordinates = "lon lat depth time"
-class DomsNetCDFValueWriter:
- def __init__(self, group):
- self.latVar = DomsNetCDFValueWriter.__createDimension(group, "lat", "f4")
- self.lonVar = DomsNetCDFValueWriter.__createDimension(group, "lon", "f4")
- self.sstVar = DomsNetCDFValueWriter.__createDimension(group, "sea_water_temperature", "f4")
- self.timeVar = DomsNetCDFValueWriter.__createDimension(group, "time", "f4")
+ @staticmethod
+ def __enrichWindU(var, var_min, var_max):
+ var.long_name = "Eastward wind"
+ var.standard_name = "eastward_wind"
+ var.units = "m s-1"
+ var.valid_min = var_min
+ var.valid_max = var_max
+ var.coordinates = "lon lat depth time"
- self.lat = []
- self.lon = []
- self.sst = []
- self.time = []
+ @staticmethod
+ def __enrichWindV(var, var_min, var_max):
+ var.long_name = "Northward wind"
+ var.standard_name = "northward_wind"
+ var.units = "m s-1"
+ var.valid_min = var_min
+ var.valid_max = var_max
+ var.coordinates = "lon lat depth time"
- def write(self, value):
- self.lat.append(value["y"])
- self.lon.append(value["x"])
- self.time.append(value["time"])
- self.sst.append(value["sea_water_temperature"])
+ @staticmethod
+ def __enrichWaterTemp(var, var_min, var_max):
+ var.long_name = "Sea water temperature"
+ var.standard_name = "sea_water_temperature"
+ var.units = "degree_C"
+ var.valid_min = var_min
+ var.valid_max = var_max
+ var.coordinates = "lon lat depth time"
- def commit(self):
- self.latVar[:] = self.lat
- self.lonVar[:] = self.lon
- self.sstVar[:] = self.sst
- self.timeVar[:] = self.time
+ @staticmethod
+ def __enrichSurfaceTemp(var, var_min, var_max):
+ var.long_name = "Sea surface temperature"
+ var.standard_name = "sea_surface_temperature"
+ var.units = "degree_C"
+ var.valid_min = var_min
+ var.valid_max = var_max
+ var.coordinates = "lon lat time"
@staticmethod
- def __createDimension(group, name, type):
- dim = group.createDimension(name, size=None)
- var = group.createVariable(name, type, (name,), chunksizes=(2048,), fill_value=-32767.0)
- return var
+ def __enrichWindDir(var):
+ var.long_name = "Wind from direction"
+ var.standard_name = "wind_from_direction"
+ var.units = "degree"
+ var.coordinates = "lon lat depth time"
diff --git a/analysis/webservice/algorithms/doms/config.py b/analysis/webservice/algorithms/doms/config.py
index 0863a55..ff492e8 100644
--- a/analysis/webservice/algorithms/doms/config.py
+++ b/analysis/webservice/algorithms/doms/config.py
@@ -48,6 +48,12 @@
}
]
+METADATA_LINKS = {
+ "samos": "http://samos.coaps.fsu.edu/html/nav.php?s=2",
+ "icoads": "https://rda.ucar.edu/datasets/ds548.1/",
+ "spurs": "https://podaac.jpl.nasa.gov/spurs"
+}
+
import os
try:
@@ -87,6 +93,11 @@
"metadataUrl": "http://doms.jpl.nasa.gov/ws/metadata/dataset?shortName=SPURS-2&format=umm-json"
}
]
+ METADATA_LINKS = {
+ "samos": "http://samos.coaps.fsu.edu/html/nav.php?s=2",
+ "icoads": "https://rda.ucar.edu/datasets/ds548.1/",
+ "spurs": "https://podaac.jpl.nasa.gov/spurs"
+ }
except KeyError:
pass
diff --git a/analysis/webservice/algorithms_spark/ClimMapSpark.py b/analysis/webservice/algorithms_spark/ClimMapSpark.py
index eb567f5..75c7b73 100644
--- a/analysis/webservice/algorithms_spark/ClimMapSpark.py
+++ b/analysis/webservice/algorithms_spark/ClimMapSpark.py
@@ -14,6 +14,7 @@
# limitations under the License.
+import math
import logging
from calendar import timegm, monthrange
from datetime import datetime
@@ -120,7 +121,6 @@ def calc(self, computeOptions, **args):
:return:
"""
- spark_master, spark_nexecs, spark_nparts = computeOptions.get_spark_cfg()
self._setQueryParams(computeOptions.get_dataset()[0],
(float(computeOptions.get_min_lat()),
float(computeOptions.get_max_lat()),
@@ -128,10 +128,7 @@ def calc(self, computeOptions, **args):
float(computeOptions.get_max_lon())),
start_year=computeOptions.get_start_year(),
end_year=computeOptions.get_end_year(),
- clim_month=computeOptions.get_clim_month(),
- spark_master=spark_master,
- spark_nexecs=spark_nexecs,
- spark_nparts=spark_nparts)
+ clim_month=computeOptions.get_clim_month())
self._startTime = timegm((self._startYear, 1, 1, 0, 0, 0))
self._endTime = timegm((self._endYear, 12, 31, 23, 59, 59))
@@ -139,6 +136,8 @@ def calc(self, computeOptions, **args):
raise NexusProcessingException(reason="Cannot compute Latitude/Longitude Time Average map on a climatology",
code=400)
+ nparts_requested = computeOptions.get_nparts()
+
nexus_tiles = self._find_global_tile_set()
# print 'tiles:'
# for tile in nexus_tiles:
@@ -199,7 +198,9 @@ def calc(self, computeOptions, **args):
# print nexus_tiles_spark[i]
# Launch Spark computations
- rdd = self._sc.parallelize(nexus_tiles_spark, self._spark_nparts)
+ spark_nparts = self._spark_nparts(nparts_requested)
+ self.log.info('Using {} partitions'.format(spark_nparts))
+ rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts)
sum_count_part = rdd.map(self._map)
sum_count = \
sum_count_part.combineByKey(lambda val: val,
diff --git a/analysis/webservice/algorithms_spark/CorrMapSpark.py b/analysis/webservice/algorithms_spark/CorrMapSpark.py
index c6b0c99..9503298 100644
--- a/analysis/webservice/algorithms_spark/CorrMapSpark.py
+++ b/analysis/webservice/algorithms_spark/CorrMapSpark.py
@@ -15,8 +15,9 @@
import json
+import math
import logging
-
+from datetime import datetime
import numpy as np
from nexustiles.nexustiles import NexusTileService
@@ -164,17 +165,14 @@ def _map(tile_in):
def calc(self, computeOptions, **args):
- spark_master, spark_nexecs, spark_nparts = computeOptions.get_spark_cfg()
self._setQueryParams(computeOptions.get_dataset(),
(float(computeOptions.get_min_lat()),
float(computeOptions.get_max_lat()),
float(computeOptions.get_min_lon()),
float(computeOptions.get_max_lon())),
computeOptions.get_start_time(),
- computeOptions.get_end_time(),
- spark_master=spark_master,
- spark_nexecs=spark_nexecs,
- spark_nparts=spark_nparts)
+ computeOptions.get_end_time())
+ nparts_requested = computeOptions.get_nparts()
self.log.debug('ds = {0}'.format(self._ds))
if not len(self._ds) == 2:
@@ -200,6 +198,20 @@ def calc(self, computeOptions, **args):
self.log.debug('Using Native resolution: lat_res={0}, lon_res={1}'.format(self._latRes, self._lonRes))
self.log.debug('nlats={0}, nlons={1}'.format(self._nlats, self._nlons))
+ daysinrange = self._tile_service.find_days_in_range_asc(self._minLat,
+ self._maxLat,
+ self._minLon,
+ self._maxLon,
+ self._ds[0],
+ self._startTime,
+ self._endTime)
+ ndays = len(daysinrange)
+ if ndays == 0:
+ raise NoDataException(reason="No data found for selected timeframe")
+ self.log.debug('Found {0} days in range'.format(ndays))
+ for i, d in enumerate(daysinrange):
+ self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))
+
# Create array of tuples to pass to Spark map function
nexus_tiles_spark = [[self._find_tile_bounds(t),
self._startTime, self._endTime,
@@ -212,30 +224,20 @@ def calc(self, computeOptions, **args):
# Expand Spark map tuple array by duplicating each entry N times,
# where N is the number of ways we want the time dimension carved up.
- num_time_parts = 72
- # num_time_parts = 2
- # num_time_parts = 1
+ # Set the time boundaries for each of the Spark map tuples so that
+ # every Nth element in the array gets the same time bounds.
+ max_time_parts = 72
+ num_time_parts = min(max_time_parts, ndays)
+
+ spark_part_time_ranges = np.tile(np.array([a[[0,-1]] for a in np.array_split(np.array(daysinrange), num_time_parts)]), (len(nexus_tiles_spark),1))
nexus_tiles_spark = np.repeat(nexus_tiles_spark, num_time_parts, axis=0)
- self.log.debug('repeated len(nexus_tiles_spark) = {0}'.format(len(nexus_tiles_spark)))
-
- # Set the time boundaries for each of the Spark map tuples.
- # Every Nth element in the array gets the same time bounds.
- spark_part_times = np.linspace(self._startTime, self._endTime + 1,
- num_time_parts + 1, dtype=np.int64)
-
- spark_part_time_ranges = \
- np.repeat([[[spark_part_times[i],
- spark_part_times[i + 1] - 1] for i in range(num_time_parts)]],
- len(nexus_tiles_spark) / num_time_parts, axis=0).reshape((len(nexus_tiles_spark), 2))
- self.log.debug('spark_part_time_ranges={0}'.format(spark_part_time_ranges))
nexus_tiles_spark[:, 1:3] = spark_part_time_ranges
- # print 'nexus_tiles_spark final = '
- # for i in range(len(nexus_tiles_spark)):
- # print nexus_tiles_spark[i]
# Launch Spark computations
- # print 'nexus_tiles_spark=',nexus_tiles_spark
- rdd = self._sc.parallelize(nexus_tiles_spark, self._spark_nparts)
+ spark_nparts = self._spark_nparts(nparts_requested)
+ self.log.info('Using {} partitions'.format(spark_nparts))
+
+ rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts)
sum_tiles_part = rdd.map(self._map)
# print "sum_tiles_part = ",sum_tiles_part.collect()
sum_tiles = \
diff --git a/analysis/webservice/algorithms_spark/HofMoellerSpark.py b/analysis/webservice/algorithms_spark/HofMoellerSpark.py
index 96e9f6a..1696732 100644
--- a/analysis/webservice/algorithms_spark/HofMoellerSpark.py
+++ b/analysis/webservice/algorithms_spark/HofMoellerSpark.py
@@ -190,13 +190,10 @@ def parse_arguments(self, request):
request.get_start_datetime().strftime(ISO_8601), request.get_end_datetime().strftime(ISO_8601)),
code=400)
- spark_master, spark_nexecs, spark_nparts = request.get_spark_cfg()
-
start_seconds_from_epoch = long((start_time - EPOCH).total_seconds())
end_seconds_from_epoch = long((end_time - EPOCH).total_seconds())
- return ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, \
- spark_master, spark_nexecs, spark_nparts
+ return ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch
def applyDeseasonToHofMoellerByField(self, results, pivot="lats", field="mean", append=True):
shape = (len(results), len(results[0][pivot]))
@@ -336,7 +333,7 @@ def __init__(self):
BaseHoffMoellerHandlerImpl.__init__(self)
def calc(self, compute_options, **args):
- ds, bbox, start_time, end_time, spark_master, spark_nexecs, spark_nparts = self.parse_arguments(compute_options)
+ ds, bbox, start_time, end_time = self.parse_arguments(compute_options)
min_lon, min_lat, max_lon, max_lat = bbox.bounds
@@ -378,7 +375,7 @@ def __init__(self):
BaseHoffMoellerHandlerImpl.__init__(self)
def calc(self, compute_options, **args):
- ds, bbox, start_time, end_time, spark_master, spark_nexecs, spark_nparts = self.parse_arguments(compute_options)
+ ds, bbox, start_time, end_time = self.parse_arguments(compute_options)
min_lon, min_lat, max_lon, max_lat = bbox.bounds
diff --git a/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py b/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
index 473f4ce..9b00489 100644
--- a/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeAvgMapSpark.py
@@ -14,6 +14,7 @@
# limitations under the License.
+import math
import logging
from datetime import datetime
@@ -128,13 +129,12 @@ def parse_arguments(self, request):
request.get_start_datetime().strftime(ISO_8601), request.get_end_datetime().strftime(ISO_8601)),
code=400)
- spark_master, spark_nexecs, spark_nparts = request.get_spark_cfg()
+ nparts_requested = request.get_nparts()
start_seconds_from_epoch = long((start_time - EPOCH).total_seconds())
end_seconds_from_epoch = long((end_time - EPOCH).total_seconds())
- return ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, \
- spark_master, spark_nexecs, spark_nparts
+ return ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, nparts_requested
def calc(self, compute_options, **args):
"""
@@ -144,20 +144,14 @@ def calc(self, compute_options, **args):
:return:
"""
- ds, bbox, start_time, end_time, spark_master, spark_nexecs, spark_nparts = self.parse_arguments(compute_options)
-
- compute_options.get_spark_cfg()
-
+ ds, bbox, start_time, end_time, nparts_requested = self.parse_arguments(compute_options)
self._setQueryParams(ds,
(float(bbox.bounds[1]),
float(bbox.bounds[3]),
float(bbox.bounds[0]),
float(bbox.bounds[2])),
start_time,
- end_time,
- spark_master=spark_master,
- spark_nexecs=spark_nexecs,
- spark_nparts=spark_nparts)
+ end_time)
nexus_tiles = self._find_global_tile_set()
@@ -165,6 +159,22 @@ def calc(self, compute_options, **args):
raise NoDataException(reason="No data found for selected timeframe")
self.log.debug('Found {0} tiles'.format(len(nexus_tiles)))
+ print('Found {} tiles'.format(len(nexus_tiles)))
+
+ daysinrange = self._tile_service.find_days_in_range_asc(bbox.bounds[1],
+ bbox.bounds[3],
+ bbox.bounds[0],
+ bbox.bounds[2],
+ ds,
+ start_time,
+ end_time)
+ ndays = len(daysinrange)
+ if ndays == 0:
+ raise NoDataException(reason="No data found for selected timeframe")
+ self.log.debug('Found {0} days in range'.format(ndays))
+ for i, d in enumerate(daysinrange):
+ self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))
+
self.log.debug('Using Native resolution: lat_res={0}, lon_res={1}'.format(self._latRes, self._lonRes))
self.log.debug('nlats={0}, nlons={1}'.format(self._nlats, self._nlons))
@@ -185,25 +195,20 @@ def calc(self, compute_options, **args):
# Expand Spark map tuple array by duplicating each entry N times,
# where N is the number of ways we want the time dimension carved up.
- num_time_parts = 72
+ # Set the time boundaries for each of the Spark map tuples so that
+ # every Nth element in the array gets the same time bounds.
+ max_time_parts = 72
+ num_time_parts = min(max_time_parts, ndays)
+ spark_part_time_ranges = np.tile(np.array([a[[0,-1]] for a in np.array_split(np.array(daysinrange), num_time_parts)]), (len(nexus_tiles_spark),1))
nexus_tiles_spark = np.repeat(nexus_tiles_spark, num_time_parts, axis=0)
- self.log.debug('repeated len(nexus_tiles_spark) = {0}'.format(len(nexus_tiles_spark)))
-
- # Set the time boundaries for each of the Spark map tuples.
- # Every Nth element in the array gets the same time bounds.
- spark_part_times = np.linspace(self._startTime, self._endTime,
- num_time_parts + 1, dtype=np.int64)
-
- spark_part_time_ranges = \
- np.repeat([[[spark_part_times[i],
- spark_part_times[i + 1]] for i in range(num_time_parts)]],
- len(nexus_tiles_spark) / num_time_parts, axis=0).reshape((len(nexus_tiles_spark), 2))
- self.log.debug('spark_part_time_ranges={0}'.format(spark_part_time_ranges))
nexus_tiles_spark[:, 1:3] = spark_part_time_ranges
# Launch Spark computations
- rdd = self._sc.parallelize(nexus_tiles_spark, self._spark_nparts)
+ spark_nparts = self._spark_nparts(nparts_requested)
+ self.log.info('Using {} partitions'.format(spark_nparts))
+
+ rdd = self._sc.parallelize(nexus_tiles_spark, spark_nparts)
sum_count_part = rdd.map(self._map)
sum_count = \
sum_count_part.combineByKey(lambda val: val,
diff --git a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
index a24c2d5..4a102aa 100644
--- a/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
+++ b/analysis/webservice/algorithms_spark/TimeSeriesSpark.py
@@ -14,6 +14,7 @@
# limitations under the License.
+import math
import calendar
import itertools
import logging
@@ -153,13 +154,12 @@ def parse_arguments(self, request):
apply_seasonal_cycle_filter = request.get_apply_seasonal_cycle_filter(default=False)
apply_low_pass_filter = request.get_apply_low_pass_filter()
- spark_master, spark_nexecs, spark_nparts = request.get_spark_cfg()
-
start_seconds_from_epoch = long((start_time - EPOCH).total_seconds())
end_seconds_from_epoch = long((end_time - EPOCH).total_seconds())
- return ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, \
- apply_seasonal_cycle_filter, apply_low_pass_filter, spark_master, spark_nexecs, spark_nparts
+ nparts_requested = request.get_nparts()
+
+ return ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, apply_seasonal_cycle_filter, apply_low_pass_filter, nparts_requested
def calc(self, request, **args):
"""
@@ -169,9 +169,7 @@ def calc(self, request, **args):
:return:
"""
- ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, \
- apply_seasonal_cycle_filter, apply_low_pass_filter, spark_master, \
- spark_nexecs, spark_nparts = self.parse_arguments(request)
+ ds, bounding_polygon, start_seconds_from_epoch, end_seconds_from_epoch, apply_seasonal_cycle_filter, apply_low_pass_filter, nparts_requested = self.parse_arguments(request)
resultsRaw = []
@@ -194,11 +192,12 @@ def calc(self, request, **args):
self.log.debug('Found {0} days in range'.format(ndays))
for i, d in enumerate(daysinrange):
self.log.debug('{0}, {1}'.format(i, datetime.utcfromtimestamp(d)))
- spark_nparts_needed = min(spark_nparts, ndays)
-
+ spark_nparts = self._spark_nparts(nparts_requested)
+ self.log.info('Using {} partitions'.format(spark_nparts))
the_time = datetime.now()
- results, meta = spark_driver(daysinrange, bounding_polygon, shortName,
- spark_nparts_needed=spark_nparts_needed, sc=self._sc)
+ results, meta = spark_driver(daysinrange, bounding_polygon,
+ shortName, spark_nparts=spark_nparts,
+ sc=self._sc)
self.log.info(
"Time series calculation took %s for dataset %s" % (str(datetime.now() - the_time), shortName))
@@ -487,15 +486,15 @@ def createLinePlot(self):
return sio.getvalue()
-def spark_driver(daysinrange, bounding_polygon, ds, fill=-9999., spark_nparts_needed=1, sc=None):
+def spark_driver(daysinrange, bounding_polygon, ds, fill=-9999.,
+ spark_nparts=1, sc=None):
nexus_tiles_spark = [(bounding_polygon.wkt, ds,
list(daysinrange_part), fill)
for daysinrange_part
- in np.array_split(daysinrange,
- spark_nparts_needed)]
+ in np.array_split(daysinrange, spark_nparts)]
# Launch Spark computations
- rdd = sc.parallelize(nexus_tiles_spark, spark_nparts_needed)
+ rdd = sc.parallelize(nexus_tiles_spark, spark_nparts)
results = rdd.map(calc_average_on_day).collect()
results = list(itertools.chain.from_iterable(results))
results = sorted(results, key=lambda entry: entry["time"])
diff --git a/analysis/webservice/config/web.ini b/analysis/webservice/config/web.ini
index a1ecb2c..2644ade 100644
--- a/analysis/webservice/config/web.ini
+++ b/analysis/webservice/config/web.ini
@@ -14,4 +14,4 @@ static_enabled=true
static_dir=static
[modules]
-module_dirs=webservice.algorithms,webservice.algorithms_spark
\ No newline at end of file
+module_dirs=webservice.algorithms,webservice.algorithms_spark,webservice.algorithms.doms
\ No newline at end of file
diff --git a/analysis/webservice/webmodel.py b/analysis/webservice/webmodel.py
index feeb019..0f98c30 100644
--- a/analysis/webservice/webmodel.py
+++ b/analysis/webservice/webmodel.py
@@ -51,7 +51,7 @@ class RequestParameters(object):
ORDER = "lpOrder"
PLOT_SERIES = "plotSeries"
PLOT_TYPE = "plotType"
- SPARK_CFG = "spark"
+ NPARTS = "nparts"
METADATA_FILTER = "metadataFilter"
@@ -79,12 +79,6 @@ def __init__(self, reason="Dataset not found"):
NexusProcessingException.__init__(self, StandardNexusErrors.DATASET_MISSING, reason, code=404)
-class SparkConfig(object):
- MAX_NUM_EXECS = 64
- MAX_NUM_PARTS = 8192
- DEFAULT = "local,1,1"
-
-
class StatsComputeOptions(object):
def __init__(self):
pass
@@ -149,7 +143,7 @@ def get_plot_series(self, default="mean"):
def get_plot_type(self, default="default"):
raise Exception("Please implement")
- def get_spark_cfg(self, default=SparkConfig.DEFAULT):
+ def get_nparts(self):
raise Exception("Please implement")
@@ -343,25 +337,8 @@ def get_plot_series(self, default="mean"):
def get_plot_type(self, default="default"):
return self.get_argument(RequestParameters.PLOT_TYPE, default=default)
- def get_spark_cfg(self, default=SparkConfig.DEFAULT):
- arg = self.get_argument(RequestParameters.SPARK_CFG, default)
- try:
- master, nexecs, nparts = arg.split(',')
- except:
- raise ValueError('Invalid spark configuration: %s' % arg)
- if master not in ("local", "yarn", "mesos"):
- raise ValueError('Invalid spark master: %s' % master)
- nexecs = int(nexecs)
- if (nexecs < 1) or (nexecs > SparkConfig.MAX_NUM_EXECS):
- raise ValueError('Invalid number of Spark executors: %d (must be between 1 and %d)' % (
- nexecs, SparkConfig.MAX_NUM_EXECS))
- nparts = int(nparts)
- if (nparts < 1) or (nparts > SparkConfig.MAX_NUM_PARTS):
- raise ValueError('Invalid number of Spark data partitions: %d (must be between 1 and %d)' % (
- nparts, SparkConfig.MAX_NUM_PARTS))
- if master == "local":
- master = "local[%d]" % nexecs
- return master, nexecs, nparts
+ def get_nparts(self):
+ return self.get_int_arg(RequestParameters.NPARTS, 0)
class NexusResults:
diff --git a/docker/nexus-webapp/Readme.md b/docker/nexus-webapp/Readme.md
new file mode 100644
index 0000000..7aeb962
--- /dev/null
+++ b/docker/nexus-webapp/Readme.md
@@ -0,0 +1,41 @@
+
+# How to Build
+
+All docker builds should happen from this directory. For copy/paste ability, first export the environment variable `BUILD_VERSION` to the version number you would like to tag images as.
+
+## spark-mesos-base
+
+ docker build -t sdap/spark-mesos-base:${BUILD_VERSION} -f mesos/base/Dockerfile .
+
+## spark-mesos-master
+
+Builds from `spark-mesos-base` and supports `tag_version` build argument which specifies the version of base to build from.
+
+ docker build -t sdap/spark-mesos-master:${BUILD_VERSION} -f mesos/master/Dockerfile .
+
+## spark-mesos-agent
+
+Builds from `spark-mesos-base` and supports `tag_version` build argument which specifies the version of base to build from.
+
+ docker build -t sdap/spark-mesos-agent:${BUILD_VERSION} -f mesos/agent/Dockerfile .
+
+## nexus-webapp:mesos
+
+Builds from `spark-mesos-base` and supports `tag_version` build argument which specifies the version of base to build from.
+
+ docker build -t sdap/nexus-webapp:mesos.${BUILD_VERSION} -f mesos/webapp/Dockerfile .
+
+## nexus-webapp:standalone
+
+ docker build -t sdap/nexus-webapp:standalone.${BUILD_VERSION} -f standalone/Dockerfile .
+
+# Push Images
+
+Push the images to the `sdap` organization on DockerHub
+
+ docker push sdap/spark-mesos-base:${BUILD_VERSION}
+ docker push sdap/spark-mesos-master:${BUILD_VERSION}
+ docker push sdap/spark-mesos-agent:${BUILD_VERSION}
+ docker push sdap/nexus-webapp:mesos.${BUILD_VERSION}
+ docker push sdap/nexus-webapp:standalone.${BUILD_VERSION}
+
diff --git a/docker/spark-mesos-base/install_nexusproto.sh b/docker/nexus-webapp/install_conda.sh
similarity index 70%
rename from docker/spark-mesos-base/install_nexusproto.sh
rename to docker/nexus-webapp/install_conda.sh
index ce44c70..7d99ef2 100755
--- a/docker/spark-mesos-base/install_nexusproto.sh
+++ b/docker/nexus-webapp/install_conda.sh
@@ -13,23 +13,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-set -e
-APACHE_NEXUSPROTO="https://github.com/apache/incubator-sdap-nexusproto.git"
-MASTER="master"
+set -ebx
-GIT_REPO=${1:-$APACHE_NEXUSPROTO}
-GIT_BRANCH=${2:-$MASTER}
+URL=${1:-"https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh"}
+CONDA=${2:-"/usr/local/anaconda2"}
-mkdir nexusproto
-pushd nexusproto
-git init
-git pull ${GIT_REPO} ${GIT_BRANCH}
-
-./gradlew pythonInstall --info
-
-./gradlew install --info
-
-rm -rf /root/.gradle
+pushd /tmp
+wget -q ${URL} -O install_anaconda.sh
+/bin/bash install_anaconda.sh -b -p ${CONDA}
+rm install_anaconda.sh
+${CONDA}/bin/conda update -n base conda
popd
-rm -rf nexusproto
diff --git a/docker/nexus-webapp/install_java.sh b/docker/nexus-webapp/install_java.sh
new file mode 100755
index 0000000..b9ce92b
--- /dev/null
+++ b/docker/nexus-webapp/install_java.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -ebx
+
+URL=${1:-"http://download.oracle.com/otn-pub/java/jdk/8u191-b12/2787e4a523244c269598db4e85c51e0c/jdk-8u191-linux-x64.rpm"}
+RPM_PACKAGE=${URL##*/}
+
+# Install Oracle JDK
+wget -q --no-cookies --no-check-certificate --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" ${URL}
+yum install -y ${RPM_PACKAGE}
+rm ${RPM_PACKAGE}
diff --git a/docker/nexus-webapp/install_spark.sh b/docker/nexus-webapp/install_spark.sh
new file mode 100755
index 0000000..1e668cc
--- /dev/null
+++ b/docker/nexus-webapp/install_spark.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -ebx
+
+DL_HOST=${1:-"http://d3kbcqa49mib13.cloudfront.net"}
+VERSION=${2:-"2.2.0"}
+DIR=${3:-"spark-${VERSION}"}
+INSTALL_DIR=${4:-"/usr/local"}
+
+pushd ${INSTALL_DIR}
+wget --quiet ${DL_HOST}/spark-${VERSION}-bin-hadoop2.7.tgz
+tar -xzf spark-${VERSION}-bin-hadoop2.7.tgz
+chown -R root.root spark-${VERSION}-bin-hadoop2.7.tgz
+ln -s spark-${VERSION}-bin-hadoop2.7 ${DIR}
+# Do not remove the package, mesos requires it
+# rm spark-${VERSION}-bin-hadoop2.7.tgz
+popd
diff --git a/docker/nexus-webapp/mesos/agent/Dockerfile b/docker/nexus-webapp/mesos/agent/Dockerfile
new file mode 100644
index 0000000..feca35f
--- /dev/null
+++ b/docker/nexus-webapp/mesos/agent/Dockerfile
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ARG tag_version=1.0.0-SNAPSHOT
+FROM sdap/spark-mesos-base:${tag_version}
+
+MAINTAINER Apache SDAP "dev@sdap.apache.org"
+
+# Run a Mesos slave.
+
+WORKDIR /tmp
+
+COPY mesos/agent/docker-entrypoint.sh /tmp/docker-entrypoint.sh
+
+
+# This will run docker-entrypoint.sh with the value of CMD as default arguments. However, if any arguments are supplied
+# to the docker run command when launching this image, the command line arguments will override these CMD arguments
+ENTRYPOINT ["/bin/bash", "-c", "/tmp/docker-entrypoint.sh $(eval echo $@)", "$@"]
+CMD ["--master=${MESOS_MASTER_NAME}:${MESOS_MASTER_PORT}", "--port=${MESOS_AGENT_PORT}", "--work_dir=${MESOS_WORKDIR}"]
diff --git a/docker/nexus-webapp/mesos/agent/docker-entrypoint.sh b/docker/nexus-webapp/mesos/agent/docker-entrypoint.sh
new file mode 100755
index 0000000..2228b0d
--- /dev/null
+++ b/docker/nexus-webapp/mesos/agent/docker-entrypoint.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -ebx
+
+if [ -n "$TORNADO_HOST" ]; then
+ sed -i "s/server.socket_host = .*/server.socket_host = '${TORNADO_HOST}'/g" ${NEXUS_SRC}/analysis/webservice/config/web.ini
+fi
+sed -i "s/host=127.0.0.1/host=$CASSANDRA_CONTACT_POINTS/g" ${NEXUS_SRC}/data-access/nexustiles/config/datastores.ini && \
+sed -i "s/local_datacenter=.*/local_datacenter=$CASSANDRA_LOCAL_DATACENTER/g" ${NEXUS_SRC}/data-access/nexustiles/config/datastores.ini && \
+sed -i "s/host=localhost:8983/host=$SOLR_URL_PORT/g" ${NEXUS_SRC}/data-access/nexustiles/config/datastores.ini
+
+# DOMS
+sed -i "s/module_dirs=.*/module_dirs=webservice.algorithms,webservice.algorithms_spark,webservice.algorithms.doms/g" ${NEXUS_SRC}/analysis/webservice/config/web.ini && \
+sed -i "s/host=.*/host=$CASSANDRA_CONTACT_POINTS/g" ${NEXUS_SRC}/analysis/webservice/algorithms/doms/domsconfig.ini && \
+sed -i "s/local_datacenter=.*/local_datacenter=$CASSANDRA_LOCAL_DATACENTER/g" ${NEXUS_SRC}/analysis/webservice/algorithms/doms/domsconfig.ini
+
+cd ${NEXUS_SRC}/data-access
+python setup.py install --force
+
+cd ${NEXUS_SRC}/analysis
+python setup.py install --force
+
+# Set PROJ_LIB env var as workaround for missing environment variable for basemap https://github.com/conda-forge/basemap-feedstock/issues/30
+${MESOS_HOME}/build/bin/mesos-agent.sh --no-systemd_enable_support --launcher=posix --no-switch_user --executor_environment_variables='{"PYTHON_EGG_CACHE": "/tmp", "PROJ_LIB":"/usr/local/anaconda2/share/proj"}' "$@"
diff --git a/docker/nexus-webapp/mesos/base/Dockerfile b/docker/nexus-webapp/mesos/base/Dockerfile
new file mode 100644
index 0000000..2cce5d1
--- /dev/null
+++ b/docker/nexus-webapp/mesos/base/Dockerfile
@@ -0,0 +1,107 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+FROM centos:7
+
+MAINTAINER Apache SDAP "dev@sdap.apache.org"
+
+WORKDIR /tmp
+
+RUN yum -y update && \
+ yum -y install \
+ bzip2 \
+ gcc \
+ git \
+ mesa-libGL.x86_64 \
+ python-devel \
+ wget \
+ which && \
+ yum clean all
+
+# Set environment variables. For Mesos, I used MESOS_VER because MESOS_VERSION
+# is expected to be a logical TRUE/FALSE flag that tells Mesos whether or not
+# to simply print the version number and exit.
+
+ENV INSTALL_LOC=/usr/local \
+ HADOOP_VERSION=2.7.3 \
+ SPARK_VERSION=2.2.0 \
+ MESOS_VER=1.5.1 \
+ MESOS_MASTER_PORT=5050 \
+ MESOS_AGENT_PORT=5051 \
+ MESOS_WORKDIR=/var/lib/mesos \
+ MESOS_IP=0.0.0.0 \
+ MESOS_MASTER_NAME=mesos-master \
+ PYTHON_EGG_CACHE=/tmp \
+ JAVA_HOME=/usr/java/default \
+ NEXUS_SRC=/tmp/incubator-sdap-nexus
+
+ENV CONDA_HOME=${INSTALL_LOC}/anaconda2 \
+ MESOS_HOME=${INSTALL_LOC}/mesos-${MESOS_VER} \
+ SPARK_DIR=spark-${SPARK_VERSION} \
+ SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop2.7 \
+ MESOS_MASTER=mesos://${MESOS_IP}:${MESOS_PORT} \
+ MESOS_PACKAGE=mesos-${MESOS_VER}.tar.gz
+
+ENV SPARK_HOME=${INSTALL_LOC}/${SPARK_DIR} \
+ PYSPARK_DRIVER_PYTHON=${CONDA_HOME}/bin/python \
+ PYSPARK_PYTHON=${CONDA_HOME}/bin/python \
+ PYSPARK_SUBMIT_ARGS="--driver-memory=4g pyspark-shell" \
+ # Workaround for missing environment variable for basemap https://github.com/conda-forge/basemap-feedstock/issues/30
+ PROJ_LIB=${CONDA_HOME}/share/proj
+
+ENV PYTHONPATH=${PYTHONPATH}:${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${SPARK_HOME}/python/lib/pyspark.zip \
+ MESOS_NATIVE_JAVA_LIBRARY=${INSTALL_LOC}/lib/libmesos.so \
+ SPARK_EXECUTOR_URI=${INSTALL_LOC}/${SPARK_PACKAGE}.tgz \
+ PATH=${CONDA_HOME}/bin:${PATH}
+
+# Install Oracle JDK
+COPY install_java.sh ./install_java.sh
+RUN ./install_java.sh "http://download.oracle.com/otn-pub/java/jdk/8u191-b12/2787e4a523244c269598db4e85c51e0c/jdk-8u191-linux-x64.rpm"
+
+# Set up Mesos
+COPY mesos/base/install_mesos.sh ./install_mesos.sh
+RUN source ./install_mesos.sh
+
+# Install Spark
+COPY install_spark.sh ./install_spark.sh
+RUN ./install_spark.sh "http://d3kbcqa49mib13.cloudfront.net" ${SPARK_VERSION} ${SPARK_DIR} ${INSTALL_LOC}
+
+# Install Anaconda
+COPY install_conda.sh ./install_conda.sh
+RUN ./install_conda.sh "https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh" ${CONDA_HOME}
+
+# Conda dependencies for nexus
+RUN conda install -c conda-forge -y netCDF4 && \
+ conda install -y numpy cython mpld3 scipy basemap gdal matplotlib && \
+ pip install shapely==1.5.16 cassandra-driver==3.5.0 && \
+ conda install -c conda-forge backports.functools_lru_cache=1.3 && \
+ cd /usr/lib64 && ln -s libcom_err.so.2 libcom_err.so.3 && \
+ cd /usr/local/anaconda2/lib && \
+ ln -s libnetcdf.so.11 libnetcdf.so.7 && \
+ ln -s libkea.so.1.4.6 libkea.so.1.4.5 && \
+ ln -s libhdf5_cpp.so.12 libhdf5_cpp.so.10 && \
+ ln -s libjpeg.so.9 libjpeg.so.8
+
+# Install nexusproto and nexus
+ARG APACHE_NEXUSPROTO=https://github.com/apache/incubator-sdap-nexusproto.git
+ARG APACHE_NEXUSPROTO_BRANCH=master
+ARG APACHE_NEXUS=https://github.com/apache/incubator-sdap-nexus.git
+ARG APACHE_NEXUS_BRANCH=master
+ARG REBUILD_CODE=1
+COPY install_nexusproto.sh ./install_nexusproto.sh
+COPY install_nexus.sh ./install_nexus.sh
+RUN /tmp/install_nexusproto.sh $APACHE_NEXUSPROTO $APACHE_NEXUSPROTO_BRANCH && \
+ /tmp/install_nexus.sh $APACHE_NEXUS $APACHE_NEXUS_BRANCH $NEXUS_SRC
+
+CMD ["/bin/bash"]
diff --git a/docker/spark-mesos-base/install_mesos.sh b/docker/nexus-webapp/mesos/base/install_mesos.sh
old mode 100644
new mode 100755
similarity index 92%
rename from docker/spark-mesos-base/install_mesos.sh
rename to docker/nexus-webapp/mesos/base/install_mesos.sh
index 655d737..fc0c475
--- a/docker/spark-mesos-base/install_mesos.sh
+++ b/docker/nexus-webapp/mesos/base/install_mesos.sh
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+set -ebx
+
# Install a few utility tools
yum install -y tar wget git
@@ -56,10 +58,15 @@ tar -zxf ${MESOS_PACKAGE} -C ${INSTALL_LOC}
rm -f ${MESOS_PACKAGE}
# Configure and build.
-cd ${MESOS_HOME}
+mkdir -p ${MESOS_WORKDIR}
+mkdir -p ${MESOS_HOME}
+pushd ${MESOS_HOME}
mkdir build
-cd build
+pushd build
../configure
make
-make check
+# Can't run make check until this is fixed: https://issues.apache.org/jira/browse/MESOS-8608
+# make check
make install
+popd
+popd
diff --git a/docker/spark-mesos-master/Dockerfile b/docker/nexus-webapp/mesos/master/Dockerfile
similarity index 69%
rename from docker/spark-mesos-master/Dockerfile
rename to docker/nexus-webapp/mesos/master/Dockerfile
index c1d7d39..a6f7f91 100644
--- a/docker/spark-mesos-master/Dockerfile
+++ b/docker/nexus-webapp/mesos/master/Dockerfile
@@ -24,4 +24,7 @@ EXPOSE ${MESOS_MASTER_PORT}
WORKDIR ${MESOS_HOME}/build
-CMD ["/bin/bash", "-c", "./bin/mesos-master.sh --ip=${MESOS_IP} --hostname=${MESOS_MASTER_NAME} --port=${MESOS_MASTER_PORT} --work_dir=${MESOS_WORKDIR}"]
+# This will run mesos-master.sh with the value of CMD as default arguments. However, if any arguments are supplied
+# to the docker run command when launching this image, the command line arguments will override these CMD arguments
+ENTRYPOINT ["/bin/bash", "-c", "./bin/mesos-master.sh $(eval echo $@)", "$@"]
+CMD ["--ip=${MESOS_IP}", "--hostname=${MESOS_MASTER_NAME}", "--port=${MESOS_MASTER_PORT}", "--work_dir=${MESOS_WORKDIR}"]
diff --git a/docker/spark-mesos-agent/Dockerfile b/docker/nexus-webapp/mesos/webapp/Dockerfile
similarity index 88%
rename from docker/spark-mesos-agent/Dockerfile
rename to docker/nexus-webapp/mesos/webapp/Dockerfile
index 471d63f..aaa64b2 100644
--- a/docker/spark-mesos-agent/Dockerfile
+++ b/docker/nexus-webapp/mesos/webapp/Dockerfile
@@ -18,10 +18,7 @@ FROM sdap/spark-mesos-base:${tag_version}
MAINTAINER Apache SDAP "dev@sdap.apache.org"
-# Run a Mesos slave.
-
-WORKDIR ${MESOS_HOME}/build
-
-COPY docker-entrypoint.sh /tmp/docker-entrypoint.sh
+COPY /mesos/webapp/docker-entrypoint.sh /tmp/docker-entrypoint.sh
+ENV MASTER=mesos://${MESOS_MASTER_NAME}:${MESOS_MASTER_PORT}
ENTRYPOINT ["/tmp/docker-entrypoint.sh"]
diff --git a/docker/nexus-webapp/docker-entrypoint.sh b/docker/nexus-webapp/mesos/webapp/docker-entrypoint.sh
similarity index 100%
rename from docker/nexus-webapp/docker-entrypoint.sh
rename to docker/nexus-webapp/mesos/webapp/docker-entrypoint.sh
diff --git a/docker/nexus-webapp/Dockerfile b/docker/nexus-webapp/standalone/Dockerfile
similarity index 71%
rename from docker/nexus-webapp/Dockerfile
rename to docker/nexus-webapp/standalone/Dockerfile
index 94ea293..06073fc 100644
--- a/docker/nexus-webapp/Dockerfile
+++ b/docker/nexus-webapp/standalone/Dockerfile
@@ -41,23 +41,25 @@ ENV SPARK_LOCAL_IP=127.0.0.1 \
PYSPARK_SUBMIT_ARGS="--driver-memory=4g pyspark-shell" \
PYTHONPATH=${PYTHONPATH}:/usr/local/spark-${SPARK_VERSION}/python:/usr/local/spark-${SPARK_VERSION}/python/lib/py4j-0.10.4-src.zip:/usr/local/spark-${SPARK_VERSION}/python/lib/pyspark.zip \
SPARK_EXECUTOR_URI=/usr/local/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz \
- NEXUS_SRC=/tmp/incubator-sdap-nexus
+ NEXUS_SRC=/tmp/incubator-sdap-nexus \
+ PATH=/usr/local/anaconda2/bin:$PATH \
+ # Workaround for missing environment variable for basemap https://github.com/conda-forge/basemap-feedstock/issues/30
+ PROJ_LIB=/usr/local/anaconda2/share/proj
+
+WORKDIR /tmp
+
+# Install Oracle JDK
+COPY install_java.sh ./install_java.sh
+RUN ./install_java.sh "http://download.oracle.com/otn-pub/java/jdk/8u191-b12/2787e4a523244c269598db4e85c51e0c/jdk-8u191-linux-x64.rpm"
# Install Spark
-RUN cd /usr/local && \
- wget --quiet http://d3kbcqa49mib13.cloudfront.net/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz && \
- tar -xzf spark-${SPARK_VERSION}-bin-hadoop2.7.tgz && \
- chown -R root.root spark-${SPARK_VERSION}-bin-hadoop2.7.tgz && \
- ln -s spark-${SPARK_VERSION}-bin-hadoop2.7 ${SPARK_DIR} && \
- rm spark-${SPARK_VERSION}-bin-hadoop2.7.tgz && \
- cd /
+COPY install_spark.sh ./install_spark.sh
+RUN ./install_spark.sh "http://d3kbcqa49mib13.cloudfront.net" ${SPARK_VERSION} ${SPARK_DIR} ${INSTALL_LOC}
+
+# Install Anaconda
+COPY install_conda.sh ./install_conda.sh
+RUN ./install_conda.sh "https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh" /usr/local/anaconda2
-# Install Miniconda
-RUN wget -q https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O install_anaconda.sh && \
- /bin/bash install_anaconda.sh -b -p /usr/local/anaconda2 && \
- rm install_anaconda.sh && \
- /usr/local/anaconda2/bin/conda update -n base conda
-ENV PATH /usr/local/anaconda2/bin:$PATH
# Conda dependencies for nexus
RUN conda install -c conda-forge -y netCDF4 && \
conda install -y numpy cython mpld3 scipy basemap gdal matplotlib && \
@@ -70,22 +72,16 @@ RUN conda install -c conda-forge -y netCDF4 && \
ln -s libhdf5_cpp.so.12 libhdf5_cpp.so.10 && \
ln -s libjpeg.so.9 libjpeg.so.8
-# Install Oracle JDK 1.8u182-b13
-RUN wget -q --no-cookies --no-check-certificate --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" "http://download.oracle.com/otn-pub/java/jdk/8u181-b13/96a7b8442fe848ef90c96a2fad6ed6d1/jdk-8u181-linux-x64.rpm" && \
- yum install -y jdk-8u181-linux-x64.rpm && \
- rm jdk-8u181-linux-x64.rpm
-
-COPY *.sh /tmp/
-
# Install nexusproto and nexus
ARG APACHE_NEXUSPROTO=https://github.com/apache/incubator-sdap-nexusproto.git
ARG APACHE_NEXUSPROTO_BRANCH=master
ARG APACHE_NEXUS=https://github.com/apache/incubator-sdap-nexus.git
ARG APACHE_NEXUS_BRANCH=master
ARG REBUILD_CODE=1
+COPY install_nexusproto.sh ./install_nexusproto.sh
+COPY install_nexus.sh ./install_nexus.sh
RUN /tmp/install_nexusproto.sh $APACHE_NEXUSPROTO $APACHE_NEXUSPROTO_BRANCH && \
/tmp/install_nexus.sh $APACHE_NEXUS $APACHE_NEXUS_BRANCH $NEXUS_SRC
-EXPOSE 8083
-
+COPY standalone/docker-entrypoint.sh ./docker-entrypoint.sh
ENTRYPOINT ["/tmp/docker-entrypoint.sh"]
diff --git a/docker/spark-mesos-agent/docker-entrypoint.sh b/docker/nexus-webapp/standalone/docker-entrypoint.sh
similarity index 87%
rename from docker/spark-mesos-agent/docker-entrypoint.sh
rename to docker/nexus-webapp/standalone/docker-entrypoint.sh
index 36d608b..0589fb2 100755
--- a/docker/spark-mesos-agent/docker-entrypoint.sh
+++ b/docker/nexus-webapp/standalone/docker-entrypoint.sh
@@ -35,5 +35,4 @@ python setup.py install --force
cd ${NEXUS_SRC}/analysis
python setup.py install --force
-
-${MESOS_HOME}/build/bin/mesos-agent.sh --master=${MESOS_MASTER_NAME}:${MESOS_MASTER_PORT} --port=${MESOS_AGENT_PORT} --work_dir=${MESOS_WORKDIR} --no-systemd_enable_support --launcher=posix --no-switch_user --executor_environment_variables='{ "PYTHON_EGG_CACHE": "/tmp" }'
+python -m webservice.webapp
diff --git a/docker/spark-mesos-base/Dockerfile b/docker/spark-mesos-base/Dockerfile
deleted file mode 100644
index 4c27bd6..0000000
--- a/docker/spark-mesos-base/Dockerfile
+++ /dev/null
@@ -1,153 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-FROM centos:7
-
-MAINTAINER Apache SDAP "dev@sdap.apache.org"
-
-WORKDIR /tmp
-
-RUN yum -y update && \
- yum -y install wget \
- git \
- which \
- bzip2 \
- gcc \
- python-devel
-
-# Install Oracle JDK 1.8u121-b13
-RUN wget -q --no-cookies --no-check-certificate --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" "http://download.oracle.com/otn-pub/java/jdk/8u121-b13/e9e7ea248e2c4826b92b3f075a80e441/jdk-8u121-linux-x64.rpm" && \
- yum -y install jdk-8u121-linux-x64.rpm && \
- rm jdk-8u121-linux-x64.rpm
-ENV JAVA_HOME /usr/java/default
-
-# ########################
-# # Apache Maven #
-# ########################
-ENV M2_HOME /usr/local/apache-maven
-ENV M2 $M2_HOME/bin
-ENV PATH $PATH:$M2
-
-RUN mkdir $M2_HOME && \
- wget -q http://mirror.stjschools.org/public/apache/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz && \
- tar -xvzf apache-maven-3.3.9-bin.tar.gz -C $M2_HOME --strip-components=1 && \
- rm -f apache-maven-3.3.9-bin.tar.gz
-
-# ########################
-# # Anaconda #
-# ########################
-RUN wget -q https://repo.continuum.io/archive/Anaconda2-4.3.0-Linux-x86_64.sh -O install_anaconda.sh && \
- /bin/bash install_anaconda.sh -b -p /usr/local/anaconda2 && \
- rm install_anaconda.sh
-ENV PATH $PATH:/usr/local/anaconda2/bin
-
-# Set environment variables. For Mesos, I used MESOS_VER because MESOS_VERSION
-# is expected to be a logical TRUE/FALSE flag that tells Mesos whether or not
-# to simply print the version number and exit.
-
-ENV INSTALL_LOC=/usr/local \
- HADOOP_VERSION=2.7.3 \
- SPARK_VERSION=2.1.0 \
- MESOS_VER=1.5.0 \
- MESOS_MASTER_PORT=5050 \
- MESOS_AGENT_PORT=5051 \
- MESOS_WORKDIR=/var/lib/mesos \
- MESOS_IP=0.0.0.0 \
- MESOS_MASTER_NAME=mesos-master \
- PYTHON_EGG_CACHE=/tmp
-
-ENV CONDA_HOME=${INSTALL_LOC}/anaconda2 \
- MESOS_HOME=${INSTALL_LOC}/mesos-${MESOS_VER} \
- SPARK_DIR=spark-${SPARK_VERSION} \
- SPARK_PACKAGE=spark-${SPARK_VERSION}-bin-hadoop2.7 \
- MESOS_MASTER=mesos://${MESOS_IP}:${MESOS_PORT} \
- MESOS_PACKAGE=mesos-${MESOS_VER}.tar.gz
-
-ENV SPARK_HOME=${INSTALL_LOC}/${SPARK_DIR} \
- PYSPARK_DRIVER_PYTHON=${CONDA_HOME}/bin/python \
- PYSPARK_PYTHON=${CONDA_HOME}/bin/python \
- PYSPARK_SUBMIT_ARGS="--driver-memory=4g pyspark-shell"
-
-ENV PYTHONPATH=${PYTHONPATH}:${SPARK_HOME}/python:${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${SPARK_HOME}/python/lib/pyspark.zip \
- MESOS_NATIVE_JAVA_LIBRARY=${INSTALL_LOC}/lib/libmesos.so \
- SPARK_EXECUTOR_URI=${INSTALL_LOC}/${SPARK_PACKAGE}.tgz
-
-WORKDIR ${INSTALL_LOC}
-
-# Set up Spark
-
-RUN wget --quiet http://d3kbcqa49mib13.cloudfront.net/${SPARK_PACKAGE}.tgz && \
- tar -xzf ${SPARK_PACKAGE}.tgz && \
- chown -R root.root ${SPARK_PACKAGE} && \
- ln -s ${SPARK_PACKAGE} ${SPARK_DIR}
-
-# Set up Mesos
-
-COPY install_mesos.sh .
-
-RUN source ./install_mesos.sh && \
- mkdir ${MESOS_WORKDIR}
-
-# Set up Anaconda environment
-
-ENV PATH=${CONDA_HOME}/bin:${PATH}:${HADOOP_HOME}/bin:${SPARK_HOME}/bin
-
-RUN conda install -c conda-forge -y netCDF4 && \
- conda install -y numpy cython mpld3 scipy basemap gdal matplotlib && \
- pip install shapely cassandra-driver==3.5.0 && \
- conda install -c conda-forge backports.functools_lru_cache=1.3
-
-# Workaround missing libcom_err.so (needed for gdal)
-
-RUN cd /usr/lib64 && ln -s libcom_err.so.2 libcom_err.so.3
-
-# Workaround missing conda libs needed for gdal
-
-RUN cd ${CONDA_HOME}/lib && \
- ln -s libnetcdf.so.11 libnetcdf.so.7 && \
- ln -s libkea.so.1.4.6 libkea.so.1.4.5 && \
- ln -s libhdf5_cpp.so.12 libhdf5_cpp.so.10 && \
- ln -s libjpeg.so.9 libjpeg.so.8
-
-RUN yum install -y mesa-libGL.x86_64
-
-# Install nexusproto
-ARG APACHE_NEXUSPROTO=https://github.com/apache/incubator-sdap-nexusproto.git
-ARG APACHE_NEXUSPROTO_BRANCH=master
-ARG REBUILD_CODE=1
-COPY install_nexusproto.sh ./install_nexusproto.sh
-RUN ./install_nexusproto.sh $APACHE_NEXUSPROTO $APACHE_NEXUSPROTO_BRANCH
-
-# Retrieve NEXUS code and build it.
-
-WORKDIR /
-
-RUN git clone https://github.com/apache/incubator-sdap-nexus.git
-
-RUN sed -i 's/,webservice.algorithms.doms//g' /incubator-sdap-nexus/analysis/webservice/config/web.ini && \
- sed -i 's/127.0.0.1/nexus-webapp/g' /incubator-sdap-nexus/analysis/webservice/config/web.ini && \
- sed -i 's/127.0.0.1/cassandra1,cassandra2,cassandra3,cassandra4,cassandra5,cassandra6/g' /incubator-sdap-nexus/data-access/nexustiles/config/datastores.ini && \
- sed -i 's/localhost:8983/solr1:8983/g' /incubator-sdap-nexus/data-access/nexustiles/config/datastores.ini
-
-WORKDIR /incubator-sdap-nexus/data-access
-
-RUN python setup.py install
-
-WORKDIR /incubator-sdap-nexus/analysis
-
-RUN python setup.py install
-
-WORKDIR /tmp
-
-CMD ["/bin/bash"]
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index 42a403d..c03dec1 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -45,7 +45,7 @@ Pull the necessary Docker images from the `SDAP repository <https://hub.docker.c
docker pull sdap/ningester:${VERSION}
docker pull sdap/solr-singlenode:${VERSION}
docker pull sdap/cassandra:${VERSION}
- docker pull sdap/nexus-webapp:${VERSION}
+ docker pull sdap/nexus-webapp:standalone.${VERSION}
.. _quickstart-step2:
With regards,
Apache Git Services