You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by rk...@apache.org on 2024/03/01 19:23:42 UTC

(incubator-sdap-nexus) branch SDAP-511 updated: Dockerfile update

This is an automated email from the ASF dual-hosted git repository.

rkk pushed a commit to branch SDAP-511
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-nexus.git


The following commit(s) were added to refs/heads/SDAP-511 by this push:
     new 968adf9  Dockerfile update
968adf9 is described below

commit 968adf924de2a15a7e7566dca9aed071d5207dfb
Author: rileykk <ri...@jpl.nasa.gov>
AuthorDate: Fri Mar 1 11:23:28 2024 -0800

    Dockerfile update
---
 .gitignore                                         |   2 +
 analysis/webservice/webapp.py                      |   7 +-
 docker/nexus-webapp/Dockerfile                     | 144 ++++++++++-----------
 docker/nexus-webapp/{Dockerfile => Dockerfile.old} |   0
 docker/nexus-webapp/entrypoint.sh                  | 133 +++++++++++++++++++
 docker/nexus-webapp/install_conda.sh               |  42 ------
 docker/nexus-webapp/install_nexusproto.sh          |   2 +
 .../{install_nexusproto.sh => install_python.sh}   |  43 +++---
 poetry.lock                                        |   6 +-
 9 files changed, 234 insertions(+), 145 deletions(-)

diff --git a/.gitignore b/.gitignore
index 12ab2d6..a93b944 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,5 @@
 analysis/webservice/algorithms/doms/domsconfig.ini
 data-access/nexustiles/config/datastores.ini
 venv/
+
+dist/
\ No newline at end of file
diff --git a/analysis/webservice/webapp.py b/analysis/webservice/webapp.py
index a2365f0..89c37ff 100644
--- a/analysis/webservice/webapp.py
+++ b/analysis/webservice/webapp.py
@@ -36,12 +36,15 @@ except ImportError:
 try:
     __version__ = _version('nexusanalysis')
 except Exception:
-    __version__ = 'Cannot be determined'
+    try:
+        __version__ = _version('sdap-nexus')
+    except Exception:
+        __version__ = 'Cannot be determined'
 
 banner = [
      '',
      ' ____  ____    _    ____    | ',
-     '/ ___||  _ \\  / \\  |  _ \\   | Apache SDAP',
+     '/ ___||  _ \\  / \\  |  _ \\   | Apache SDAP (TM)',
      '\\___ \\| | | |/ _ \\ | |_) |  | Science Data Analytics Platform',
      f' ___) | |_| / ___ \\|  __/   | Version: {__version__}',
      '|____/|____/_/   \\_\\_|      | ',
diff --git a/docker/nexus-webapp/Dockerfile b/docker/nexus-webapp/Dockerfile
index c38c5f2..fad208a 100644
--- a/docker/nexus-webapp/Dockerfile
+++ b/docker/nexus-webapp/Dockerfile
@@ -13,98 +13,94 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM nexusjpl/alpine-pyspark:3.1.1
+FROM eclipse-temurin:8
 
+COPY --chmod=755 docker/nexus-webapp/install_python.sh /tmp/install_python.sh
 
-MAINTAINER Apache SDAP "dev@sdap.apache.org"
+RUN /tmp/install_python.sh
+RUN ln -s /opt/python/3.9.7/bin/python3.9  /opt/python/3.9.7/bin/python
 
+ENV PATH="/opt/python/3.9.7/bin:$PATH"
 
-ARG CONDA_VERSION="4.7.12.1"
-ARG CONDA_MD5="81c773ff87af5cfac79ab862942ab6b3"
-ARG CONDA_DIR="/opt/conda"
+#FROM python:3.9.7-slim as py-base
+#FROM python:3.9.7-slim as builder-base
 
-ENV  \
-    PYTHONPATH=/opt/conda/share/py4j/py4j0.10.9.2.jar \
-    NEXUS_SRC=/tmp/incubator-sdap-nexus \
-    PROJ_LIB=/opt/conda/lib/python3.8/site-packages/pyproj/data	\
-    PATH="$CONDA_DIR/bin:$PATH" \
+ENV PYTHONUNBUFFERED=1 \
+    # prevents python creating .pyc files
     PYTHONDONTWRITEBYTECODE=1 \
-    SPARK_HOME=/opt/spark \
-    PYSPARK_DRIVER_PYTHON=/opt/conda/bin/python3.8 \
-    PYSPARK_PYTHON=/opt/conda/bin/python3.8 \
-    LD_LIBRARY_PATH=/usr/lib \
-    REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
+    \
+    # pip
+    PIP_NO_CACHE_DIR=off \
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    PIP_DEFAULT_TIMEOUT=100 \
+    \
+    # poetry
+    # https://python-poetry.org/docs/configuration/#using-environment-variables
+    POETRY_VERSION=1.8.1 \
+    # make poetry install to this location
+    POETRY_HOME="/opt/poetry" \
+    # make poetry create the virtual environment in the project's root
+    # it gets named `.venv`
+    POETRY_VIRTUALENVS_IN_PROJECT=true \
+    # do not ask any interactive question
+    POETRY_NO_INTERACTION=1 \
+    PYSETUP_PATH="/opt/pysetup" \
+    \
+    # VENV_PATH="/opt/pysetup/.venv" \
+    VENV_PATH="/incubator-sdap-nexus/.venv" \
+    POETRY_BUILD="yes"
+# Install Poetry via the official installer (honours POETRY_VERSION / POETRY_HOME set above); -f fails on HTTP errors
+RUN curl -fsSL https://install.python-poetry.org -o /tmp/install_poetry.py && python /tmp/install_poetry.py && rm /tmp/install_poetry.py
+
+ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH"
 
-RUN apk add --update --no-cache \
-    bzip2 \
-    gcc \
-    git \
-    mesa-gl \
-    wget \
-    curl \
-    which \
-    python3 \
-    bash==4.4.19-r1 \
-    libc-dev \
-    libressl2.7-libcrypto
-
-RUN  apk upgrade musl
-
-WORKDIR /tmp
-
-RUN apk del libc6-compat
-RUN apk --no-cache add wget zlib && \
-    wget -q -O /etc/apk/keys/sgerrand.rsa.pub https://alpine-pkgs.sgerrand.com/sgerrand.rsa.pub && \
-    wget https://github.com/sgerrand/alpine-pkg-glibc/releases/download/2.30-r0/glibc-2.30-r0.apk && \
-    apk add glibc-2.30-r0.apk && \
-    ln -s /lib/libz.so.1 /usr/glibc-compat/lib/ && \
-    ln -s /lib/libc.musl-x86_64.so.1 /usr/glibc-compat/lib && \
-    ln -s /usr/lib/libgcc_s.so.1 /usr/glibc-compat/lib
-
-COPY docker/nexus-webapp/install_conda.sh ./install_conda.sh
-RUN /tmp/install_conda.sh 
-
-RUN conda install -c conda-forge python=3.8.15=h257c98d_0_cpython tqdm=4.64.1=py38h06a4308_0  mamba && conda clean -afy
-
-RUN cd /usr/lib && ln -s libcom_err.so.2 libcom_err.so.3 && \
-    cd /opt/conda/lib && \
-    ln -s libnetcdf.so.11 libnetcdf.so.7 && \
-    ln -s libkea.so.1.4.6 libkea.so.1.4.5 && \
-    ln -s libhdf5_cpp.so.12 libhdf5_cpp.so.10 && \
-    ln -s libjpeg.so.9 libjpeg.so.8
-
-# Change REBUILD_CODE if you want tell Docker not to use cached layers from this line on
 ARG REBUILD_CODE=0
 
+WORKDIR /incubator-sdap-nexus
+COPY poetry.lock pyproject.toml ./
+COPY data-access /incubator-sdap-nexus/data-access
+COPY analysis /incubator-sdap-nexus/analysis
+COPY tools /incubator-sdap-nexus/tools
+COPY docker/nexus-webapp/install_nexusproto.sh /tmp/install_nexusproto.sh
+COPY docker/nexus-webapp/entrypoint.sh /opt/entrypoint.sh
+
+# Copy readme + ASF legal files
+COPY README.md README DISCLAIMER LICENSE NOTICE ./
+
+# proj-bin: runtime dep; tini: /opt/entrypoint.sh execs /sbin/tini, which no earlier build step installs
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y proj-bin tini && \
+    ln -sf /usr/bin/tini /sbin/tini && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# install runtime deps only (main group; replaces deprecated --no-dev) - uses $POETRY_VIRTUALENVS_IN_PROJECT internally
+RUN poetry install --only main && poetry cache clear --all .
+
+ARG REBUILD_CODE=1
+
 ARG BUILD_NEXUSPROTO
 
 ARG APACHE_NEXUSPROTO=https://github.com/apache/incubator-sdap-nexusproto.git
 ARG APACHE_NEXUSPROTO_BRANCH=master
 
-COPY docker/nexus-webapp/install_nexusproto.sh ./install_nexusproto.sh
 RUN /tmp/install_nexusproto.sh $APACHE_NEXUSPROTO $APACHE_NEXUSPROTO_BRANCH
 
-COPY VERSION.txt /incubator-sdap-nexus/VERSION.txt
-COPY data-access /incubator-sdap-nexus/data-access
-COPY analysis /incubator-sdap-nexus/analysis
-COPY tools /incubator-sdap-nexus/tools
-
-WORKDIR /incubator-sdap-nexus/data-access
-RUN python3 setup.py install clean
+ENV  \
+    PYTHONPATH=/incubator-sdap-nexus/.venv/share/py4j/py4j0.10.9.3.jar \
+    NEXUS_SRC=/tmp/incubator-sdap-nexus \
+    \
+#    PROJ_LIB=/opt/conda/lib/python3.8/site-packages/pyproj/data	\
+    PATH="$POETRY_HOME/bin:$VENV_PATH/bin:/opt/python/3.9.7/bin:$PATH" \
+    SPARK_HOME=/incubator-sdap-nexus/.venv/lib/python3.9/site-packages/pyspark \
+    PYSPARK_DRIVER_PYTHON=/incubator-sdap-nexus/.venv/bin/python3.9 \
+    PYSPARK_PYTHON=/incubator-sdap-nexus/.venv/bin/python3.9 \
+    LD_LIBRARY_PATH=/usr/lib \
+    REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
 
-WORKDIR /incubator-sdap-nexus/analysis
-RUN python3 setup.py install clean && mamba clean -afy
 
-RUN pip install shapely==1.7.1
+# This image ships Kubernetes client jar version 5.4.1; it is unclear whether 4.12.0 should be force-installed, or whether that is even needed anymore.
+#RUN rm $SPARK_HOME/jars/kubernetes-client-4.12.0.jar
+#ADD https://repo1.maven.org/maven2/io/fabric8/kubernetes-client/4.12.0/kubernetes-client-4.12.0.jar $SPARK_HOME/jars
 
-WORKDIR /incubator-sdap-nexus/tools/deletebyquery
-ARG CASS_DRIVER_BUILD_CONCURRENCY=8
-RUN pip3 install cassandra-driver==3.20.1 pyspark py4j
-RUN pip3 install -r requirements.txt
-RUN pip3 install cython
 
-WORKDIR /incubator-sdap-nexus
+ENTRYPOINT ["/opt/entrypoint.sh"]
 
-# Upgrade kubernetes client jar from the default version
-RUN rm /opt/spark/jars/kubernetes-client-4.12.0.jar
-ADD https://repo1.maven.org/maven2/io/fabric8/kubernetes-client/4.12.0/kubernetes-client-4.12.0.jar /opt/spark/jars
diff --git a/docker/nexus-webapp/Dockerfile b/docker/nexus-webapp/Dockerfile.old
similarity index 100%
copy from docker/nexus-webapp/Dockerfile
copy to docker/nexus-webapp/Dockerfile.old
diff --git a/docker/nexus-webapp/entrypoint.sh b/docker/nexus-webapp/entrypoint.sh
new file mode 100755
index 0000000..216e8fe
--- /dev/null
+++ b/docker/nexus-webapp/entrypoint.sh
@@ -0,0 +1,133 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# exit on the first failing command (-e) and echo commands to the terminal output (-x)
+set -ex
+
+# Check whether there is a passwd entry for the container UID
+myuid=$(id -u)
+mygid=$(id -g)
+# turn off -e for getent because it will return error code in anonymous uid case
+set +e
+uidentry=$(getent passwd $myuid)
+set -e
+
+# If there is no passwd entry for the container UID, attempt to create one
+if [ -z "$uidentry" ] ; then
+    if [ -w /etc/passwd ] ; then
+        echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd
+    else
+        echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID"
+    fi
+fi
+
+SPARK_K8S_CMD="$1"
+case "$SPARK_K8S_CMD" in
+    driver | driver-py | driver-r | executor)
+      shift 1
+      ;;
+    "")
+      ;;
+    *)
+      echo "Non-spark-on-k8s command provided, proceeding in pass-through mode..."
+      exec /sbin/tini -s -- "$@"
+      ;;
+esac
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+env | grep SPARK_JAVA_OPT_ | sort -t_ -k4 -n | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt
+readarray -t SPARK_EXECUTOR_JAVA_OPTS < /tmp/java_opts.txt
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if [ -n "$PYSPARK_FILES" ]; then
+    PYTHONPATH="$PYTHONPATH:$PYSPARK_FILES"
+fi
+
+PYSPARK_ARGS=""
+if [ -n "$PYSPARK_APP_ARGS" ]; then
+    PYSPARK_ARGS="$PYSPARK_APP_ARGS"
+fi
+
+R_ARGS=""
+if [ -n "$R_APP_ARGS" ]; then
+    R_ARGS="$R_APP_ARGS"
+fi
+
+if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "2" ]; then
+    pyv="$(python -V 2>&1)"
+    export PYTHON_VERSION="${pyv:7}"
+    export PYSPARK_PYTHON="python"
+    export PYSPARK_DRIVER_PYTHON="python"
+elif [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then
+    pyv3="$(python3 -V 2>&1)"
+    export PYTHON_VERSION="${pyv3:7}"
+    export PYSPARK_PYTHON="python3"
+    export PYSPARK_DRIVER_PYTHON="python3"
+fi
+
+case "$SPARK_K8S_CMD" in
+  driver)
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@"
+    )
+    ;;
+  driver-py)
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@" $PYSPARK_PRIMARY $PYSPARK_ARGS
+    )
+    ;;
+    driver-r)
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@" $R_PRIMARY $R_ARGS
+    )
+    ;;
+  executor)
+    CMD=(
+      ${JAVA_HOME}/bin/java
+      "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
+      -Xms$SPARK_EXECUTOR_MEMORY
+      -Xmx$SPARK_EXECUTOR_MEMORY
+      -cp "$SPARK_CLASSPATH"
+      org.apache.spark.executor.CoarseGrainedExecutorBackend
+      --driver-url $SPARK_DRIVER_URL
+      --executor-id $SPARK_EXECUTOR_ID
+      --cores $SPARK_EXECUTOR_CORES
+      --app-id $SPARK_APPLICATION_ID
+      --hostname $SPARK_EXECUTOR_POD_IP
+    )
+    ;;
+
+  *)
+    echo "Unknown command: $SPARK_K8S_CMD" 1>&2
+    exit 1
+esac
+
+# Execute the container CMD under tini for better hygiene
+exec /sbin/tini -s -- "${CMD[@]}"
diff --git a/docker/nexus-webapp/install_conda.sh b/docker/nexus-webapp/install_conda.sh
deleted file mode 100755
index 2c2ad37..0000000
--- a/docker/nexus-webapp/install_conda.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-echo "**** install dev packages ****"
-apk add --no-cache --virtual .build-dependencies bash wget
-
-echo "**** get Miniconda ****" 
-mkdir -p "$CONDA_DIR" 
-wget "http://repo.continuum.io/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh" -O miniconda.sh 
-echo "$CONDA_MD5  miniconda.sh" | md5sum -c 
-
-echo "**** install Miniconda ****" 
-bash miniconda.sh -f -b -p "$CONDA_DIR" 
-echo "export PATH=$CONDA_DIR/bin:\$PATH" > /etc/profile.d/conda.sh 
-
-echo "**** setup Miniconda ****" 
-conda update --all --yes 
-conda config --set auto_update_conda False 
-
-echo "**** cleanup ****" 
-apk del --purge .build-dependencies 
-rm -f miniconda.sh 
-conda clean --all --force-pkgs-dirs --yes 
-find "$CONDA_DIR" -follow -type f \( -iname '*.a' -o -iname '*.pyc' -o -iname '*.js.map' \) -delete 
-
-echo "**** finalize ****" 
-mkdir -p "$CONDA_DIR/locks" 
-chmod 777 "$CONDA_DIR/locks" 
-conda update -n base conda
\ No newline at end of file
diff --git a/docker/nexus-webapp/install_nexusproto.sh b/docker/nexus-webapp/install_nexusproto.sh
index 08d1894..49b1c6e 100755
--- a/docker/nexus-webapp/install_nexusproto.sh
+++ b/docker/nexus-webapp/install_nexusproto.sh
@@ -37,6 +37,8 @@ if [ ! -z ${BUILD_NEXUSPROTO+x} ]; then
   rm -rf /root/.gradle
   popd
   rm -rf nexusproto
+elif [ ! -z ${POETRY_BUILD+x} ]; then
+  poetry add nexusproto
 else
   pip install nexusproto
 fi
diff --git a/docker/nexus-webapp/install_nexusproto.sh b/docker/nexus-webapp/install_python.sh
old mode 100755
new mode 100644
similarity index 53%
copy from docker/nexus-webapp/install_nexusproto.sh
copy to docker/nexus-webapp/install_python.sh
index 08d1894..b7258da
--- a/docker/nexus-webapp/install_nexusproto.sh
+++ b/docker/nexus-webapp/install_python.sh
@@ -16,27 +16,22 @@
 
 set -e
 
-if [ ! -z ${BUILD_NEXUSPROTO+x} ]; then
-  echo 'Building nexusproto from source...'
-
-  APACHE_NEXUSPROTO="https://github.com/apache/incubator-sdap-nexusproto.git"
-  MASTER="master"
-
-  GIT_REPO=${1:-$APACHE_NEXUSPROTO}
-  GIT_BRANCH=${2:-$MASTER}
-
-  mkdir nexusproto
-  pushd nexusproto
-  git init
-  git pull ${GIT_REPO} ${GIT_BRANCH}
-
-  ./gradlew pythonInstall --info
-
-  ./gradlew install --info
-
-  rm -rf /root/.gradle
-  popd
-  rm -rf nexusproto
-else
-  pip install nexusproto
-fi
+# Toolchain and development headers used below to compile CPython from source
+apt-get update
+apt-get upgrade -y
+apt-get install --no-install-recommends -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev liblzma-dev libffi-dev
+apt-get clean; rm -rf /var/lib/apt/lists/*
+
+cd /tmp/
+wget https://www.python.org/ftp/python/3.9.7/Python-3.9.7.tgz
+tar xzf Python-3.9.7.tgz
+cd Python-3.9.7
+
+./configure --prefix=/opt/python/3.9.7/ --enable-optimizations --with-lto --with-computed-gotos --with-system-ffi
+make -j "$(nproc)"
+make altinstall
+rm /tmp/Python-3.9.7.tgz
+cd /tmp/
+rm -rf Python-3.9.7
+
+/opt/python/3.9.7/bin/python3.9 -m pip install --upgrade pip setuptools wheel
diff --git a/poetry.lock b/poetry.lock
index 84c5bfb..60176ef 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1504,13 +1504,13 @@ pytest = ">=3.0.0,<8.0.0"
 
 [[package]]
 name = "python-dateutil"
-version = "2.8.2"
+version = "2.9.0.post0"
 description = "Extensions to the standard Python datetime module"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
 files = [
-    {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
-    {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
+    {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
+    {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
 ]
 
 [package.dependencies]