You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by pd...@apache.org on 2021/02/03 10:28:14 UTC

[zeppelin] branch master updated: [ZEPPELIN-5200] Update dockerfile

This is an automated email from the ASF dual-hosted git repository.

pdallig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new b1adb5a  [ZEPPELIN-5200] Update dockerfile
b1adb5a is described below

commit b1adb5a23a9c9e23d8527edbaa260cb70587d81b
Author: Philipp Dallig <ph...@gmail.com>
AuthorDate: Mon Jan 25 11:38:18 2021 +0100

    [ZEPPELIN-5200] Update dockerfile
    
    ### What is this PR for?
    This is a complete rewrite of the Zeppelin Dockerfile.
    Main benefits:
     - update to Ubuntu 20.04
     - install Python 3 and R with conda, as in our CI system
    
    ### What type of PR is it?
     - Refactoring
    
    ### Todos
    * [ ] - Testing
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-5200
    
    ### Questions:
    * Do the license files need an update? No
    * Are there breaking changes for older versions? No
    * Does this need documentation? No
    
    Author: Philipp Dallig <ph...@gmail.com>
    
    Closes #4022 from Reamer/docker_zeppelin and squashes the following commits:
    
    4841ef327 [Philipp Dallig] Update miniconda version
    ce7f0dd8e [Philipp Dallig] Add some more python modules
    27fe42ce6 [Philipp Dallig] Use python 3.7
    50362e2cd [Philipp Dallig] Correct comment
    bf7f73ea4 [Philipp Dallig] Update dockerfile
---
 scripts/docker/zeppelin/bin/Dockerfile             | 117 +++++++--------------
 .../docker/zeppelin/bin/env_python_3_with_R.yml    |  37 +++++++
 2 files changed, 74 insertions(+), 80 deletions(-)

diff --git a/scripts/docker/zeppelin/bin/Dockerfile b/scripts/docker/zeppelin/bin/Dockerfile
index 3928b61..bb611bc 100644
--- a/scripts/docker/zeppelin/bin/Dockerfile
+++ b/scripts/docker/zeppelin/bin/Dockerfile
@@ -13,100 +13,57 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM ubuntu:16.04
-MAINTAINER Apache Software Foundation <de...@zeppelin.apache.org>
+FROM ubuntu:20.04
 
-ENV Z_VERSION="0.9.0-preview2"
+LABEL maintainer="Apache Software Foundation <de...@zeppelin.apache.org>"
+
+ENV Z_VERSION="0.9.0"
 
 ENV LOG_TAG="[ZEPPELIN_${Z_VERSION}]:" \
-    Z_HOME="/zeppelin" \
+    Z_HOME="/opt/zeppelin" \
     LANG=en_US.UTF-8 \
     LC_ALL=en_US.UTF-8 \
+    JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
     ZEPPELIN_ADDR="0.0.0.0"
 
-RUN echo "$LOG_TAG update and install basic packages" && \
+RUN echo "$LOG_TAG install basic packages" && \
     apt-get -y update && \
-    apt-get install -y locales && \
-    locale-gen $LANG && \
-    apt-get install -y software-properties-common && \
-    apt -y autoclean && \
-    apt -y dist-upgrade && \
-    apt-get install -y build-essential
-
-RUN echo "$LOG_TAG install tini related packages" && \
-    apt-get install -y wget curl grep sed dpkg && \
-    TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep -o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && \
-    curl -L "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \
-    dpkg -i tini.deb && \
-    rm tini.deb
-
-ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
-RUN echo "$LOG_TAG Install java8" && \
-    apt-get -y update && \
-    apt-get install -y openjdk-8-jdk && \
-    rm -rf /var/lib/apt/lists/*
-
-# should install conda first before numpy, matploylib since pip and python will be installed by conda
-RUN echo "$LOG_TAG Install miniconda3 related packages" && \
-    apt-get -y update && \
-    apt-get install -y bzip2 ca-certificates \
-    libglib2.0-0 libxext6 libsm6 libxrender1 \
-    git mercurial subversion && \
-    echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \
-    wget --quiet https://repo.continuum.io/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O ~/miniconda.sh && \
-    /bin/bash ~/miniconda.sh -b -p /opt/conda && \
-    rm ~/miniconda.sh
-
-ENV PATH /opt/conda/bin:$PATH
+    DEBIAN_FRONTEND=noninteractive apt-get install -y locales language-pack-en tini openjdk-8-jre-headless wget && \
+    # Cleanup
+    rm -rf /var/lib/apt/lists/* && \
+    apt-get autoclean && \
+    apt-get clean
 
-RUN echo "$LOG_TAG Install python related packages" && \
-    apt-get -y update && \
-    apt-get install -y python-dev python-pip && \
-    apt-get install -y gfortran && \
-    # numerical/algebra packages
-    apt-get install -y libblas-dev libatlas-dev liblapack-dev && \
-    # font, image
-    apt-get install -y libpng-dev libfreetype6-dev libxft-dev && \
-    # for tkinter
-    apt-get install -y python-tk libxml2-dev libxslt-dev zlib1g-dev && \
-    hash -r && \
+# Install conda to manage python and R packages
+ARG miniconda_version="py37_4.9.2"
+# Hashes via https://docs.conda.io/en/latest/miniconda_hashes.html
+ARG miniconda_sha256="79510c6e7bd9e012856e25dcb21b3e093aa4ac8113d9aa7e82a86987eabe1c31"
+# Install python and R packages via conda
+COPY env_python_3_with_R.yml /env_python_3_with_R.yml
+RUN set -ex && \
+    wget -nv https://repo.anaconda.com/miniconda/Miniconda3-${miniconda_version}-Linux-x86_64.sh -O miniconda.sh && \
+    echo "${miniconda_sha256} miniconda.sh" > anaconda.sha256 && \
+    sha256sum --strict -c anaconda.sha256 && \
+    bash miniconda.sh -b -p /opt/conda && \
+    export PATH=/opt/conda/bin:$PATH && \
     conda config --set always_yes yes --set changeps1 no && \
-    conda update -q conda && \
     conda info -a && \
-    conda config --add channels conda-forge && \
-    pip install -q pycodestyle==2.5.0 && \
-    pip install -q numpy==1.17.3 pandas==0.25.0 scipy==1.3.1 grpcio==1.19.0 bkzep==0.6.1 hvplot==0.5.2 protobuf==3.10.0 pandasql==0.7.3 ipython==7.8.0 matplotlib==3.0.3 ipykernel==5.1.2 jupyter_client==5.3.4 bokeh==1.3.4 panel==0.6.0 holoviews==1.12.3 seaborn==0.9.0 plotnine==0.5.1 intake==0.5.3 intake-parquet==0.2.2 altair==3.2.0 pycodestyle==2.5.0 apache_beam==2.15.0
-
-RUN echo "$LOG_TAG Install R related packages" && \
-    echo "PATH: $PATH" && \
-    ls /opt/conda/bin && \
-    echo "deb http://cran.rstudio.com/bin/linux/ubuntu xenial/" | tee -a /etc/apt/sources.list && \
-    apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 51716619E084DAB9 && \
-    apt-get -y update && \
-    apt-get -y --allow-unauthenticated install r-base r-base-dev && \
-    R -e "install.packages('evaluate', repos = 'https://cloud.r-project.org')" && \
-    R -e "install.packages('knitr', repos='http://cran.us.r-project.org')" && \
-    R -e "install.packages('ggplot2', repos='http://cran.us.r-project.org')" && \
-    R -e "install.packages('googleVis', repos='http://cran.us.r-project.org')" && \
-    R -e "install.packages('data.table', repos='http://cran.us.r-project.org')" && \
-    R -e "install.packages('IRkernel', repos = 'https://cloud.r-project.org');IRkernel::installspec()" && \
-    R -e "install.packages('shiny', repos = 'https://cloud.r-project.org')" && \
-    # for devtools, Rcpp
-    apt-get -y install libcurl4-gnutls-dev libssl-dev && \
-    R -e "install.packages('devtools', repos='http://cran.us.r-project.org')" && \
-    R -e "install.packages('Rcpp', repos='http://cran.us.r-project.org')" && \
-    Rscript -e "library('devtools'); library('Rcpp'); install_github('ramnathv/rCharts')"
-
-RUN echo "$LOG_TAG Cleanup" && \
-    apt-get autoclean && \
-    apt-get clean
+    conda env update -f /env_python_3_with_R.yml --prune && \
+    # Cleanup
+    rm -v miniconda.sh anaconda.sha256  && \
+    # Cleanup based on https://github.com/ContinuumIO/docker-images/commit/cac3352bf21a26fa0b97925b578fb24a0fe8c383
+    find /opt/conda/ -follow -type f -name '*.a' -delete && \
+    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
+    conda clean -ay
+    # Allow to modify conda packages. This allows malicious code to be injected into other interpreter sessions, therefore it is disabled by default
+    # chmod -R ug+rwX /opt/conda
+ENV PATH /opt/conda/bin:$PATH
 
 RUN echo "$LOG_TAG Download Zeppelin binary" && \
-    wget --quiet -O /tmp/zeppelin-${Z_VERSION}-bin-all.tgz http://archive.apache.org/dist/zeppelin/zeppelin-${Z_VERSION}/zeppelin-${Z_VERSION}-bin-all.tgz && \
-    tar -zxvf /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
-    rm -rf /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
     mkdir -p ${Z_HOME} && \
-    mv /zeppelin-${Z_VERSION}-bin-all/* ${Z_HOME}/ && \
+    wget -nv -O /tmp/zeppelin-${Z_VERSION}-bin-all.tgz https://archive.apache.org/dist/zeppelin/zeppelin-${Z_VERSION}/zeppelin-${Z_VERSION}-bin-all.tgz && \
+    tar --strip-components=1 -zxvf  /tmp/zeppelin-${Z_VERSION}-bin-all.tgz -C ${Z_HOME} && \
+    rm -f /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
     chown -R root:root ${Z_HOME} && \
     mkdir -p ${Z_HOME}/logs ${Z_HOME}/run ${Z_HOME}/webapps && \
     # Allow process to edit /etc/passwd, to create a user entry for zeppelin
diff --git a/scripts/docker/zeppelin/bin/env_python_3_with_R.yml b/scripts/docker/zeppelin/bin/env_python_3_with_R.yml
new file mode 100644
index 0000000..fb88458
--- /dev/null
+++ b/scripts/docker/zeppelin/bin/env_python_3_with_R.yml
@@ -0,0 +1,37 @@
+name: base
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - pycodestyle
+  - numpy=1
+  - pandas=0.25
+  - scipy=1
+  - grpcio
+  - hvplot
+  - protobuf=3
+  - pandasql=0.7.3
+  - ipython=7
+  - matplotlib=3
+  - ipykernel=5
+  - jupyter_client=5
+  - bokeh=1.3.4
+  - panel
+  - holoviews
+  - pyyaml=3
+  - altair
+  - intake
+  - plotnine
+  - seaborn
+  - pip
+  - pip:
+    - bkzep==0.6.1
+
+  - r-base=3
+  - r-evaluate
+  - r-base64enc
+  - r-knitr
+  - r-ggplot2
+  - r-irkernel
+  - r-shiny
+  - r-googlevis