You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by pd...@apache.org on 2021/02/03 10:28:14 UTC
[zeppelin] branch master updated: [ZEPPELIN-5200] Update dockerfile
This is an automated email from the ASF dual-hosted git repository.
pdallig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push:
new b1adb5a [ZEPPELIN-5200] Update dockerfile
b1adb5a is described below
commit b1adb5a23a9c9e23d8527edbaa260cb70587d81b
Author: Philipp Dallig <ph...@gmail.com>
AuthorDate: Mon Jan 25 11:38:18 2021 +0100
[ZEPPELIN-5200] Update dockerfile
### What is this PR for?
This is a complete rewrite of the Zeppelin Dockerfile.
Main benefits:
- update to ubuntu 20.04
- install python3 and R with conda, as in our CI system
### What type of PR is it?
- Refactoring
### Todos
* [ ] - Testing
### What is the Jira issue?
* https://issues.apache.org/jira/browse/ZEPPELIN-5200
### Questions:
* Do the license files need updating? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: Philipp Dallig <ph...@gmail.com>
Closes #4022 from Reamer/docker_zeppelin and squashes the following commits:
4841ef327 [Philipp Dallig] Update miniconda version
ce7f0dd8e [Philipp Dallig] Add some more python modules
27fe42ce6 [Philipp Dallig] Use python 3.7
50362e2cd [Philipp Dallig] Correct comment
bf7f73ea4 [Philipp Dallig] Update dockerfile
---
scripts/docker/zeppelin/bin/Dockerfile | 117 +++++++--------------
.../docker/zeppelin/bin/env_python_3_with_R.yml | 37 +++++++
2 files changed, 74 insertions(+), 80 deletions(-)
diff --git a/scripts/docker/zeppelin/bin/Dockerfile b/scripts/docker/zeppelin/bin/Dockerfile
index 3928b61..bb611bc 100644
--- a/scripts/docker/zeppelin/bin/Dockerfile
+++ b/scripts/docker/zeppelin/bin/Dockerfile
@@ -13,100 +13,57 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-FROM ubuntu:16.04
-MAINTAINER Apache Software Foundation <de...@zeppelin.apache.org>
+FROM ubuntu:20.04
-ENV Z_VERSION="0.9.0-preview2"
+LABEL maintainer="Apache Software Foundation <de...@zeppelin.apache.org>"
+
+ENV Z_VERSION="0.9.0"
ENV LOG_TAG="[ZEPPELIN_${Z_VERSION}]:" \
- Z_HOME="/zeppelin" \
+ Z_HOME="/opt/zeppelin" \
LANG=en_US.UTF-8 \
LC_ALL=en_US.UTF-8 \
+ JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
ZEPPELIN_ADDR="0.0.0.0"
-RUN echo "$LOG_TAG update and install basic packages" && \
+RUN echo "$LOG_TAG install basic packages" && \
apt-get -y update && \
- apt-get install -y locales && \
- locale-gen $LANG && \
- apt-get install -y software-properties-common && \
- apt -y autoclean && \
- apt -y dist-upgrade && \
- apt-get install -y build-essential
-
-RUN echo "$LOG_TAG install tini related packages" && \
- apt-get install -y wget curl grep sed dpkg && \
- TINI_VERSION=`curl https://github.com/krallin/tini/releases/latest | grep -o "/v.*\"" | sed 's:^..\(.*\).$:\1:'` && \
- curl -L "https://github.com/krallin/tini/releases/download/v${TINI_VERSION}/tini_${TINI_VERSION}.deb" > tini.deb && \
- dpkg -i tini.deb && \
- rm tini.deb
-
-ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
-RUN echo "$LOG_TAG Install java8" && \
- apt-get -y update && \
- apt-get install -y openjdk-8-jdk && \
- rm -rf /var/lib/apt/lists/*
-
-# should install conda first before numpy, matploylib since pip and python will be installed by conda
-RUN echo "$LOG_TAG Install miniconda3 related packages" && \
- apt-get -y update && \
- apt-get install -y bzip2 ca-certificates \
- libglib2.0-0 libxext6 libsm6 libxrender1 \
- git mercurial subversion && \
- echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \
- wget --quiet https://repo.continuum.io/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O ~/miniconda.sh && \
- /bin/bash ~/miniconda.sh -b -p /opt/conda && \
- rm ~/miniconda.sh
-
-ENV PATH /opt/conda/bin:$PATH
+ DEBIAN_FRONTEND=noninteractive apt-get install -y locales language-pack-en tini openjdk-8-jre-headless wget && \
+ # Cleanup
+ rm -rf /var/lib/apt/lists/* && \
+ apt-get autoclean && \
+ apt-get clean
-RUN echo "$LOG_TAG Install python related packages" && \
- apt-get -y update && \
- apt-get install -y python-dev python-pip && \
- apt-get install -y gfortran && \
- # numerical/algebra packages
- apt-get install -y libblas-dev libatlas-dev liblapack-dev && \
- # font, image
- apt-get install -y libpng-dev libfreetype6-dev libxft-dev && \
- # for tkinter
- apt-get install -y python-tk libxml2-dev libxslt-dev zlib1g-dev && \
- hash -r && \
+# Install conda to manage python and R packages
+ARG miniconda_version="py37_4.9.2"
+# Hashes via https://docs.conda.io/en/latest/miniconda_hashes.html
+ARG miniconda_sha256="79510c6e7bd9e012856e25dcb21b3e093aa4ac8113d9aa7e82a86987eabe1c31"
+# Install python and R packages via conda
+COPY env_python_3_with_R.yml /env_python_3_with_R.yml
+RUN set -ex && \
+ wget -nv https://repo.anaconda.com/miniconda/Miniconda3-${miniconda_version}-Linux-x86_64.sh -O miniconda.sh && \
+ echo "${miniconda_sha256} miniconda.sh" > anaconda.sha256 && \
+ sha256sum --strict -c anaconda.sha256 && \
+ bash miniconda.sh -b -p /opt/conda && \
+ export PATH=/opt/conda/bin:$PATH && \
conda config --set always_yes yes --set changeps1 no && \
- conda update -q conda && \
conda info -a && \
- conda config --add channels conda-forge && \
- pip install -q pycodestyle==2.5.0 && \
- pip install -q numpy==1.17.3 pandas==0.25.0 scipy==1.3.1 grpcio==1.19.0 bkzep==0.6.1 hvplot==0.5.2 protobuf==3.10.0 pandasql==0.7.3 ipython==7.8.0 matplotlib==3.0.3 ipykernel==5.1.2 jupyter_client==5.3.4 bokeh==1.3.4 panel==0.6.0 holoviews==1.12.3 seaborn==0.9.0 plotnine==0.5.1 intake==0.5.3 intake-parquet==0.2.2 altair==3.2.0 pycodestyle==2.5.0 apache_beam==2.15.0
-
-RUN echo "$LOG_TAG Install R related packages" && \
- echo "PATH: $PATH" && \
- ls /opt/conda/bin && \
- echo "deb http://cran.rstudio.com/bin/linux/ubuntu xenial/" | tee -a /etc/apt/sources.list && \
- apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 51716619E084DAB9 && \
- apt-get -y update && \
- apt-get -y --allow-unauthenticated install r-base r-base-dev && \
- R -e "install.packages('evaluate', repos = 'https://cloud.r-project.org')" && \
- R -e "install.packages('knitr', repos='http://cran.us.r-project.org')" && \
- R -e "install.packages('ggplot2', repos='http://cran.us.r-project.org')" && \
- R -e "install.packages('googleVis', repos='http://cran.us.r-project.org')" && \
- R -e "install.packages('data.table', repos='http://cran.us.r-project.org')" && \
- R -e "install.packages('IRkernel', repos = 'https://cloud.r-project.org');IRkernel::installspec()" && \
- R -e "install.packages('shiny', repos = 'https://cloud.r-project.org')" && \
- # for devtools, Rcpp
- apt-get -y install libcurl4-gnutls-dev libssl-dev && \
- R -e "install.packages('devtools', repos='http://cran.us.r-project.org')" && \
- R -e "install.packages('Rcpp', repos='http://cran.us.r-project.org')" && \
- Rscript -e "library('devtools'); library('Rcpp'); install_github('ramnathv/rCharts')"
-
-RUN echo "$LOG_TAG Cleanup" && \
- apt-get autoclean && \
- apt-get clean
+ conda env update -f /env_python_3_with_R.yml --prune && \
+ # Cleanup
+ rm -v miniconda.sh anaconda.sha256 && \
+ # Cleanup based on https://github.com/ContinuumIO/docker-images/commit/cac3352bf21a26fa0b97925b578fb24a0fe8c383
+ find /opt/conda/ -follow -type f -name '*.a' -delete && \
+ find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
+ conda clean -ay
+ # Allow to modify conda packages. This allows malicious code to be injected into other interpreter sessions, therefore it is disabled by default
+ # chmod -R ug+rwX /opt/conda
+ENV PATH /opt/conda/bin:$PATH
RUN echo "$LOG_TAG Download Zeppelin binary" && \
- wget --quiet -O /tmp/zeppelin-${Z_VERSION}-bin-all.tgz http://archive.apache.org/dist/zeppelin/zeppelin-${Z_VERSION}/zeppelin-${Z_VERSION}-bin-all.tgz && \
- tar -zxvf /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
- rm -rf /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
mkdir -p ${Z_HOME} && \
- mv /zeppelin-${Z_VERSION}-bin-all/* ${Z_HOME}/ && \
+ wget -nv -O /tmp/zeppelin-${Z_VERSION}-bin-all.tgz https://archive.apache.org/dist/zeppelin/zeppelin-${Z_VERSION}/zeppelin-${Z_VERSION}-bin-all.tgz && \
+ tar --strip-components=1 -zxvf /tmp/zeppelin-${Z_VERSION}-bin-all.tgz -C ${Z_HOME} && \
+ rm -f /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
chown -R root:root ${Z_HOME} && \
mkdir -p ${Z_HOME}/logs ${Z_HOME}/run ${Z_HOME}/webapps && \
# Allow process to edit /etc/passwd, to create a user entry for zeppelin
diff --git a/scripts/docker/zeppelin/bin/env_python_3_with_R.yml b/scripts/docker/zeppelin/bin/env_python_3_with_R.yml
new file mode 100644
index 0000000..fb88458
--- /dev/null
+++ b/scripts/docker/zeppelin/bin/env_python_3_with_R.yml
@@ -0,0 +1,37 @@
+name: base
+channels:
+ - conda-forge
+ - defaults
+dependencies:
+ - pycodestyle
+ - numpy=1
+ - pandas=0.25
+ - scipy=1
+ - grpcio
+ - hvplot
+ - protobuf=3
+ - pandasql=0.7.3
+ - ipython=7
+ - matplotlib=3
+ - ipykernel=5
+ - jupyter_client=5
+ - bokeh=1.3.4
+ - panel
+ - holoviews
+ - pyyaml=3
+ - altair
+ - intake
+ - plotnine
+ - seaborn
+ - pip
+ - pip:
+ - bkzep==0.6.1
+
+ - r-base=3
+ - r-evaluate
+ - r-base64enc
+ - r-knitr
+ - r-ggplot2
+ - r-irkernel
+ - r-shiny
+ - r-googlevis