You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2020/03/30 21:23:00 UTC

[GitHub] [airflow] ashb commented on a change in pull request #7832: Add production image support

ashb commented on a change in pull request #7832: Add production image support
URL: https://github.com/apache/airflow/pull/7832#discussion_r400502435
 
 

 ##########
 File path: Dockerfile
 ##########
 @@ -0,0 +1,375 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# THIS DOCKERFILE IS INTENDED FOR PRODUCTION USE AND DEPLOYMENT.
+# NOTE! IT IS ALPHA-QUALITY FOR NOW - WE ARE IN THE PROCESS OF TESTING IT
+#
+#
+# This is a multi-segmented image. It actually contains two images:
+#
+# airflow-build-image  - there all airflow dependencies can be installed (and
+#                        built - for those dependencies that require
+#                        build essentials). Airflow is installed there with
+#                        --user switch so that all the dependencies are
+#                        installed to ${HOME}/.local
+#
+# main                 - this is the actual production image that is much
+#                        smaller because it does not contain all the build
+#                        essentials. Instead the ${HOME}/.local folder
+#                        is copied from the build-image - this way we have
+#                        only result of installation and we do not need
+#                        all the build essentials. This makes the image
+#                        much smaller.
+#
+ARG AIRFLOW_VERSION="2.0.0.dev0"
+ARG AIRFLOW_ORG="apache"
+ARG AIRFLOW_REPO="airflow"
+ARG AIRFLOW_GIT_REFERENCE="master"
+ARG REQUIREMENTS_GIT_REFERENCE="master"
+ARG WWW_FOLDER="www"
+ARG AIRFLOW_EXTRAS="async,aws,azure,celery,dask,elasticsearch,gcp,kubernetes,mysql,postgres,redis,slack,ssh,statsd,virtualenv"
+
+ARG AIRFLOW_HOME=/opt/airflow
+ARG AIRFLOW_UID="50000"
+ARG AIRFLOW_GID="50000"
+
+ARG PIP_VERSION="19.0.2"
+ARG CASS_DRIVER_BUILD_CONCURRENCY="8"
+
+ARG PYTHON_BASE_IMAGE="python:3.6-slim-buster"
+ARG PYTHON_MAJOR_MINOR_VERSION="3.6"
+
+##############################################################################################
+# This is the build image where we build all dependencies
+##############################################################################################
+FROM ${PYTHON_BASE_IMAGE} as airflow-build-image
+SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"]
+
+LABEL org.apache.airflow.docker=true
+LABEL org.apache.airflow.distro="debian"
+LABEL org.apache.airflow.distro.version="buster"
+LABEL org.apache.airflow.module="airflow"
+LABEL org.apache.airflow.component="airflow"
+LABEL org.apache.airflow.image="airflow-build-image"
+# An ARG declared before FROM is not visible inside a build stage until it is
+# redeclared there. Without this redeclaration the uid label below would be
+# set to an empty string.
+ARG AIRFLOW_UID
+LABEL org.apache.airflow.uid="${AIRFLOW_UID}"
+
+ARG AIRFLOW_VERSION
+ARG AIRFLOW_ORG
+ARG AIRFLOW_REPO
+ARG AIRFLOW_GIT_REFERENCE
+ARG REQUIREMENTS_GIT_REFERENCE
+ARG WWW_FOLDER
+ARG AIRFLOW_EXTRAS
+
+ARG AIRFLOW_HOME
+ARG AIRFLOW_UID
+ARG AIRFLOW_GID
+
+ARG PIP_VERSION
+ARG CASS_DRIVER_BUILD_CONCURRENCY
+
+ARG PYTHON_MAJOR_MINOR_VERSION
+
+# PYTHON_BASE_IMAGE is declared before FROM, so it has to be redeclared inside
+# the stage before it can be expanded here. Otherwise the ENV variable
+# (and the "Base image" line printed later) would be empty.
+ARG PYTHON_BASE_IMAGE
+ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE}
+ENV PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION}
+
+ENV AIRFLOW_VERSION=${AIRFLOW_VERSION}
+ENV AIRFLOW_ORG=${AIRFLOW_ORG}
+ENV AIRFLOW_REPO=${AIRFLOW_REPO}
+ENV AIRFLOW_GIT_REFERENCE=${AIRFLOW_GIT_REFERENCE}
+ENV REQUIREMENTS_GIT_REFERENCE=${REQUIREMENTS_GIT_REFERENCE}
+
+ENV AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}
+
+ENV AIRFLOW_REPO_URL="https://github.com/${AIRFLOW_ORG}/${AIRFLOW_REPO}"
+ENV AIRFLOW_RAW_CONTENT_URL="https://raw.githubusercontent.com/${AIRFLOW_ORG}/${AIRFLOW_REPO}"
+
+ENV PIP_VERSION=${PIP_VERSION}
+ENV CASS_DRIVER_BUILD_CONCURRENCY=${CASS_DRIVER_BUILD_CONCURRENCY}
+
+ARG AIRFLOW_SOURCES="${AIRFLOW_REPO_URL}/archive/${AIRFLOW_GIT_REFERENCE}.tar.gz#egg=apache-airflow"
+ENV AIRFLOW_SOURCES=${AIRFLOW_SOURCES}
+
+ARG CONSTRAINT_REQUIREMENTS="${AIRFLOW_RAW_CONTENT_URL}/${REQUIREMENTS_GIT_REFERENCE}/requirements/requirements-python${PYTHON_MAJOR_MINOR_VERSION}.txt"
+ENV CONSTRAINT_REQUIREMENTS=${CONSTRAINT_REQUIREMENTS}
+
+ARG ENTRYPOINT_FILE="${AIRFLOW_RAW_CONTENT_URL}/${REQUIREMENTS_GIT_REFERENCE}/entrypoint.sh"
+ENV ENTRYPOINT_FILE="${ENTRYPOINT_FILE}"
+
+# Print versions of the build parameters used for this stage.
+# Every echo must be terminated with ';' - without it the line continuation
+# glues the next "echo ..." onto the previous command as extra arguments.
+RUN echo "Building airflow-build-image stage"; \
+    echo "Base image: ${PYTHON_BASE_IMAGE}"; \
+    echo "Airflow version: ${AIRFLOW_VERSION}"; \
+    echo "Airflow git reference: ${AIRFLOW_GIT_REFERENCE}"; \
+    echo "Airflow org: ${AIRFLOW_ORG}"; \
+    echo "Airflow repo: ${AIRFLOW_REPO}"; \
+    echo "Airflow repo url: ${AIRFLOW_REPO_URL}"; \
+    echo "Airflow extras: ${AIRFLOW_EXTRAS}"; \
+    echo "Airflow install source: ${AIRFLOW_SOURCES}"; \
+    echo "Constraint requirements: ${CONSTRAINT_REQUIREMENTS}"; \
+    echo "PIP version: ${PIP_VERSION}"; \
+    echo "Cassandra concurrency: ${CASS_DRIVER_BUILD_CONCURRENCY}"; \
+    echo
+
+# Make sure noninteractive debian install is used and language variables set
+ENV DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \
+    LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8
+
+# curl and gnupg2 are prerequisites for downloading nodejs in the next step.
+# The apt caches are cleaned in the same layer so they do not end up in the image.
+RUN apt-get update \
+    && apt-get install --yes --no-install-recommends curl gnupg2 \
+    && apt-get autoremove --yes --purge -qq \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install basic apt dependencies
+RUN curl --fail --location https://deb.nodesource.com/setup_10.x | bash - \
+    && curl https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - > /dev/null \
+    && echo "deb https://dl.yarnpkg.com/debian/ stable main" > /etc/apt/sources.list.d/yarn.list \
+    # Note missing man directories on debian-buster
+    # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199
+    && mkdir -pv /usr/share/man/man1 \
+    && mkdir -pv /usr/share/man/man7 \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+           apt-transport-https \
+           apt-utils \
+           build-essential \
+           ca-certificates \
+           curl \
+           gnupg \
+           dirmngr \
+           freetds-bin \
+           freetds-dev \
+           gosu \
+           krb5-user \
+           ldap-utils \
+           libffi-dev \
+           libkrb5-dev \
+           libpq-dev \
+           libsasl2-2 \
+           libsasl2-dev \
+           libsasl2-modules \
+           libssl-dev \
+           locales  \
+           lsb-release \
+           nodejs \
 
 Review comment:
   I'd really rather we didn't have nodejs in our production images.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services