You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2022/01/08 19:42:05 UTC

[airflow] branch main updated: Uses airflow user for build segment of docker image (#20744)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 3feb057  Uses airflow user for build segment of docker image (#20744)
3feb057 is described below

commit 3feb057f0ee34482c2ec28c4e3bedd2d07586347
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Sat Jan 8 20:41:29 2022 +0100

    Uses airflow user for build segment of docker image (#20744)
    
    PIP produces a warning when the root user is used to run pip install.
    The warning is there for a good reason - installing packages with pip
    as root clashes with a number of distro-managed Python packages.
    
    The warning cannot be disabled even if our use case is legitimate
    as has been extensively discussed in
    https://github.com/pypa/pip/issues/10556.
    
    However, the advice given by the warning is a bit misleading - it
    suggests using a virtualenv, but since that is considered bad practice
    for container building, and because we need to create virtualenvs
    dynamically inside the image, using a virtualenv is a bad solution for us.
    It was attempted in #19189 and failed.
    
    Instead, we create an airflow user and use PIP_USER="true", which
    installs all dependencies in the build segment into the ~/.local folder,
    from where we can copy them to the main image.
    
    That gets rid of the warning and at the same time allows us to
    keep to the best practices of building the images.
---
 Dockerfile | 48 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 8de5fef..5eaaff4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -40,6 +40,7 @@ ARG ADDITIONAL_PYTHON_DEPS=""
 
 ARG AIRFLOW_HOME=/opt/airflow
 ARG AIRFLOW_UID="50000"
+ARG AIRFLOW_USER_HOME_DIR=/home/airflow
 
 ARG PYTHON_BASE_IMAGE="python:3.6-slim-buster"
 
@@ -173,6 +174,10 @@ ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
 ARG AIRFLOW_SOURCES_FROM="empty"
 ARG AIRFLOW_SOURCES_TO="/empty"
 
+ARG AIRFLOW_HOME
+ARG AIRFLOW_USER_HOME_DIR
+ARG AIRFLOW_UID
+
 ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \
     INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \
     AIRFLOW_REPO=${AIRFLOW_REPO} \
@@ -183,26 +188,40 @@ ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \
     AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \
     AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \
     DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \
-    PATH=${PATH}:/root/.local/bin \
+    PATH=${PATH}:${AIRFLOW_USER_HOME_DIR}/.local/bin \
     AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
     PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \
+    AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \
+    AIRFLOW_HOME=${AIRFLOW_HOME} \
+    AIRFLOW_UID=${AIRFLOW_UID} \
     AIRFLOW_INSTALL_EDITABLE_FLAG="" \
     UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} \
     # By default PIP installs everything to ~/.local
     PIP_USER="true"
 
-COPY scripts/docker/*.sh /scripts/docker/
+COPY scripts/docker/install_mysql.sh scripts/docker/install_mssql.sh  /scripts/docker/
+
 RUN bash ./scripts/docker/install_mysql.sh dev \
     && bash ./scripts/docker/install_mssql.sh
 ENV PATH=${PATH}:/opt/mssql-tools/bin
 
 COPY docker-context-files /docker-context-files
 
+RUN adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \
+       --quiet "airflow" --uid "${AIRFLOW_UID}" --gid "0" --home "${AIRFLOW_USER_HOME_DIR}" && \
+    mkdir -p ${AIRFLOW_HOME} && chown -R "airflow:0" "${AIRFLOW_USER_HOME_DIR}" ${AIRFLOW_HOME}
+
+USER airflow
+
 RUN if [[ -f /docker-context-files/pip.conf ]]; then \
-        mkdir -p /root/.config/pip; \
-        cp /docker-context-files/pip.conf /root/.config/pip/pip.conf; \
+        mkdir -p ${AIRFLOW_USER_HOME_DIR}/.config/pip; \
+        cp /docker-context-files/pip.conf "${AIRFLOW_USER_HOME_DIR}/.config/pip/pip.conf"; \
     fi
 
+# Copy all scripts required for installation - changing any of those should lead to
+# rebuilding from here
+COPY --chown=airflow:0 scripts/docker/* /scripts/docker/
+
 ENV AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \
     INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \
     AIRFLOW_VERSION=${AIRFLOW_VERSION} \
@@ -224,7 +243,7 @@ RUN bash /scripts/docker/install_pip_version.sh; \
         bash /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \
     fi
 
-COPY ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO}
+COPY --chown=airflow:0 ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO}
 
 # Add extra python dependencies
 ARG ADDITIONAL_PYTHON_DEPS=""
@@ -263,11 +282,11 @@ RUN if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then \
     if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
         bash /scripts/docker/install_additional_dependencies.sh; \
     fi; \
-    find /root/.local/ -name '*.pyc' -print0 | xargs -0 rm -r || true ; \
-    find /root/.local/ -type d -name '__pycache__' -print0 | xargs -0 rm -r || true ; \
+    find "${AIRFLOW_USER_HOME_DIR}/.local/" -name '*.pyc' -print0 | xargs -0 rm -f || true ; \
+    find "${AIRFLOW_USER_HOME_DIR}/.local/" -type d -name '__pycache__' -print0 | xargs -0 rm -rf || true ; \
     # make sure that all directories and files in .local are also group accessible
-    find /root/.local -executable -print0 | xargs --null chmod g+x; \
-    find /root/.local -print0 | xargs --null chmod g+rw
+    find "${AIRFLOW_USER_HOME_DIR}/.local" -executable -print0 | xargs --null chmod g+x; \
+    find "${AIRFLOW_USER_HOME_DIR}/.local" -print0 | xargs --null chmod g+rw
 
 # In case there is a requirements.txt file in "docker-context-files" it will be installed
 # during the build additionally to whatever has been installed so far. It is recommended that
@@ -372,7 +391,7 @@ ARG ADDITIONAL_RUNTIME_APT_COMMAND=""
 ARG ADDITIONAL_RUNTIME_APT_ENV=""
 ARG INSTALL_MYSQL_CLIENT="true"
 ARG INSTALL_MSSQL_CLIENT="true"
-ARG AIRFLOW_USER_HOME_DIR=/home/airflow
+ARG AIRFLOW_USER_HOME_DIR
 ARG AIRFLOW_HOME
 # Having the variable in final image allows to disable providers manager warnings when
 # production image is prepared from sources rather than from package
@@ -431,13 +450,14 @@ RUN chmod a+x /scripts/docker/install_mysql.sh && \
     mkdir -pv "${AIRFLOW_HOME}"; \
     mkdir -pv "${AIRFLOW_HOME}/dags"; \
     mkdir -pv "${AIRFLOW_HOME}/logs"; \
-    chown -R "airflow:root" "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}"; \
+    chown -R airflow:0 "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}"; \
     find "${AIRFLOW_HOME}" -executable -print0 | xargs --null chmod g+x && \
         find "${AIRFLOW_HOME}" -print0 | xargs --null chmod g+rw
 
-COPY --chown=airflow:root --from=airflow-build-image /root/.local "${AIRFLOW_USER_HOME_DIR}/.local"
-COPY --chown=airflow:root scripts/in_container/prod/entrypoint_prod.sh /entrypoint
-COPY --chown=airflow:root scripts/in_container/prod/clean-logs.sh /clean-logs
+COPY --chown=airflow:0 --from=airflow-build-image \
+     "${AIRFLOW_USER_HOME_DIR}/.local" "${AIRFLOW_USER_HOME_DIR}/.local"
+COPY --chown=airflow:0 scripts/in_container/prod/entrypoint_prod.sh /entrypoint
+COPY --chown=airflow:0 scripts/in_container/prod/clean-logs.sh /clean-logs
 
 # Make /etc/passwd root-group-writeable so that user can be dynamically added by OpenShift
 # See https://github.com/apache/airflow/issues/9248