Posted to commits@airflow.apache.org by po...@apache.org on 2022/01/22 15:49:31 UTC

[airflow] 18/33: Optimize dockerfiles for local rebuilds (#20238)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v2-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit f6a4197290b00697d0135788c8e61ecaff40141e
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Tue Jan 11 10:38:34 2022 +0100

    Optimize dockerfiles for local rebuilds (#20238)
    
    When you build the Dockerfiles locally for development, layer
    invalidation could happen earlier than you wanted - some of the
    variables (like COMMIT_SHA) affected Docker's cache in a way
    that forced either invalidation of the pre-cached installed
    packages or recompilation of assets that had not been touched.
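
    As an illustration of the caching rule this relies on (a minimal
    sketch, not the actual Dockerfile): Docker re-runs every
    instruction that follows a changed ARG value, so volatile values
    such as COMMIT_SHA belong after the expensive steps:

        FROM python:3.7-slim
        # expensive layer - reused from cache as long as nothing
        # above it changes
        RUN pip install --no-cache-dir apache-airflow
        # volatile build args declared last - changing them only
        # rebuilds the cheap metadata layers from here on
        ARG COMMIT_SHA
        ENV COMMIT_SHA=${COMMIT_SHA}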
    
    Similarly, when no webpack/yarn/package/static files are
    modified, node asset compilation should not happen. It makes
    no sense to compile all the assets on a docker rebuild when
    none of the www files changed.
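
    The fix follows the usual Docker pattern of copying only the
    inputs of an expensive step right before running it (a sketch
    under the same assumptions as above):

        # only the www sources take part in the cache key of the
        # compilation layer, so changes elsewhere in the repository
        # do not trigger a recompile
        COPY airflow/www/ /opt/airflow/airflow/www/
        RUN /scripts/docker/compile_www_assets.sh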
    
    In the case of the CI build we can also separate node module
    preparation from asset compilation, because the node modules
    should remain in the image anyway for incremental changes.
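
    Sketched as two separately cached steps (illustrative, using
    the scripts introduced in this commit):

        # yarn install re-runs only when the lockfile changes
        COPY airflow/www/package.json airflow/www/yarn.lock \
             /opt/airflow/airflow/www/
        RUN /scripts/docker/prepare_node_modules.sh
        # asset compilation re-runs only when the www inputs change
        # and keeps node_modules around for incremental rebuilds
        COPY airflow/www/static /opt/airflow/airflow/www/static/
        RUN REMOVE_ARTIFACTS="false" BUILD_TYPE="build" \
            /scripts/docker/compile_www_assets.sh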
    
    Fixes: #20259
    
    This PR improves the experience of iterating on docker image
    builds by decreasing unnecessary layer invalidations.
    
    (cherry picked from commit 4620770af4550251b5139bb99185656227335f67)
---
 Dockerfile                                         | 178 +++++++++++++--------
 Dockerfile.ci                                      |  85 ++++++----
 breeze                                             |  10 +-
 dev/README_RELEASE_PROVIDER_PACKAGES.md            |   2 +-
 docs/docker-stack/build-arg-ref.rst                |  10 ++
 scripts/ci/libraries/_build_images.sh              |   6 +-
 scripts/ci/libraries/_initialization.sh            |   9 ++
 scripts/docker/compile_www_assets.sh               |  34 ++--
 scripts/docker/install_airflow.sh                  |   1 +
 ...mpile_www_assets.sh => prepare_node_modules.sh} |  27 +---
 10 files changed, 214 insertions(+), 148 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index bb80aca..76b2d36 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -166,11 +166,19 @@ ARG INSTALL_PROVIDERS_FROM_SOURCES="false"
 # But it also can be `.` from local installation or GitHub URL pointing to specific branch or tag
 # Of Airflow. Note That for local source installation you need to have local sources of
 # Airflow checked out together with the Dockerfile and AIRFLOW_SOURCES_FROM and AIRFLOW_SOURCES_TO
-# set to "." and "/opt/airflow" respectively.
+# set to "." and "/opt/airflow" respectively. Similarly AIRFLOW_SOURCES_WWW_FROM/TO are set to right source
+# and destination
 ARG AIRFLOW_INSTALLATION_METHOD="apache-airflow"
 # By default we do not upgrade to latest dependencies
 ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
 # By default we install latest airflow from PyPI so we do not need to copy sources of Airflow
+# www to compile the assets but in case of breeze/CI builds we use latest sources and we override
+# those SOURCES_FROM/TO with "airflow/www" and "/opt/airflow/airflow/www" respectively.
+# This is to rebuild the assets only when any of the www sources change
+ARG AIRFLOW_SOURCES_WWW_FROM="empty"
+ARG AIRFLOW_SOURCES_WWW_TO="/empty"
+
+# By default we install latest airflow from PyPI so we do not need to copy sources of Airflow
 # but in case of breeze/CI builds we use latest sources and we override those
 # those SOURCES_FROM/TO with "." and "/opt/airflow" respectively
 ARG AIRFLOW_SOURCES_FROM="empty"
@@ -181,30 +189,13 @@ ARG AIRFLOW_USER_HOME_DIR
 ARG AIRFLOW_UID
 
 ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT} \
-    INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT} \
-    AIRFLOW_REPO=${AIRFLOW_REPO} \
-    AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \
-    AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \
-    CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \
-    AIRFLOW_CONSTRAINTS=${AIRFLOW_CONSTRAINTS} \
-    AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \
-    AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \
-    DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \
-    PATH=${PATH}:${AIRFLOW_USER_HOME_DIR}/.local/bin \
-    AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
-    PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \
-    AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \
-    AIRFLOW_HOME=${AIRFLOW_HOME} \
-    AIRFLOW_UID=${AIRFLOW_UID} \
-    AIRFLOW_INSTALL_EDITABLE_FLAG="" \
-    UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} \
-    # By default PIP installs everything to ~/.local
-    PIP_USER="true"
+    INSTALL_MSSQL_CLIENT=${INSTALL_MSSQL_CLIENT}
 
-COPY scripts/docker/install_mysql.sh scripts/docker/install_mssql.sh  /scripts/docker/
+# Only copy mysql/mssql installation scripts for now - so that changing the other
+# scripts which are needed much later will not invalidate the docker layer here
+COPY scripts/docker/install_mysql.sh scripts/docker/install_mssql.sh /scripts/docker/
 
-RUN bash -o pipefail -o errexit -o nounset -o nolog ./scripts/docker/install_mysql.sh dev \
-    && bash -o pipefail -o errexit -o nounset -o nolog ./scripts/docker/install_mssql.sh
+RUN /scripts/docker/install_mysql.sh dev && /scripts/docker/install_mssql.sh
 ENV PATH=${PATH}:/opt/mssql-tools/bin
 
 COPY docker-context-files /docker-context-files
@@ -220,17 +211,38 @@ RUN if [[ -f /docker-context-files/pip.conf ]]; then \
         cp /docker-context-files/pip.conf "${AIRFLOW_USER_HOME_DIR}/.config/pip/pip.conf"; \
     fi
 
-# Copy all scripts required for installation - changing any of those should lead to
-# rebuilding from here
-COPY --chown=airflow:0 scripts/docker/* /scripts/docker/
-
-ENV AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \
+ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
+    AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \
     INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \
     AIRFLOW_VERSION=${AIRFLOW_VERSION} \
     AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} \
     AIRFLOW_VERSION_SPECIFICATION=${AIRFLOW_VERSION_SPECIFICATION} \
     AIRFLOW_SOURCES_FROM=${AIRFLOW_SOURCES_FROM} \
-    AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES_TO}
+    AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES_TO} \
+    AIRFLOW_REPO=${AIRFLOW_REPO} \
+    AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \
+    AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \
+    CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \
+    AIRFLOW_CONSTRAINTS=${AIRFLOW_CONSTRAINTS} \
+    AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \
+    AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \
+    DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \
+    PATH=${PATH}:${AIRFLOW_USER_HOME_DIR}/.local/bin \
+    AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
+    PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \
+    AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR} \
+    AIRFLOW_HOME=${AIRFLOW_HOME} \
+    AIRFLOW_UID=${AIRFLOW_UID} \
+    AIRFLOW_INSTALL_EDITABLE_FLAG="" \
+    UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES} \
+    # By default PIP installs everything to ~/.local
+    PIP_USER="true"
+
+# Copy all scripts required for installation - changing any of those should lead to
+# rebuilding from here
+COPY --chown=airflow:0 scripts/docker/common.sh scripts/docker/install_pip_version.sh \
+    /scripts/docker/install_airflow_dependencies_from_branch_tip.sh \
+    /scripts/docker/
 
 # In case of Production build image segment we want to pre-install main version of airflow
 # dependencies from GitHub so that we do not have to always reinstall it from the scratch.
@@ -239,15 +251,33 @@ ENV AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \
 # the cache is only used when "upgrade to newer dependencies" is not set to automatically
 # account for removed dependencies (we do not install them in the first place)
 # Upgrade to specific PIP version
-RUN bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_pip_version.sh; \
+RUN /scripts/docker/install_pip_version.sh; \
     if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \
           ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \
-        bash -o pipefail -o errexit -o nounset -o nolog \
-            /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \
+        /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \
     fi
 
+COPY --chown=airflow:0 scripts/docker/compile_www_assets.sh scripts/docker/prepare_node_modules.sh /scripts/docker/
+COPY --chown=airflow:0 ${AIRFLOW_SOURCES_WWW_FROM} ${AIRFLOW_SOURCES_WWW_TO}
+
+# hadolint ignore=SC2086, SC2010
+RUN if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then \
+        # only prepare node modules and compile assets if the prod image is built from sources
+        # otherwise they are already compiled-in. We should do it in one step together with removing artifacts \
+        # as we want to keep the final image small
+        /scripts/docker/prepare_node_modules.sh; \
+        REMOVE_ARTIFACTS="true" BUILD_TYPE="prod" /scripts/docker/compile_www_assets.sh; \
+        # Copy generated dist folder (otherwise it will be overridden by the COPY step below)
+        mv -f /opt/airflow/airflow/www/static/dist /tmp/dist; \
+    fi;
+
 COPY --chown=airflow:0 ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO}
 
+# Copy back the generated dist folder
+RUN if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then \
+        mv -f /tmp/dist /opt/airflow/airflow/www/static/dist; \
+    fi;
+
 # Add extra python dependencies
 ARG ADDITIONAL_PYTHON_DEPS=""
 # We can set this value to true in case we want to install .whl .tar.gz packages placed in the
@@ -271,19 +301,18 @@ ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} \
 
 WORKDIR /opt/airflow
 
+COPY --chown=airflow:0 scripts/docker/install_from_docker_context_files.sh scripts/docker/install_airflow.sh \
+     scripts/docker/install_additional_dependencies.sh \
+     /scripts/docker/
+
 # hadolint ignore=SC2086, SC2010
-RUN if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then \
-        # only compile assets if the prod image is build from sources
-        # otherwise they are already compiled-in
-        bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/compile_www_assets.sh; \
-    fi; \
-    if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
-        bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_from_docker_context_files.sh; \
+RUN if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
+        /scripts/docker/install_from_docker_context_files.sh; \
     elif [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
-        bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_airflow.sh; \
+        /scripts/docker/install_airflow.sh; \
     fi; \
     if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
-        bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_additional_dependencies.sh; \
+        /scripts/docker/install_additional_dependencies.sh; \
     fi; \
     find "${AIRFLOW_USER_HOME_DIR}/.local/" -name '*.pyc' -print0 | xargs -0 rm -f || true ; \
     find "${AIRFLOW_USER_HOME_DIR}/.local/" -type d -name '__pycache__' -print0 | xargs -0 rm -rf || true ; \
@@ -392,11 +421,7 @@ ARG AIRFLOW_HOME
 # Having the variable in final image allows to disable providers manager warnings when
 # production image is prepared from sources rather than from package
 ARG AIRFLOW_INSTALLATION_METHOD="apache-airflow"
-ARG BUILD_ID
-ARG COMMIT_SHA
 ARG AIRFLOW_IMAGE_REPOSITORY
-ARG AIRFLOW_IMAGE_DATE_CREATED
-ARG AIRFLOW_VERSION_SPECIFICATION
 
 ENV RUNTIME_APT_DEPS=${RUNTIME_APT_DEPS} \
     ADDITIONAL_RUNTIME_APT_DEPS=${ADDITIONAL_RUNTIME_APT_DEPS} \
@@ -412,8 +437,6 @@ ENV RUNTIME_APT_DEPS=${RUNTIME_APT_DEPS} \
     GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm" \
     AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} \
     AIRFLOW_VERSION_SPECIFICATION=${AIRFLOW_VERSION_SPECIFICATION} \
-    BUILD_ID=${BUILD_ID} \
-    COMMIT_SHA=${COMMIT_SHA} \
     # By default PIP installs everything to ~/.local
     PIP_USER="true"
 
@@ -434,11 +457,11 @@ RUN apt-get update \
            ${ADDITIONAL_RUNTIME_APT_DEPS} \
     && apt-get autoremove -yqq --purge \
     && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/* \
+    && rm -rf /var/log/*
 
-# Only copy install_m(y/s)sql and install_pip_version.sh. We do not need any other scripts in the final image.
-COPY scripts/docker/install_mysql.sh /scripts/docker/install_mssql.sh scripts/docker/install_pip_version.sh \
-   scripts/docker/common.sh /scripts/docker/
+# Only copy install_m(y/s)sql. We do not need any other scripts in the final image.
+COPY scripts/docker/install_mysql.sh /scripts/docker/install_mssql.sh /scripts/docker/
 
 # fix permission issue in Azure DevOps when running the scripts
 RUN chmod a+x /scripts/docker/install_mysql.sh && \
@@ -453,8 +476,9 @@ RUN chmod a+x /scripts/docker/install_mysql.sh && \
     mkdir -pv "${AIRFLOW_HOME}/dags"; \
     mkdir -pv "${AIRFLOW_HOME}/logs"; \
     chown -R airflow:0 "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}"; \
-    find "${AIRFLOW_HOME}" -executable -print0 | xargs --null chmod g+x && \
-        find "${AIRFLOW_HOME}" -print0 | xargs --null chmod g+rw
+    chmod -R g+rw "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}" ; \
+    find "${AIRFLOW_HOME}" -executable -print0 | xargs --null chmod g+x; \
+    find "${AIRFLOW_USER_HOME_DIR}" -executable -print0 | xargs --null chmod g+x
 
 COPY --chown=airflow:0 --from=airflow-build-image \
      "${AIRFLOW_USER_HOME_DIR}/.local" "${AIRFLOW_USER_HOME_DIR}/.local"
@@ -463,19 +487,45 @@ COPY --chown=airflow:0 scripts/in_container/prod/clean-logs.sh /clean-logs
 
 # Make /etc/passwd root-group-writeable so that user can be dynamically added by OpenShift
 # See https://github.com/apache/airflow/issues/9248
+# Set default groups for airflow and root user
 
 RUN chmod a+x /entrypoint /clean-logs && \
-    chmod g=u /etc/passwd
+    chmod g=u /etc/passwd  && \
+    chmod g+w "${AIRFLOW_USER_HOME_DIR}/.local" && \
+    usermod -g 0 airflow -G 0
+
+# make sure that the venv is activated for all users,
+# including plain sudo and sudo with the --interactive flag
+RUN sed --in-place=.bak "s/secure_path=\"/secure_path=\"\/.venv\/bin:/" /etc/sudoers
+
+# See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
+# to learn more about how signals are handled by the image
+# Also set airflow as nice PROMPT message.
+# LD_PRELOAD is to work around https://github.com/apache/airflow/issues/17546 - an
+# issue with /usr/lib/x86_64-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block
+# We do not yet have a more "correct" solution to the problem but in order to avoid new issues
+# being raised by users of the prod image, we implement the workaround now.
+# The side effect of this is a slightly slower load (in the range of 100s of milliseconds) for any
+# binary started and a little memory used for the heap allocated by initialization of libstdc++
+# This overhead does not happen for binaries that already link libstdc++ dynamically
+ENV DUMB_INIT_SETSID="1" \
+    PS1="(airflow)" \
+    LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libstdc++.so.6"
 
 WORKDIR ${AIRFLOW_HOME}
 
 EXPOSE 8080
 
-RUN usermod -g 0 airflow -G 0
-
 USER ${AIRFLOW_UID}
 
-RUN /scripts/docker/install_pip_version.sh
+# These should be set and used as late as possible, as any change in commit/build would otherwise
+# invalidate the layers right after
+ARG BUILD_ID
+ARG COMMIT_SHA
+ARG AIRFLOW_IMAGE_REPOSITORY
+ARG AIRFLOW_IMAGE_DATE_CREATED
+
+ENV BUILD_ID=${BUILD_ID} COMMIT_SHA=${COMMIT_SHA}
 
 LABEL org.apache.airflow.distro="debian" \
   org.apache.airflow.distro.version="buster" \
@@ -499,19 +549,5 @@ LABEL org.apache.airflow.distro="debian" \
   org.opencontainers.image.title="Production Airflow Image" \
   org.opencontainers.image.description="Reference, production-ready Apache Airflow image"
 
-
-# See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
-# to learn more about the way how signals are handled by the image
-ENV DUMB_INIT_SETSID="1"
-
-# This one is to workaround https://github.com/apache/airflow/issues/17546
-# issue with /usr/lib/x86_64-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block
-# We do not yet a more "correct" solution to the problem but in order to avoid raising new issues
-# by users of the prod image, we implement the workaround now.
-# The side effect of this is slightly (in the range of 100s of milliseconds) slower load for any
-# binary started and a little memory used for Heap allocated by initialization of libstdc++
-# This overhead is not happening for binaries that already link dynamically libstdc++
-ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libstdc++.so.6"
-
 ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
 CMD []
diff --git a/Dockerfile.ci b/Dockerfile.ci
index 62a61f5..d9e4477 100644
--- a/Dockerfile.ci
+++ b/Dockerfile.ci
@@ -98,8 +98,7 @@ RUN apt-get update \
 # Only copy mysql/mssql installation scripts for now - so that changing the other
 # scripts which are needed much later will not invalidate the docker layer here
 COPY scripts/docker/install_mysql.sh scripts/docker/install_mssql.sh /scripts/docker/
-RUN bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_mysql.sh dev \
-    && bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_mssql.sh \
+RUN /scripts/docker/install_mysql.sh dev && /scripts/docker/install_mssql.sh \
     && adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \
               --quiet "airflow" --home "/home/airflow" \
     && echo -e "airflow\nairflow" | passwd airflow 2>&1 \
@@ -187,7 +186,7 @@ ARG BATS_FILE_VERSION="0.2.0"
 
 RUN curl -sSL https://github.com/bats-core/bats-core/archive/v${BATS_VERSION}.tar.gz -o /tmp/bats.tgz \
     && tar -zxf /tmp/bats.tgz -C /tmp \
-    && bash -o pipefail -o errexit -o nounset -o nolog /tmp/bats-core-${BATS_VERSION}/install.sh /opt/bats && rm -rf \
+    && /tmp/bats-core-${BATS_VERSION}/install.sh /opt/bats && rm -rf \
     && mkdir -p /opt/bats/lib/bats-support \
     && curl -sSL https://github.com/bats-core/bats-support/archive/v${BATS_SUPPORT_VERSION}.tar.gz -o /tmp/bats-support.tgz \
     && tar -zxf /tmp/bats-support.tgz -C /opt/bats/lib/bats-support --strip 1 && rm -rf /tmp/* \
@@ -269,8 +268,14 @@ ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
 ENV EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS=${EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS} \
     UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
 
-COPY scripts/docker/*.sh scripts/docker/install_pip_version.sh /scripts/docker/
+# Copy all scripts required for installation - changing any of those should lead to
+# rebuilding from here
+COPY scripts/docker/install_pip_version.sh scripts/docker/install_airflow_dependencies_from_branch_tip.sh \
+     scripts/docker/common.sh \
+     /scripts/docker/
 
+# We first create a venv where all python packages and the .so binaries they need are
+# installed.
 # In case of CI builds we want to pre-install main version of airflow dependencies so that
 # We do not have to always reinstall it from the scratch.
 # And is automatically reinstalled from the scratch every time patch release of python gets released
@@ -280,10 +285,10 @@ COPY scripts/docker/*.sh scripts/docker/install_pip_version.sh /scripts/docker/
 # account for removed dependencies (we do not install them in the first place)
 RUN echo -e "\n\e[32mThe 'Running pip as the root user' warnings below are not valid but we can't disable them :(\e[0m\n"; \
     echo -e "\n\e[34mSee https://github.com/pypa/pip/issues/10556 for details.\e[0m\n" ; \
-    bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_pip_version.sh; \
+    /scripts/docker/install_pip_version.sh; \
     if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \
           ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \
-        bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \
+        /scripts/docker/install_airflow_dependencies_from_branch_tip.sh; \
     fi
 
 # Generate random hex dump file so that we can determine whether it's faster to rebuild the image
@@ -291,11 +296,24 @@ RUN echo -e "\n\e[32mThe 'Running pip as the root user' warnings below are not v
 # the new image (when it is different)
 RUN head -c 30 /dev/urandom | xxd -ps >/build-cache-hash
 
-# Link dumb-init for backwards compatibility (so that older images also work)
-RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init
+# Copy package.json and yarn.lock to install node modules
+# this way, even if other static check files change, node modules will not need to be reinstalled
+# we want to keep node_modules so that we can do this step separately from compiling the assets
+COPY airflow/www/package.json airflow/www/yarn.lock ${AIRFLOW_SOURCES}/airflow/www/
+COPY scripts/docker/prepare_node_modules.sh /scripts/docker/
+
+# Install the node modules needed to package JS/css for production
+RUN /scripts/docker/prepare_node_modules.sh
+
+# Copy all the www/ files needed for asset compilation. Done as two separate COPY
+# commands because otherwise it would copy the _contents_ of static/ into www/
+COPY airflow/www/webpack.config.js ${AIRFLOW_SOURCES}/airflow/www/
+COPY airflow/www/static ${AIRFLOW_SOURCES}/airflow/www/static/
+COPY scripts/docker/compile_www_assets.sh /scripts/docker/
 
-# Note! We are copying everything with airflow:airflow user:group even if we use root to run the scripts
-# This is fine as root user will be able to use those dirs anyway.
+# Build the assets in "build" mode, without removing temporary artifacts
+# (we will need them for incremental changes)
+RUN REMOVE_ARTIFACTS="false" BUILD_TYPE="build" /scripts/docker/compile_www_assets.sh
 
 # Airflow sources change frequently but dependency configuration won't change that often
 # We copy setup.py and other files needed to perform setup of dependencies
@@ -305,6 +323,8 @@ COPY setup.cfg ${AIRFLOW_SOURCES}/setup.cfg
 
 COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/__init__.py
 
+COPY scripts/docker/install_airflow.sh /scripts/docker/
+
 # The goal of this line is to install the dependencies from the most current setup.py from sources
 # This will be usually incremental small set of packages in CI optimized build, so it will be very fast
 # In non-CI optimized build this will install all dependencies before installing sources.
@@ -312,28 +332,22 @@ COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/__init__.py
 # But in cron job we will install latest versions matching setup.py to see if there is no breaking change
 # and push the constraints if everything is successful
 RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
-        bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_airflow.sh; \
+        /scripts/docker/install_airflow.sh; \
     fi
 
-# Copy all the www/ files we need to compile assets. Done as two separate COPY
-# commands so as otherwise it copies the _contents_ of static/ in to www/
-COPY airflow/www/webpack.config.js airflow/www/package.json airflow/www/yarn.lock ${AIRFLOW_SOURCES}/airflow/www/
-COPY airflow/www/static ${AIRFLOW_SOURCES}/airflow/www/static/
-
-# Package JS/css for production
-RUN bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/compile_www_assets.sh
-
 COPY scripts/in_container/entrypoint_ci.sh /entrypoint
 RUN chmod a+x /entrypoint
 
 COPY scripts/docker/load.bash /opt/bats/lib/
+COPY scripts/docker/install_pip_version.sh scripts/docker/install_additional_dependencies.sh /scripts/docker/
+
 
 # Additional python deps to install
 ARG ADDITIONAL_PYTHON_DEPS=""
 
-RUN bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_pip_version.sh; \
+RUN /scripts/docker/install_pip_version.sh; \
     if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
-            bash -o pipefail -o errexit -o nounset -o nolog /scripts/docker/install_additional_dependencies.sh; \
+        /scripts/docker/install_additional_dependencies.sh; \
     fi
 
 # Install autocomplete for airflow
@@ -356,11 +370,25 @@ ARG BUILD_ID
 ARG COMMIT_SHA
 ARG AIRFLOW_IMAGE_DATE_CREATED
 
-ENV PATH="/files/bin/:/opt/airflow/scripts/in_container/bin/:${HOME}:${PATH}" \
+ENV PATH="/files/bin/:/opt/airflow/scripts/in_container/bin/:${PATH}" \
     GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm/" \
     BUILD_ID=${BUILD_ID} \
     COMMIT_SHA=${COMMIT_SHA}
 
+# This one is to work around https://github.com/apache/airflow/issues/17546 - an
+# issue with /usr/lib/x86_64-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block
+# We do not yet have a more "correct" solution to the problem but in order to avoid new issues
+# being raised by users of the prod image, we implement the workaround now.
+# The side effect of this is a slightly slower load (in the range of 100s of milliseconds) for any
+# binary started and a little memory used for the heap allocated by initialization of libstdc++
+# This overhead does not happen for binaries that already link libstdc++ dynamically
+ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libstdc++.so.6"
+
+# Link dumb-init for backwards compatibility (so that older images also work)
+RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init
+
+EXPOSE 8080
+
 LABEL org.apache.airflow.distro="debian" \
   org.apache.airflow.distro.version="buster" \
   org.apache.airflow.module="airflow" \
@@ -385,16 +413,5 @@ LABEL org.apache.airflow.distro="debian" \
   org.opencontainers.image.title="Continuous Integration Airflow Image" \
   org.opencontainers.image.description="Installed Apache Airflow with Continuous Integration dependencies"
 
-# This one is to workaround https://github.com/apache/airflow/issues/17546
-# issue with /usr/lib/x86_64-linux-gnu/libstdc++.so.6: cannot allocate memory in static TLS block
-# We do not yet a more "correct" solution to the problem but in order to avoid raising new issues
-# by users of the prod image, we implement the workaround now.
-# The side effect of this is slightly (in the range of 100s of milliseconds) slower load for any
-# binary started and a little memory used for Heap allocated by initialization of libstdc++
-# This overhead is not happening for binaries that already link dynamically libstdc++
-ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libstdc++.so.6"
-
-
-EXPOSE 8080
-
 ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
+CMD []
diff --git a/breeze b/breeze
index dccdd6b..8d53400 100755
--- a/breeze
+++ b/breeze
@@ -149,7 +149,15 @@ function breeze::setup_default_breeze_constants() {
     AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES_TO:="/opt/airflow"}
     export AIRFLOW_SOURCES_TO
 
-    # Unlike in CI scripts, in breeze by default production image ist installed from sources
+    # By default the www sources are taken from the local sources when using breeze
+    AIRFLOW_SOURCES_WWW_FROM=${AIRFLOW_SOURCES_WWW_FROM:="./airflow/www"}
+    export AIRFLOW_SOURCES_WWW_FROM
+
+    # They are copied to /opt/airflow/airflow/www by default in breeze
+    AIRFLOW_SOURCES_WWW_TO=${AIRFLOW_SOURCES_WWW_TO:="/opt/airflow/airflow/www"}
+    export AIRFLOW_SOURCES_WWW_TO
+
+    # Unlike in CI scripts, in breeze by default production image is installed from sources
     export AIRFLOW_INSTALLATION_METHOD="."
 
     # If it set is set to specified version, then the source version of Airflow
diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md
index db069c8..bc1bd03 100644
--- a/dev/README_RELEASE_PROVIDER_PACKAGES.md
+++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md
@@ -613,7 +613,7 @@ additional tools. Below is an example Dockerfile, which installs providers for G
 ```dockerfile
 FROM apache/airflow:2.2.3
 
-RUN pip install --user apache-airflow-providers-google==2.2.2.rc1
+RUN pip install  --user apache-airflow-providers-google==2.2.2.rc1
 
 USER ${AIRFLOW_UID}
 ```
diff --git a/docs/docker-stack/build-arg-ref.rst b/docs/docker-stack/build-arg-ref.rst
index ab75d4a..1c298f1 100644
--- a/docs/docker-stack/build-arg-ref.rst
+++ b/docs/docker-stack/build-arg-ref.rst
@@ -198,6 +198,16 @@ You can see some examples of those in:
 |                                          |                                          | "/opt/airflow" when you install Airflow  |
 |                                          |                                          | from local sources.                      |
 +------------------------------------------+------------------------------------------+------------------------------------------+
+| ``AIRFLOW_SOURCES_WWW_FROM``             | ``empty``                                | Sources of Airflow WWW files used for    |
+|                                          |                                          | asset compilation. Set it to             |
+|                                          |                                          | "./airflow/www" when                     |
+|                                          |                                          | you install Airflow from local sources.  |
++------------------------------------------+------------------------------------------+------------------------------------------+
+| ``AIRFLOW_SOURCES_WWW_TO``               | ``/empty``                               | Target for Airflow files used for        |
+|                                          |                                          | asset compilation. Set it to             |
+|                                          |                                          | "/opt/airflow/airflow/www" when          |
+|                                          |                                          | you install Airflow from local sources.  |
++------------------------------------------+------------------------------------------+------------------------------------------+
 | ``AIRFLOW_VERSION_SPECIFICATION``        |                                          | Optional - might be used for using limit |
 |                                          |                                          | for Airflow version installation - for   |
 |                                          |                                          | example ``<2.0.2`` for automated builds. |
diff --git a/scripts/ci/libraries/_build_images.sh b/scripts/ci/libraries/_build_images.sh
index e0a2a8a..abc06bd 100644
--- a/scripts/ci/libraries/_build_images.sh
+++ b/scripts/ci/libraries/_build_images.sh
@@ -20,9 +20,11 @@
 # pass build flags depending on the version and method of the installation (for example to
 # get proper requirement constraint files)
 function build_images::add_build_args_for_remote_install() {
-    # entrypoint is used as AIRFLOW_SOURCES_FROM/TO in order to avoid costly copying of all sources of
+    # entrypoint is used as AIRFLOW_SOURCES_(WWW)_FROM/TO in order to avoid costly copying of all sources of
     # Airflow - those are not needed for remote install at all. Entrypoint is later overwritten by
     EXTRA_DOCKER_PROD_BUILD_FLAGS+=(
+        "--build-arg" "AIRFLOW_SOURCES_WWW_FROM=empty"
+        "--build-arg" "AIRFLOW_SOURCES_WWW_TO=/empty"
         "--build-arg" "AIRFLOW_SOURCES_FROM=empty"
         "--build-arg" "AIRFLOW_SOURCES_TO=/empty"
     )
@@ -738,6 +740,8 @@ function build_images::prepare_prod_build() {
         EXTRA_DOCKER_PROD_BUILD_FLAGS=(
             "--build-arg" "AIRFLOW_SOURCES_FROM=${AIRFLOW_SOURCES_FROM}"
             "--build-arg" "AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES_TO}"
+            "--build-arg" "AIRFLOW_SOURCES_WWW_FROM=${AIRFLOW_SOURCES_WWW_FROM}"
+            "--build-arg" "AIRFLOW_SOURCES_WWW_TO=${AIRFLOW_SOURCES_WWW_TO}"
             "--build-arg" "AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD}"
             "--build-arg" "AIRFLOW_CONSTRAINTS_REFERENCE=${DEFAULT_CONSTRAINTS_BRANCH}"
         )
diff --git a/scripts/ci/libraries/_initialization.sh b/scripts/ci/libraries/_initialization.sh
index a0e9e30..3df9513 100644
--- a/scripts/ci/libraries/_initialization.sh
+++ b/scripts/ci/libraries/_initialization.sh
@@ -439,6 +439,13 @@ function initialization::initialize_image_build_variables() {
     AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES_TO:="/empty"}
     export AIRFLOW_SOURCES_TO
 
+    # By default no sources are copied to image
+    AIRFLOW_SOURCES_WWW_FROM=${AIRFLOW_SOURCES_WWW_FROM:="empty"}
+    export AIRFLOW_SOURCES_WWW_FROM
+
+    AIRFLOW_SOURCES_WWW_TO=${AIRFLOW_SOURCES_WWW_TO:="/empty"}
+    export AIRFLOW_SOURCES_WWW_TO
+
     # By default in scripts production docker image is installed from PyPI package
     export AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD:="apache-airflow"}
 
@@ -711,6 +718,8 @@ Production image build variables:
     AIRFLOW_VERSION_SPECIFICATION: '${AIRFLOW_VERSION_SPECIFICATION}'
     AIRFLOW_SOURCES_FROM: '${AIRFLOW_SOURCES_FROM}'
     AIRFLOW_SOURCES_TO: '${AIRFLOW_SOURCES_TO}'
+    AIRFLOW_SOURCES_WWW_FROM: '${AIRFLOW_SOURCES_WWW_FROM}'
+    AIRFLOW_SOURCES_WWW_TO: '${AIRFLOW_SOURCES_WWW_TO}'
 
 Detected GitHub environment:
 
diff --git a/scripts/docker/compile_www_assets.sh b/scripts/docker/compile_www_assets.sh
index b9034be..e34fe46 100755
--- a/scripts/docker/compile_www_assets.sh
+++ b/scripts/docker/compile_www_assets.sh
@@ -19,6 +19,7 @@
 set -euo pipefail
 
 BUILD_TYPE=${BUILD_TYPE="prod"}
+REMOVE_ARTIFACTS=${REMOVE_ARTIFACTS="true"}
 
 COLOR_BLUE=$'\e[34m'
 readonly COLOR_BLUE
@@ -28,11 +29,8 @@ readonly COLOR_RESET
 # Installs additional dependencies passed as Argument to the Docker build command
 function compile_www_assets() {
     echo
-    echo "${COLOR_BLUE}Compiling www assets${COLOR_RESET}"
+    echo "${COLOR_BLUE}Compiling www assets: running yarn ${BUILD_TYPE}${COLOR_RESET}"
     echo
-    local md5sum_file
-    md5sum_file="static/dist/sum.md5"
-    readonly md5sum_file
     local www_dir
     if [[ ${AIRFLOW_INSTALLATION_METHOD=} == "." ]]; then
         # In case we are building from sources in production image, we should build the assets
@@ -42,30 +40,32 @@ function compile_www_assets() {
     fi
     pushd ${www_dir} || exit 1
     set +e
-    yarn install --frozen-lockfile --no-cache 2>/tmp/out-yarn-install.txt
-    local res=$?
-    if [[ ${res} != 0 ]]; then
-        >&2 echo
-        >&2 echo "Error when running yarn install:"
-        >&2 echo
-        >&2 cat /tmp/out-yarn-install.txt && rm -f /tmp/out-yarn-install.txt
-        exit 1
-    fi
-    rm -f /tmp/out-yarn-install.txt
     yarn run "${BUILD_TYPE}" 2>/tmp/out-yarn-run.txt
     res=$?
     if [[ ${res} != 0 ]]; then
         >&2 echo
-        >&2 echo "Error when running yarn install:"
+        >&2 echo "Error when running yarn run:"
         >&2 echo
         >&2 cat /tmp/out-yarn-run.txt && rm -rf /tmp/out-yarn-run.txt
         exit 1
     fi
     rm -f /tmp/out-yarn-run.txt
     set -e
+    local md5sum_file
+    md5sum_file="static/dist/sum.md5"
+    readonly md5sum_file
     find package.json yarn.lock static/css static/js -type f | sort | xargs md5sum > "${md5sum_file}"
-    rm -rf "${www_dir}/node_modules"
-    rm -vf "${www_dir}"/{package.json,yarn.lock,.eslintignore,.eslintrc,.stylelintignore,.stylelintrc,compile_assets.sh,webpack.config.js}
+    if [[ ${REMOVE_ARTIFACTS} == "true" ]]; then
+        echo
+        echo "${COLOR_BLUE}Removing generated node modules${COLOR_RESET}"
+        echo
+        rm -rf "${www_dir}/node_modules"
+        rm -vf "${www_dir}"/{package.json,yarn.lock,.eslintignore,.eslintrc,.stylelintignore,.stylelintrc,compile_assets.sh,webpack.config.js}
+    else
+        echo
+        echo "${COLOR_BLUE}Leaving generated node modules${COLOR_RESET}"
+        echo
+    fi
     popd || exit 1
 }
 
diff --git a/scripts/docker/install_airflow.sh b/scripts/docker/install_airflow.sh
index b2ece66..25da442 100755
--- a/scripts/docker/install_airflow.sh
+++ b/scripts/docker/install_airflow.sh
@@ -59,6 +59,7 @@ function install_airflow() {
             pip install ${AIRFLOW_INSTALL_EDITABLE_FLAG} \
                 "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}"
         fi
+
         # make sure correct PIP version is used
         pip install --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}"
         echo
diff --git a/scripts/docker/compile_www_assets.sh b/scripts/docker/prepare_node_modules.sh
similarity index 65%
copy from scripts/docker/compile_www_assets.sh
copy to scripts/docker/prepare_node_modules.sh
index b9034be..e30b96e 100755
--- a/scripts/docker/compile_www_assets.sh
+++ b/scripts/docker/prepare_node_modules.sh
@@ -18,21 +18,16 @@
 # shellcheck disable=SC2086
 set -euo pipefail
 
-BUILD_TYPE=${BUILD_TYPE="prod"}
-
 COLOR_BLUE=$'\e[34m'
 readonly COLOR_BLUE
 COLOR_RESET=$'\e[0m'
 readonly COLOR_RESET
 
-# Installs additional dependencies passed as Argument to the Docker build command
-function compile_www_assets() {
+# Prepares node modules needed to compile WWW assets
+function prepare_node_modules() {
     echo
-    echo "${COLOR_BLUE}Compiling www assets${COLOR_RESET}"
+    echo "${COLOR_BLUE}Preparing node modules${COLOR_RESET}"
     echo
-    local md5sum_file
-    md5sum_file="static/dist/sum.md5"
-    readonly md5sum_file
     local www_dir
     if [[ ${AIRFLOW_INSTALLATION_METHOD=} == "." ]]; then
         # In case we are building from sources in production image, we should build the assets
@@ -52,21 +47,7 @@ function compile_www_assets() {
         exit 1
     fi
     rm -f /tmp/out-yarn-install.txt
-    yarn run "${BUILD_TYPE}" 2>/tmp/out-yarn-run.txt
-    res=$?
-    if [[ ${res} != 0 ]]; then
-        >&2 echo
-        >&2 echo "Error when running yarn install:"
-        >&2 echo
-        >&2 cat /tmp/out-yarn-run.txt && rm -rf /tmp/out-yarn-run.txt
-        exit 1
-    fi
-    rm -f /tmp/out-yarn-run.txt
-    set -e
-    find package.json yarn.lock static/css static/js -type f | sort | xargs md5sum > "${md5sum_file}"
-    rm -rf "${www_dir}/node_modules"
-    rm -vf "${www_dir}"/{package.json,yarn.lock,.eslintignore,.eslintrc,.stylelintignore,.stylelintrc,compile_assets.sh,webpack.config.js}
     popd || exit 1
 }
 
-compile_www_assets
+prepare_node_modules