You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2021/01/02 10:17:12 UTC

[airflow] branch master updated: Removes pip download when installing from local packages (#13422)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new e436883  Removes pip download when installing from local packages (#13422)
e436883 is described below

commit e43688358320a5f20776c0d346c310a568a55049
Author: Jarek Potiuk <ja...@polidea.com>
AuthorDate: Sat Jan 2 11:16:51 2021 +0100

    Removes pip download when installing from local packages (#13422)
    
    This PR improves building production image from local packages,
    in preparation for moving provider requirements out of setup.cfg.
    
    Previously `pip download` step was executed in the CI scripts
    in order to download all the packages that were needed. However
    this had two problems:
    
    1) PIP download was executed outside of Dockerfile in CI scripts
       which means that any change to requirements there could not
       be executed in 'workflow_run' event - because main branch version
       of CI scripts is used there. We want to add extra requirements
       when installing airflow so in order to be able to change
       it, those requirements should be added in Dockerfile.
       This will be done in the follow-up #13409 PR.
    
    2) Packages that are fetched with `pip download` and then installed
       from the downloaded files are reported with a "file" reference
       rather than a regular == version when you run pip freeze/check.
       This looks weird: you can still figure out the version from the
       file name, but packages installed with a plain `pip install`
       look much more normal. The airflow package and provider packages
       will still get the "file" form but this is ok because we are
       building those packages from sources and they are not yet
       available in PyPI.
    
    Example:
    
      adal==1.2.5
      aiohttp==3.7.3
      alembic==1.4.3
      amqp==2.6.1
      apache-airflow @ file:///docker-context-files/apache_airflow-2.1.0.dev0-py3-none-any.whl
      apache-airflow-providers-amazon @ file:///docker-context-files/apache_airflow_providers_amazon-1.0.0-py3-none-any.whl
      apache-airflow-providers-celery @ file:///docker-context-files/apache_airflow_providers_celery-1.0.0-py3-none-any.whl
      ...
    
    With this PR, we do not `pip download` all packages, but instead
    we prepare airflow + providers packages as .whl files and
    install them from there (all their dependencies are still
    installed from PyPI).
---
 Dockerfile                                       | 39 +++++++++++++++++++-----
 Dockerfile.ci                                    | 32 ++++++++++++++-----
 IMAGES.rst                                       | 25 ++++++++++++---
 docs/apache-airflow/production-deployment.rst    | 27 ++++++++++++++--
 scripts/ci/images/ci_build_dockerhub.sh          |  5 ++-
 scripts/ci/images/ci_prepare_prod_image_on_ci.sh |  5 ++-
 scripts/ci/libraries/_build_images.sh            | 33 ++++++++++++--------
 scripts/ci/libraries/_runs.sh                    | 16 ----------
 8 files changed, 127 insertions(+), 55 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 65b11fa..81737a1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -247,31 +247,54 @@ ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
 
 WORKDIR /opt/airflow
 
-# remove mysql from extras if client is not installed
+# hadolint ignore=SC2086, SC2010
 RUN if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then \
+        # Remove mysql from extras if client is not installed \
         AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,}; \
     fi; \
     if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
         if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
             pip install --user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
                 --upgrade --upgrade-strategy eager; \
+            pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
         else \
-            pip install --user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
+            pip install --upgrade --upgrade-strategy only-if-needed \
+                --user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
                 --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
+            pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
+        fi; \
+    fi; \
+    if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
+        reinstalling_apache_airflow_packages=$(ls /docker-context-files/apache?airflow*.{whl,tar.gz} 2>/dev/null || true); \
+        # We want to install apache airflow packages with constraints \
+        if [[ "${reinstalling_apache_airflow_packages}" != "" ]]; then \
+            if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
+                pip install --force-reinstall --upgrade --upgrade-strategy eager \
+                    --user ${reinstalling_apache_airflow_packages}; \
+                pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
+            else \
+                pip install --force-reinstall --upgrade --upgrade-strategy only-if-needed \
+                    --user ${reinstalling_apache_airflow_packages} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
+                pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
+            fi; \
+        fi ; \
+        # All the others we want to reinstall as-is, without dependencies \
+        reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \
+            grep -v apache_airflow | grep -v apache-airflow || true); \
+        if [[ "${reinstalling_other_packages}" != "" ]]; then \
+            pip install --force-reinstall --user --no-deps ${reinstalling_other_packages}; \
         fi; \
     fi; \
     if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
         if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
             pip install --user ${ADDITIONAL_PYTHON_DEPS} --upgrade --upgrade-strategy eager; \
+            pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
         else \
-            pip install --user ${ADDITIONAL_PYTHON_DEPS} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
+            pip install --user ${ADDITIONAL_PYTHON_DEPS} --upgrade --upgrade-strategy only-if-needed \
+                --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
+            pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
         fi; \
     fi; \
-    if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
-        if ls /docker-context-files/*.{whl,tar.gz} 1> /dev/null 2>&1; then \
-            pip install --user --no-deps /docker-context-files/*.{whl,tar.gz}; \
-        fi ; \
-    fi; \
     find /root/.local/ -name '*.pyc' -print0 | xargs -0 rm -r || true ; \
     find /root/.local/ -type d -name '__pycache__' -print0 | xargs -0 rm -r || true
 
diff --git a/Dockerfile.ci b/Dockerfile.ci
index 199db6f..63360a0 100644
--- a/Dockerfile.ci
+++ b/Dockerfile.ci
@@ -281,8 +281,8 @@ RUN pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"
 RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \
         pip install \
             "https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \
-                --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" \
-                && pip uninstall --yes apache-airflow; \
+            --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
+        pip uninstall --yes apache-airflow; \
     fi
 
 # Generate random hex dump file so that we can determine whether it's faster to rebuild the image
@@ -325,7 +325,8 @@ RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
             pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy eager; \
             pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
         else \
-            pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy only-if-needed; \
+            pip install -e ".[${AIRFLOW_EXTRAS}]" --upgrade --upgrade-strategy only-if-needed\
+                --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
             pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
         fi; \
     fi
@@ -334,11 +335,28 @@ RUN if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
 # they are also installed additionally to whatever is installed from Airflow.
 COPY docker-context-files/ /docker-context-files/
 
-RUN if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} != "true" ]]; then \
-        if ls /docker-context-files/*.{whl,tar.gz} 1> /dev/null 2>&1; then \
-            pip install --no-deps /docker-context-files/*.{whl,tar.gz}; \
+# hadolint ignore=SC2086, SC2010
+RUN if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
+        reinstalling_apache_airflow_packages=$(ls /docker-context-files/apache?airflow*.{whl,tar.gz} 2>/dev/null || true); \
+        # We want to install apache airflow packages with constraints \
+        if [[ "${reinstalling_apache_airflow_packages}" != "" ]]; then \
+            if [[ "${UPGRADE_TO_NEWER_DEPENDENCIES}" != "false" ]]; then \
+                pip install --force-reinstall --upgrade --upgrade-strategy eager \
+                    --user ${reinstalling_apache_airflow_packages}; \
+                pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
+            else \
+                pip install --force-reinstall --upgrade --upgrade-strategy only-if-needed \
+                    --user ${reinstalling_apache_airflow_packages} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
+                pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"; \
+            fi; \
         fi ; \
-    fi
+        # All the others we want to reinstall as-is, without dependencies \
+        reinstalling_other_packages=$(ls /docker-context-files/*.{whl,tar.gz} 2>/dev/null | \
+            grep -v apache_airflow | grep -v apache-airflow || true); \
+        if [[ "${reinstalling_other_packages}" != "" ]]; then \
+            pip install --force-reinstall --user --no-deps ${reinstalling_other_packages}; \
+        fi; \
+    fi;
 
 # Copy all the www/ files we need to compile assets. Done as two separate COPY
 # commands so as otherwise it copies the _contents_ of static/ in to www/
diff --git a/IMAGES.rst b/IMAGES.rst
index 49cf343..5e14680 100644
--- a/IMAGES.rst
+++ b/IMAGES.rst
@@ -449,10 +449,27 @@ The following build arguments (``--build-arg`` in docker build command) can be u
 |                                          |                                          | package. It has no effect when           |
 |                                          |                                          | installing from PyPI or GitHub repo.     |
 +------------------------------------------+------------------------------------------+------------------------------------------+
-| ``INSTALL_FROM_DOCKER_CONTEXT_FILES``    | ``false``                                | If set to true, Airflow and it's         |
-|                                          |                                          | dependencies are installed from locally  |
-|                                          |                                          | downloaded .whl files placed in the      |
-|                                          |                                          | ``docker-context-files``.                |
+| ``INSTALL_FROM_DOCKER_CONTEXT_FILES``    | ``false``                                | If set to true, Airflow, providers and   |
+|                                          |                                          | all dependencies are installed from      |
+|                                          |                                          | locally built/downloaded                 |
+|                                          |                                          | .whl and .tar.gz files placed in the     |
+|                                          |                                          | ``docker-context-files``. In certain     |
+|                                          |                                          | corporate environments, this is required |
+|                                          |                                          | to install airflow from such pre-vetted  |
+|                                          |                                          | packages rather than from PyPI. For this |
+|                                          |                                          | to work, also set ``INSTALL_FROM_PYPI``. |
+|                                          |                                          | Note that packages starting with         |
+|                                          |                                          | ``apache?airflow`` glob are treated      |
+|                                          |                                          | differently than other packages. All     |
+|                                          |                                          | ``apache?airflow`` packages are          |
+|                                          |                                          | installed with dependencies limited by   |
+|                                          |                                          | airflow constraints. All other packages  |
+|                                          |                                          | are installed without dependencies       |
+|                                          |                                          | 'as-is'. If you wish to install airflow  |
+|                                          |                                          | via 'pip download' with all dependencies |
+|                                          |                                          | downloaded, you have to rename the       |
+|                                          |                                          | apache airflow and provider packages to  |
+|                                          |                                          | not start with ``apache?airflow`` glob.  |
 +------------------------------------------+------------------------------------------+------------------------------------------+
 | ``AIRFLOW_EXTRAS``                       | ``all``                                  | extras to install                        |
 +------------------------------------------+------------------------------------------+------------------------------------------+
diff --git a/docs/apache-airflow/production-deployment.rst b/docs/apache-airflow/production-deployment.rst
index c813f3a..881ee39 100644
--- a/docs/apache-airflow/production-deployment.rst
+++ b/docs/apache-airflow/production-deployment.rst
@@ -317,8 +317,19 @@ Preparing the constraint files and wheel files:
     --constraint docker-context-files/constraints-2-0.txt  \
     apache-airflow[async,aws,azure,celery,dask,elasticsearch,gcp,kubernetes,mysql,postgres,redis,slack,ssh,statsd,virtualenv]==2.0.0
 
+Since apache-airflow .whl packages are treated differently by the docker image, you need to rename the
+downloaded apache-airflow* files, for example:
 
-Building the image (after copying the files downloaded to the "docker-context-files" directory:
+.. code-block:: bash
+
+   pushd docker-context-files
+   for file in apache?airflow*
+   do
+     mv ${file} _${file}
+   done
+   popd
+
+Building the image:
 
 .. code-block:: bash
 
@@ -539,7 +550,19 @@ The following build arguments (``--build-arg`` in docker build command) can be u
 |                                          |                                          | corporate environments, this is required |
 |                                          |                                          | to install airflow from such pre-vetted  |
 |                                          |                                          | packages rather than from PyPI. For this |
-|                                          |                                          | to work, also set ``INSTALL_FROM_PYPI``  |
+|                                          |                                          | to work, also set ``INSTALL_FROM_PYPI``. |
+|                                          |                                          | Note that packages starting with         |
+|                                          |                                          | ``apache?airflow`` glob are treated      |
+|                                          |                                          | differently than other packages. All     |
+|                                          |                                          | ``apache?airflow`` packages are          |
+|                                          |                                          | installed with dependencies limited by   |
+|                                          |                                          | airflow constraints. All other packages  |
+|                                          |                                          | are installed without dependencies       |
+|                                          |                                          | 'as-is'. If you wish to install airflow  |
+|                                          |                                          | via 'pip download' with all dependencies |
+|                                          |                                          | downloaded, you have to rename the       |
+|                                          |                                          | apache airflow and provider packages to  |
+|                                          |                                          | not start with ``apache?airflow`` glob.  |
 +------------------------------------------+------------------------------------------+------------------------------------------+
 | ``UPGRADE_TO_NEWER_DEPENDENCIES``        | ``false``                                | If set to true, the dependencies are     |
 |                                          |                                          | upgraded to newer versions matching      |
diff --git a/scripts/ci/images/ci_build_dockerhub.sh b/scripts/ci/images/ci_build_dockerhub.sh
index 4b67cef..decf224 100755
--- a/scripts/ci/images/ci_build_dockerhub.sh
+++ b/scripts/ci/images/ci_build_dockerhub.sh
@@ -68,8 +68,8 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
     # we need to run those in sub-processes
     (
         export INSTALL_FROM_PYPI="true"
-        export INSTALL_FROM_DOCKER_CONTEXT_FILES="false"
         export INSTALL_PROVIDERS_FROM_SOURCES="true"
+        export INSTALL_FROM_DOCKER_CONTEXT_FILES="false"
         export AIRFLOW_PRE_CACHED_PIP_PACKAGES="true"
         export DOCKER_CACHE="pulled"
         # shellcheck source=scripts/ci/libraries/_script_init.sh
@@ -86,7 +86,6 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
     (
         export INSTALL_FROM_PYPI="false"
         export INSTALL_FROM_DOCKER_CONTEXT_FILES="true"
-        export INSTALL_PROVIDERS_FROM_SOURCES="false"
         export AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"
         export DOCKER_CACHE="pulled"
         # shellcheck source=scripts/ci/libraries/_script_init.sh
@@ -97,7 +96,7 @@ if [[ ! "${DOCKER_TAG}" =~ ^[0-9].* ]]; then
         rm -rf "${BUILD_CACHE_DIR}"
         rm -rf "${AIRFLOW_SOURCES}/docker-context-files/*"
         build_images::prepare_prod_build
-        build_images::build_prod_images_from_packages
+        build_images::build_prod_images_from_locally_built_airflow_packages
         push_pull_remove_images::push_prod_images
     )
 else
diff --git a/scripts/ci/images/ci_prepare_prod_image_on_ci.sh b/scripts/ci/images/ci_prepare_prod_image_on_ci.sh
index 700487c..4d2309f 100755
--- a/scripts/ci/images/ci_prepare_prod_image_on_ci.sh
+++ b/scripts/ci/images/ci_prepare_prod_image_on_ci.sh
@@ -18,9 +18,8 @@
 
 export INSTALL_FROM_PYPI="false"
 export INSTALL_FROM_DOCKER_CONTEXT_FILES="true"
-export INSTALL_PROVIDERS_FROM_SOURCES="false"
 export AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"
-export DOCKER_CACHE="local"
+export DOCKER_CACHE="pulled"
 export VERBOSE="true"
 
 
@@ -41,7 +40,7 @@ function build_prod_images_on_ci() {
             ":${GITHUB_REGISTRY_PULL_IMAGE_TAG}" "${AIRFLOW_PROD_IMAGE}"
 
     else
-        build_images::build_prod_images_from_packages
+        build_images::build_prod_images_from_locally_built_airflow_packages
     fi
 
 
diff --git a/scripts/ci/libraries/_build_images.sh b/scripts/ci/libraries/_build_images.sh
index 2e37209..8b637ef 100644
--- a/scripts/ci/libraries/_build_images.sh
+++ b/scripts/ci/libraries/_build_images.sh
@@ -913,31 +913,40 @@ function build_images::determine_docker_cache_strategy() {
 }
 
 
-function build_images::build_prod_images_from_packages() {
+function build_image::assert_variable() {
+    local variable_name="${1}"
+    local expected_value="${2}"
+    local variable_value=${!variable_name}
+    if [[ ${variable_value} != "${expected_value}" ]]; then
+        echo
+        echo  "${COLOR_RED_ERROR}: Variable ${variable_name}: expected_value: '${expected_value}' but was '${variable_value}'!${COLOR_RESET}"
+        echo
+        exit 1
+    fi
+}
+
+function build_images::build_prod_images_from_locally_built_airflow_packages() {
+    # We do not install from PyPI
+    build_image::assert_variable INSTALL_FROM_PYPI "false"
+    # But then we reinstall airflow and providers from prepared packages in the docker context files
+    build_image::assert_variable INSTALL_FROM_DOCKER_CONTEXT_FILES "true"
+    # But we install everything from scratch to make a "clean" installation in case any dependencies got removed
+    build_image::assert_variable AIRFLOW_PRE_CACHED_PIP_PACKAGES "false"
+
     # Cleanup dist and docker-context-files folders
     mkdir -pv "${AIRFLOW_SOURCES}/dist"
     mkdir -pv "${AIRFLOW_SOURCES}/docker-context-files"
     rm -f "${AIRFLOW_SOURCES}/dist/"*.{whl,tar.gz}
     rm -f "${AIRFLOW_SOURCES}/docker-context-files/"*.{whl,tar.gz}
 
-    runs::run_pip_download
-
-    # Remove all downloaded apache airflow packages
-    rm -f "${AIRFLOW_SOURCES}/dist/"apache_airflow*.whl
-    rm -f "${AIRFLOW_SOURCES}/dist/"apache-airflow*.tar.gz
-
-    # Remove all downloaded apache airflow packages
-    mv -f "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"
-
     # Build necessary provider packages
     runs::run_prepare_provider_packages "${INSTALLED_PROVIDERS[@]}"
-
     mv "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"
 
     # Build apache airflow packages
     build_airflow_packages::build_airflow_packages
-
     mv "${AIRFLOW_SOURCES}/dist/"* "${AIRFLOW_SOURCES}/docker-context-files/"
+
     build_images::build_prod_images_with_group
 }
 
diff --git a/scripts/ci/libraries/_runs.sh b/scripts/ci/libraries/_runs.sh
index 09bef0b..5f4569f 100644
--- a/scripts/ci/libraries/_runs.sh
+++ b/scripts/ci/libraries/_runs.sh
@@ -27,22 +27,6 @@ function runs::run_docs() {
     start_end::group_end
 }
 
-# Downloads packages from PIP
-function runs::run_pip_download() {
-    start_end::group_start "PIP download"
-    if [[ ${UPGRADE_TO_NEWER_DEPENDENCIES} ]]; then
-        pip_download_command="pip download -d /dist '.[${INSTALLED_EXTRAS}]'"
-    else
-        pip_download_command="pip download -d /dist '.[${INSTALLED_EXTRAS}]' --constraint
-'https://raw.githubusercontent.com/apache/airflow/${DEFAULT_CONSTRAINTS_BRANCH}/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt'"
-    fi
-    # Download all dependencies needed
-    docker run --rm --entrypoint /bin/bash \
-        "${EXTRA_DOCKER_FLAGS[@]}" \
-        "${AIRFLOW_CI_IMAGE}" -c "${pip_download_command}"
-    start_end::group_end
-}
-
 # Docker command to generate constraint files.
 function runs::run_generate_constraints() {
     start_end::group_start "Run generate constraints"