You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ka...@apache.org on 2020/12/03 00:10:26 UTC

[airflow] 25/34: Improve verification of images with PIP check (#12718)

This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch v1-10-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 591dc992977f2a0c9ac1bbd06d0a1730cb260e44
Author: Jarek Potiuk <ja...@polidea.com>
AuthorDate: Tue Dec 1 09:51:24 2020 +0100

    Improve verification of images with PIP check (#12718)
    
    Verification of images with PIP is done in separate jobs and
    they provide useful information to committers and contributors
    when the pip check fails.
    
    (cherry picked from commit ebc8fcf199d3304d8c55f6c3908108701c05f9ad)
---
 .github/workflows/ci.yml                           |  47 +++++++++
 scripts/ci/images/ci_prepare_prod_image_on_ci.sh   |   1 -
 ..._wait_for_ci_image.sh => ci_verify_ci_image.sh} |  49 +++++----
 scripts/ci/images/ci_verify_prod_image.sh          | 116 +++++++++++++++++++++
 scripts/ci/images/ci_wait_for_ci_image.sh          |  18 ----
 scripts/ci/images/ci_wait_for_prod_image.sh        |  18 ----
 scripts/ci/libraries/_build_images.sh              |  61 +++++++++++
 7 files changed, 250 insertions(+), 60 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 77cbf65..9655955 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -177,6 +177,28 @@ jobs:
         run: ./scripts/ci/images/ci_wait_for_all_ci_images.sh
         if: needs.build-info.outputs.waitForImage == 'true'
 
+  verify-ci-images:
+    timeout-minutes: 20
+    name: "Verify CI Image Py${{matrix.python-version}}"
+    runs-on: ubuntu-20.04
+    needs: [build-info, ci-images]
+    strategy:
+      matrix:
+        python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }}
+    env:
+      BACKEND: sqlite
+    if: needs.build-info.outputs.image-build == 'true'
+    steps:
+      - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+        uses: actions/checkout@v2
+        if: needs.build-info.outputs.waitForImage == 'true'
+      - name: "Free space"
+        run: ./scripts/ci/tools/ci_free_space_on_ci.sh
+        if: needs.build-info.outputs.waitForImage == 'true'
+      - name: "Verify CI image Py${{matrix.python-version}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}"
+        run: ./scripts/ci/images/ci_verify_ci_image.sh
+        if: needs.build-info.outputs.waitForImage == 'true'
+
   static-checks:
     timeout-minutes: 30
     name: "Static checks"
@@ -593,6 +615,27 @@ jobs:
         run: ./scripts/ci/images/ci_wait_for_all_prod_images.sh
         if: needs.build-info.outputs.waitForImage == 'true'
 
+  verify-prod-images:
+    timeout-minutes: 20
+    name: "Verify Prod Image Py${{matrix.python-version}}"
+    runs-on: ubuntu-20.04
+    needs: [build-info, prod-images]
+    strategy:
+      matrix:
+        python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }}
+    env:
+      BACKEND: sqlite
+    steps:
+      - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+        uses: actions/checkout@v2
+        if: needs.build-info.outputs.waitForImage == 'true'
+      - name: "Free space"
+        run: ./scripts/ci/tools/ci_free_space_on_ci.sh
+        if: needs.build-info.outputs.waitForImage == 'true'
+      - name: "Verify PROD image Py${{matrix.python-version}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}"
+        run: ./scripts/ci/images/ci_verify_prod_image.sh
+        if: needs.build-info.outputs.waitForImage == 'true'
+
   tests-kubernetes:
     timeout-minutes: 50
     name: K8s ${{matrix.python-version}} ${{matrix.kubernetes-version}} ${{matrix.kubernetes-mode}}
@@ -681,6 +724,7 @@ jobs:
       - tests-mysql
       - tests-kubernetes
       - prod-images
+      - verify-prod-images
       - docs
     if: >
       (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/v1-10-test') &&
@@ -717,6 +761,7 @@ jobs:
       - tests-mysql
       - tests-kubernetes
       - ci-images
+      - verify-ci-images
       - docs
     if: >
       (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/v1-10-test' ) &&
@@ -781,6 +826,8 @@ jobs:
     needs:
       - build-info
       - constraints
+      - verify-ci-images
+      - verify-prod-images
       - static-checks
       - tests-sqlite
       - tests-mysql
diff --git a/scripts/ci/images/ci_prepare_prod_image_on_ci.sh b/scripts/ci/images/ci_prepare_prod_image_on_ci.sh
index f8cdcd2..d38182b 100755
--- a/scripts/ci/images/ci_prepare_prod_image_on_ci.sh
+++ b/scripts/ci/images/ci_prepare_prod_image_on_ci.sh
@@ -47,5 +47,4 @@ function build_prod_images_on_ci() {
     unset FORCE_BUILD
 }
 
-
 build_prod_images_on_ci
diff --git a/scripts/ci/images/ci_wait_for_ci_image.sh b/scripts/ci/images/ci_verify_ci_image.sh
similarity index 57%
copy from scripts/ci/images/ci_wait_for_ci_image.sh
copy to scripts/ci/images/ci_verify_ci_image.sh
index 2c0bdf2..e1f2b98 100755
--- a/scripts/ci/images/ci_wait_for_ci_image.sh
+++ b/scripts/ci/images/ci_verify_ci_image.sh
@@ -16,37 +16,40 @@
 # specific language governing permissions and limitations
 # under the License.
 # shellcheck source=scripts/ci/libraries/_script_init.sh
-. "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh"
+. "$(dirname "${BASH_SOURCE[0]}")/../libraries/_script_init.sh"
+
+function verify_ci_image_dependencies() {
 
-function verify_ci_image_dependencies {
     echo
-    echo "Checking if Airflow dependencies are non-conflicting in CI image."
+    echo "Checking if Airflow dependencies are non-conflicting in ${AIRFLOW_CI_IMAGE} image."
     echo
 
-    push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_CI_IMAGE}" \
-        "${GITHUB_REGISTRY_AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-
-    # TODO: remove after we have it fully working
-    docker run --rm --entrypoint /bin/bash "${AIRFLOW_CI_IMAGE}" -c 'pip check' || true
+    set +e
+    docker run --rm --entrypoint /bin/bash "${AIRFLOW_CI_IMAGE}" -c 'pip check'
+    local res=$?
+    if [[ ${res} != "0" ]]; then
+        echo -e " \e[31mERROR: ^^^ Some dependencies are conflicting. See instructions below on how to deal with it.\e[0m"
+        echo
+        build_images::inform_about_pip_check ""
+    else
+        echo
+        echo -e " \e[32mOK. The ${AIRFLOW_PROD_IMAGE} image dependencies are consistent.\e[0m"
+        echo
+    fi
+    set -e
+    exit ${res}
 }
 
-push_pull_remove_images::check_if_github_registry_wait_for_image_enabled
-
-push_pull_remove_images::check_if_jq_installed
-
-build_image::login_to_github_registry_if_needed
-
-export AIRFLOW_CI_IMAGE_NAME="${BRANCH_NAME}-python${PYTHON_MAJOR_MINOR_VERSION}-ci"
+function pull_ci_image() {
+    local image_name_with_tag="${GITHUB_REGISTRY_AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
+    echo
+    echo "Pulling the ${image_name_with_tag} image."
+    echo
 
-echo
-echo "Waiting for image to appear: ${AIRFLOW_CI_IMAGE_NAME}"
-echo
+    push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_CI_IMAGE}" "${image_name_with_tag}"
+}
 
-push_pull_remove_images::wait_for_github_registry_image \
-    "${AIRFLOW_CI_IMAGE_NAME}" "${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
 
-echo
-echo "Verifying the ${AIRFLOW_CI_IMAGE_NAME} image after pulling it"
-echo
+build_images::prepare_ci_build
 
 verify_ci_image_dependencies
diff --git a/scripts/ci/images/ci_verify_prod_image.sh b/scripts/ci/images/ci_verify_prod_image.sh
new file mode 100755
index 0000000..b330904
--- /dev/null
+++ b/scripts/ci/images/ci_verify_prod_image.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# shellcheck source=scripts/ci/libraries/_script_init.sh
+. "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh"
+
+function verify_prod_image_has_airflow_and_providers {
+    echo
+    echo "Airflow folders installed in the image:"
+    echo
+
+    AIRFLOW_INSTALLATION_LOCATION="/home/airflow/.local"
+
+    docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c \
+        'find '"${AIRFLOW_INSTALLATION_LOCATION}"'/lib/python*/site-packages/airflow/ -type d'
+
+    EXPECTED_MIN_AIRFLOW_DIRS_COUNT="60"
+    readonly EXPECTED_MIN_AIRFLOW_DIRS_COUNT
+
+    COUNT_AIRFLOW_DIRS=$(docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c \
+         'find '"${AIRFLOW_INSTALLATION_LOCATION}"'/lib/python*/site-packages/airflow/ -type d | grep -c -v "/airflow/providers"')
+
+    echo
+    echo "Number of airflow dirs: ${COUNT_AIRFLOW_DIRS}"
+    echo
+
+    if [[ "${COUNT_AIRFLOW_DIRS}" -lt "${EXPECTED_MIN_AIRFLOW_DIRS_COUNT}" ]]; then
+        >&2 echo
+        >&2 echo Number of airflow folders installed is less than ${EXPECTED_MIN_AIRFLOW_DIRS_COUNT}
+        >&2 echo This is unexpected. Please investigate, looking at the output above!
+        >&2 echo
+        exit 1
+    else
+        echo
+        echo -e " \e[32mOK. Airflow is installed.\e[0m"
+        echo
+    fi
+
+    EXPECTED_MIN_PROVIDERS_DIRS_COUNT="240"
+    readonly EXPECTED_MIN_PROVIDERS_DIRS_COUNT
+
+    COUNT_AIRFLOW_PROVIDERS_DIRS=$(docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c \
+         'find '"${AIRFLOW_INSTALLATION_LOCATION}"'/lib/python*/site-packages/airflow/providers -type d | grep -c "" | xargs')
+
+    echo
+    echo "Number of providers dirs: ${COUNT_AIRFLOW_PROVIDERS_DIRS}"
+    echo
+
+    if [ "${COUNT_AIRFLOW_PROVIDERS_DIRS}" -lt "${EXPECTED_MIN_PROVIDERS_DIRS_COUNT}" ]; then
+        >&2 echo
+        >&2 echo Number of providers folders installed is less than ${EXPECTED_MIN_PROVIDERS_DIRS_COUNT}
+        >&2 echo This is unexpected. Please investigate, looking at the output above!
+        >&2 echo
+        exit 1
+    else
+        echo
+        echo -e " \e[32mOK. Airflow Providers are installed.\e[0m"
+        echo
+    fi
+}
+
+
+function verify_prod_image_dependencies {
+
+    echo
+    echo "Checking if Airflow dependencies are non-conflicting in ${AIRFLOW_PROD_IMAGE} image."
+    echo
+
+    set +e
+    docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c 'pip check'
+    local res=$?
+    if [[ ${res} != "0" ]]; then
+        echo -e " \e[31mERROR: ^^^ Some dependencies are conflicting. See instructions below on how to deal with it.\e[0m"
+        echo
+        build_images::inform_about_pip_check "--production "
+        # TODO(potiuk) - enable the comment once https://github.com/apache/airflow/pull/12188 is merged
+        # exit ${res}
+    else
+        echo
+        echo " \e[32mOK. The ${AIRFLOW_PROD_IMAGE} image dependencies are consistent.\e[0m"
+        echo
+    fi
+    set -e
+
+}
+
+function pull_prod_image() {
+    local image_name_with_tag="${GITHUB_REGISTRY_AIRFLOW_PROD_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
+
+    echo
+    echo "Pulling the ${image_name_with_tag} image."
+    echo
+
+    push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_PROD_IMAGE}" "${image_name_with_tag}"
+}
+
+build_images::prepare_prod_build
+
+
+verify_prod_image_has_airflow_and_providers
+
+verify_prod_image_dependencies
diff --git a/scripts/ci/images/ci_wait_for_ci_image.sh b/scripts/ci/images/ci_wait_for_ci_image.sh
index 2c0bdf2..0c3ea08 100755
--- a/scripts/ci/images/ci_wait_for_ci_image.sh
+++ b/scripts/ci/images/ci_wait_for_ci_image.sh
@@ -18,18 +18,6 @@
 # shellcheck source=scripts/ci/libraries/_script_init.sh
 . "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh"
 
-function verify_ci_image_dependencies {
-    echo
-    echo "Checking if Airflow dependencies are non-conflicting in CI image."
-    echo
-
-    push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_CI_IMAGE}" \
-        "${GITHUB_REGISTRY_AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-
-    # TODO: remove after we have it fully working
-    docker run --rm --entrypoint /bin/bash "${AIRFLOW_CI_IMAGE}" -c 'pip check' || true
-}
-
 push_pull_remove_images::check_if_github_registry_wait_for_image_enabled
 
 push_pull_remove_images::check_if_jq_installed
@@ -44,9 +32,3 @@ echo
 
 push_pull_remove_images::wait_for_github_registry_image \
     "${AIRFLOW_CI_IMAGE_NAME}" "${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-
-echo
-echo "Verifying the ${AIRFLOW_CI_IMAGE_NAME} image after pulling it"
-echo
-
-verify_ci_image_dependencies
diff --git a/scripts/ci/images/ci_wait_for_prod_image.sh b/scripts/ci/images/ci_wait_for_prod_image.sh
index e53aec1..1c7cef5 100755
--- a/scripts/ci/images/ci_wait_for_prod_image.sh
+++ b/scripts/ci/images/ci_wait_for_prod_image.sh
@@ -18,18 +18,6 @@
 # shellcheck source=scripts/ci/libraries/_script_init.sh
 . "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh"
 
-function verify_prod_image_dependencies {
-    echo
-    echo "Checking if Airflow dependencies are non-conflicting in PROD image."
-    echo
-
-    push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_PROD_IMAGE}" \
-        "${GITHUB_REGISTRY_AIRFLOW_PROD_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-
-    # TODO: remove the | true after we fixed pip check for prod image
-    docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c 'pip check' || true
-}
-
 push_pull_remove_images::check_if_github_registry_wait_for_image_enabled
 
 push_pull_remove_images::check_if_jq_installed
@@ -44,9 +32,3 @@ echo
 
 push_pull_remove_images::wait_for_github_registry_image \
     "${AIRFLOW_PROD_IMAGE_NAME}" "${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-
-echo
-echo "Verifying the ${AIRFLOW_PROD_IMAGE_NAME} image after pulling it"
-echo
-
-verify_prod_image_dependencies
diff --git a/scripts/ci/libraries/_build_images.sh b/scripts/ci/libraries/_build_images.sh
index 8de58db..30e7a85 100644
--- a/scripts/ci/libraries/_build_images.sh
+++ b/scripts/ci/libraries/_build_images.sh
@@ -849,3 +849,64 @@ function build_images::determine_docker_cache_strategy() {
     verbosity::print_info "Using ${DOCKER_CACHE} cache strategy for the build."
     verbosity::print_info
 }
+
+
+# Useful information for people who stumble upon a pip check failure
+function build_images::inform_about_pip_check() {
+        >&2 echo """
+
+The image did not pass 'pip check' verification. This means that there are some conflicting dependencies
+in the image. Usually it means that some setup.py or setup.cfg limits need to be adjusted to fix it.
+
+Usually it happens when one of the dependencies gets upgraded and it has more strict requirements
+than the other dependencies and they are conflicting.
+
+In case you did not update setup.py or any of your dependencies, this error might happen in case
+someone accidentally merges conflicting dependencies in master. This
+should not happen as we are running 'pip check' as dependency before we upgrade the constrained
+dependencies, but we could miss some edge cases (thank you for your patience). Please let committer now
+and apologies for the troubles. You do not have to do anything in this case. You might be asked to
+rebase to the latest master after the problem is fixed.
+
+In case you actually updated setup.py, there are some steps you can take to address that:
+
+* first of all ask the committer to set 'upgrade to newer dependencies' and 'full tests needed' labels
+  for your PR. This will turn your PR in mode where all the dependencies are upgraded to latest matching
+  dependencies and the checks will run for all python versions
+
+* run locally the image that is failing with Breeze - this will make it easy to manually try to update
+  the setup.py and test the consequences of changing constraints. You can do it by checking out your PR
+  and running this command:
+
+    ./breeze ${1}--github-image-id ${GITHUB_REGISTRY_PULL_IMAGE_TAG} --backend ${BACKEND} --python ${PYTHON_MAJOR_MINOR_VERSION}
+
+* your setup.py and setup.cfg will be mounted to the container and you will be able to iterate with
+  different setup.py versions.
+
+* run 'pipdeptree' to figure out where the dependency conflict comes from. Useful commands that can help you
+  to find out dependencies you have are:
+     * 'pipdeptree | less' (you can then search through the dependencies with vim-like shortcuts)
+     * 'pipdeptree > /files/pipdeptree.txt' - this will produce a pipdeptree.txt file in your source
+       'files' directory and you can open it in editor of your choice,
+     * 'pipdeptree | grep YOUR_DEPENDENCY' - to see all the requirements your dependency has as specified
+       by other packages
+
+* figure out which dependency limits should be upgraded. First try to upgrade them in setup.py extras
+  and run pip to upgrade your dependencies accordingly:
+
+     pip install '.[all]' --upgrade --upgrade-strategy eager
+
+* run pip check to figure out if the dependencies have been fixed (it should let you know which dependencies
+  are conflicting or (hurray!) if there are no conflicts:
+
+     pip check
+
+* in some, rare, cases, pip will not limit the requirement in case you specify it in extras, you might
+  need to add such requirement in 'install_requires' section of setup.cfg in order to have pip take it into
+  account. This will happen if higher version of your dependency is already installed in 'install_requires'
+  section. In such case update 'setup.cfg' and run pip install/pip check from the previous steps
+
+* iterate until all such dependency conflicts are fixed.
+
+"""
+}