You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2020/12/01 09:05:33 UTC
[airflow] branch v1-10-test updated: Improve verification of images
with PIP check (#12718)
This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch v1-10-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v1-10-test by this push:
new cd41b3e Improve verification of images with PIP check (#12718)
cd41b3e is described below
commit cd41b3e747b9dceedd5d7b28abcf9d755f70df1c
Author: Jarek Potiuk <ja...@polidea.com>
AuthorDate: Tue Dec 1 09:51:24 2020 +0100
Improve verification of images with PIP check (#12718)
Verification of images with PIP is done in separate jobs and
they provide useful information to committers and contributors
when the pip check fails.
(cherry picked from commit ebc8fcf199d3304d8c55f6c3908108701c05f9ad)
---
.github/workflows/ci.yml | 47 +++++++++
scripts/ci/images/ci_prepare_prod_image_on_ci.sh | 1 -
..._wait_for_ci_image.sh => ci_verify_ci_image.sh} | 49 +++++----
scripts/ci/images/ci_verify_prod_image.sh | 116 +++++++++++++++++++++
scripts/ci/images/ci_wait_for_ci_image.sh | 18 ----
scripts/ci/images/ci_wait_for_prod_image.sh | 18 ----
scripts/ci/libraries/_build_images.sh | 61 +++++++++++
7 files changed, 250 insertions(+), 60 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 77cbf65..9655955 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -177,6 +177,28 @@ jobs:
run: ./scripts/ci/images/ci_wait_for_all_ci_images.sh
if: needs.build-info.outputs.waitForImage == 'true'
+ verify-ci-images:
+ timeout-minutes: 20
+ name: "Verify CI Image Py${{matrix.python-version}}"
+ runs-on: ubuntu-20.04
+ needs: [build-info, ci-images]
+ strategy:
+ matrix:
+ python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }}
+ env:
+ BACKEND: sqlite
+ if: needs.build-info.outputs.image-build == 'true'
+ steps:
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v2
+ if: needs.build-info.outputs.waitForImage == 'true'
+ - name: "Free space"
+ run: ./scripts/ci/tools/ci_free_space_on_ci.sh
+ if: needs.build-info.outputs.waitForImage == 'true'
+ - name: "Verify CI image Py${{matrix.python-version}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}"
+ run: ./scripts/ci/images/ci_verify_ci_image.sh
+ if: needs.build-info.outputs.waitForImage == 'true'
+
static-checks:
timeout-minutes: 30
name: "Static checks"
@@ -593,6 +615,27 @@ jobs:
run: ./scripts/ci/images/ci_wait_for_all_prod_images.sh
if: needs.build-info.outputs.waitForImage == 'true'
+ verify-prod-images:
+ timeout-minutes: 20
+ name: "Verify Prod Image Py${{matrix.python-version}}"
+ runs-on: ubuntu-20.04
+ needs: [build-info, prod-images]
+ strategy:
+ matrix:
+ python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }}
+ env:
+ BACKEND: sqlite
+ steps:
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v2
+ if: needs.build-info.outputs.waitForImage == 'true'
+ - name: "Free space"
+ run: ./scripts/ci/tools/ci_free_space_on_ci.sh
+ if: needs.build-info.outputs.waitForImage == 'true'
+ - name: "Verify PROD image Py${{matrix.python-version}}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}"
+ run: ./scripts/ci/images/ci_verify_prod_image.sh
+ if: needs.build-info.outputs.waitForImage == 'true'
+
tests-kubernetes:
timeout-minutes: 50
name: K8s ${{matrix.python-version}} ${{matrix.kubernetes-version}} ${{matrix.kubernetes-mode}}
@@ -681,6 +724,7 @@ jobs:
- tests-mysql
- tests-kubernetes
- prod-images
+ - verify-prod-images
- docs
if: >
(github.ref == 'refs/heads/master' || github.ref == 'refs/heads/v1-10-test') &&
@@ -717,6 +761,7 @@ jobs:
- tests-mysql
- tests-kubernetes
- ci-images
+ - verify-ci-images
- docs
if: >
(github.ref == 'refs/heads/master' || github.ref == 'refs/heads/v1-10-test' ) &&
@@ -781,6 +826,8 @@ jobs:
needs:
- build-info
- constraints
+ - verify-ci-images
+ - verify-prod-images
- static-checks
- tests-sqlite
- tests-mysql
diff --git a/scripts/ci/images/ci_prepare_prod_image_on_ci.sh b/scripts/ci/images/ci_prepare_prod_image_on_ci.sh
index f8cdcd2..d38182b 100755
--- a/scripts/ci/images/ci_prepare_prod_image_on_ci.sh
+++ b/scripts/ci/images/ci_prepare_prod_image_on_ci.sh
@@ -47,5 +47,4 @@ function build_prod_images_on_ci() {
unset FORCE_BUILD
}
-
build_prod_images_on_ci
diff --git a/scripts/ci/images/ci_wait_for_ci_image.sh b/scripts/ci/images/ci_verify_ci_image.sh
similarity index 57%
copy from scripts/ci/images/ci_wait_for_ci_image.sh
copy to scripts/ci/images/ci_verify_ci_image.sh
index 2c0bdf2..e1f2b98 100755
--- a/scripts/ci/images/ci_wait_for_ci_image.sh
+++ b/scripts/ci/images/ci_verify_ci_image.sh
@@ -16,37 +16,40 @@
# specific language governing permissions and limitations
# under the License.
# shellcheck source=scripts/ci/libraries/_script_init.sh
-. "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh"
+. "$(dirname "${BASH_SOURCE[0]}")/../libraries/_script_init.sh"
+
+function verify_ci_image_dependencies() {
-function verify_ci_image_dependencies {
echo
- echo "Checking if Airflow dependencies are non-conflicting in CI image."
+ echo "Checking if Airflow dependencies are non-conflicting in ${AIRFLOW_CI_IMAGE} image."
echo
- push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_CI_IMAGE}" \
- "${GITHUB_REGISTRY_AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-
- # TODO: remove after we have it fully working
- docker run --rm --entrypoint /bin/bash "${AIRFLOW_CI_IMAGE}" -c 'pip check' || true
+ set +e
+ docker run --rm --entrypoint /bin/bash "${AIRFLOW_CI_IMAGE}" -c 'pip check'
+ local res=$?
+ if [[ ${res} != "0" ]]; then
+ echo -e " \e[31mERROR: ^^^ Some dependencies are conflicting. See instructions below on how to deal with it.\e[0m"
+ echo
+ build_images::inform_about_pip_check ""
+ else
+ echo
+ echo -e " \e[32mOK. The ${AIRFLOW_PROD_IMAGE} image dependencies are consistent.\e[0m"
+ echo
+ fi
+ set -e
+ exit ${res}
}
-push_pull_remove_images::check_if_github_registry_wait_for_image_enabled
-
-push_pull_remove_images::check_if_jq_installed
-
-build_image::login_to_github_registry_if_needed
-
-export AIRFLOW_CI_IMAGE_NAME="${BRANCH_NAME}-python${PYTHON_MAJOR_MINOR_VERSION}-ci"
+function pull_ci_image() {
+ local image_name_with_tag="${GITHUB_REGISTRY_AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
+ echo
+ echo "Pulling the ${image_name_with_tag} image."
+ echo
-echo
-echo "Waiting for image to appear: ${AIRFLOW_CI_IMAGE_NAME}"
-echo
+ push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_CI_IMAGE}" "${image_name_with_tag}"
+}
-push_pull_remove_images::wait_for_github_registry_image \
- "${AIRFLOW_CI_IMAGE_NAME}" "${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-echo
-echo "Verifying the ${AIRFLOW_CI_IMAGE_NAME} image after pulling it"
-echo
+build_images::prepare_ci_build
verify_ci_image_dependencies
diff --git a/scripts/ci/images/ci_verify_prod_image.sh b/scripts/ci/images/ci_verify_prod_image.sh
new file mode 100755
index 0000000..b330904
--- /dev/null
+++ b/scripts/ci/images/ci_verify_prod_image.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# shellcheck source=scripts/ci/libraries/_script_init.sh
+. "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh"
+
+function verify_prod_image_has_airflow_and_providers {
+ echo
+ echo "Airflow folders installed in the image:"
+ echo
+
+ AIRFLOW_INSTALLATION_LOCATION="/home/airflow/.local"
+
+ docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c \
+ 'find '"${AIRFLOW_INSTALLATION_LOCATION}"'/lib/python*/site-packages/airflow/ -type d'
+
+ EXPECTED_MIN_AIRFLOW_DIRS_COUNT="60"
+ readonly EXPECTED_MIN_AIRFLOW_DIRS_COUNT
+
+ COUNT_AIRFLOW_DIRS=$(docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c \
+ 'find '"${AIRFLOW_INSTALLATION_LOCATION}"'/lib/python*/site-packages/airflow/ -type d | grep -c -v "/airflow/providers"')
+
+ echo
+ echo "Number of airflow dirs: ${COUNT_AIRFLOW_DIRS}"
+ echo
+
+ if [[ "${COUNT_AIRFLOW_DIRS}" -lt "${EXPECTED_MIN_AIRFLOW_DIRS_COUNT}" ]]; then
+ >&2 echo
+ >&2 echo Number of airflow folders installed is less than ${EXPECTED_MIN_AIRFLOW_DIRS_COUNT}
+ >&2 echo This is unexpected. Please investigate, looking at the output above!
+ >&2 echo
+ exit 1
+ else
+ echo
+ echo -e " \e[32mOK. Airflow is installed.\e[0m"
+ echo
+ fi
+
+ EXPECTED_MIN_PROVIDERS_DIRS_COUNT="240"
+ readonly EXPECTED_MIN_PROVIDERS_DIRS_COUNT
+
+ COUNT_AIRFLOW_PROVIDERS_DIRS=$(docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c \
+ 'find '"${AIRFLOW_INSTALLATION_LOCATION}"'/lib/python*/site-packages/airflow/providers -type d | grep -c "" | xargs')
+
+ echo
+ echo "Number of providers dirs: ${COUNT_AIRFLOW_PROVIDERS_DIRS}"
+ echo
+
+ if [ "${COUNT_AIRFLOW_PROVIDERS_DIRS}" -lt "${EXPECTED_MIN_PROVIDERS_DIRS_COUNT}" ]; then
+ >&2 echo
+ >&2 echo Number of providers folders installed is less than ${EXPECTED_MIN_PROVIDERS_DIRS_COUNT}
+ >&2 echo This is unexpected. Please investigate, looking at the output above!
+ >&2 echo
+ exit 1
+ else
+ echo
+ echo -e " \e[32mOK. Airflow Providers are installed.\e[0m"
+ echo
+ fi
+}
+
+
+function verify_prod_image_dependencies {
+
+ echo
+ echo "Checking if Airflow dependencies are non-conflicting in ${AIRFLOW_PROD_IMAGE} image."
+ echo
+
+ set +e
+ docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c 'pip check'
+ local res=$?
+ if [[ ${res} != "0" ]]; then
+ echo -e " \e[31mERROR: ^^^ Some dependencies are conflicting. See instructions below on how to deal with it.\e[0m"
+ echo
+ build_images::inform_about_pip_check "--production "
+ # TODO(potiuk) - enable the comment once https://github.com/apache/airflow/pull/12188 is merged
+ # exit ${res}
+ else
+ echo
+ echo " \e[32mOK. The ${AIRFLOW_PROD_IMAGE} image dependencies are consistent.\e[0m"
+ echo
+ fi
+ set -e
+
+}
+
+function pull_prod_image() {
+ local image_name_with_tag="${GITHUB_REGISTRY_AIRFLOW_PROD_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
+
+ echo
+ echo "Pulling the ${image_name_with_tag} image."
+ echo
+
+ push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_PROD_IMAGE}" "${image_name_with_tag}"
+}
+
+build_images::prepare_prod_build
+
+
+verify_prod_image_has_airflow_and_providers
+
+verify_prod_image_dependencies
diff --git a/scripts/ci/images/ci_wait_for_ci_image.sh b/scripts/ci/images/ci_wait_for_ci_image.sh
index 2c0bdf2..0c3ea08 100755
--- a/scripts/ci/images/ci_wait_for_ci_image.sh
+++ b/scripts/ci/images/ci_wait_for_ci_image.sh
@@ -18,18 +18,6 @@
# shellcheck source=scripts/ci/libraries/_script_init.sh
. "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh"
-function verify_ci_image_dependencies {
- echo
- echo "Checking if Airflow dependencies are non-conflicting in CI image."
- echo
-
- push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_CI_IMAGE}" \
- "${GITHUB_REGISTRY_AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-
- # TODO: remove after we have it fully working
- docker run --rm --entrypoint /bin/bash "${AIRFLOW_CI_IMAGE}" -c 'pip check' || true
-}
-
push_pull_remove_images::check_if_github_registry_wait_for_image_enabled
push_pull_remove_images::check_if_jq_installed
@@ -44,9 +32,3 @@ echo
push_pull_remove_images::wait_for_github_registry_image \
"${AIRFLOW_CI_IMAGE_NAME}" "${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-
-echo
-echo "Verifying the ${AIRFLOW_CI_IMAGE_NAME} image after pulling it"
-echo
-
-verify_ci_image_dependencies
diff --git a/scripts/ci/images/ci_wait_for_prod_image.sh b/scripts/ci/images/ci_wait_for_prod_image.sh
index e53aec1..1c7cef5 100755
--- a/scripts/ci/images/ci_wait_for_prod_image.sh
+++ b/scripts/ci/images/ci_wait_for_prod_image.sh
@@ -18,18 +18,6 @@
# shellcheck source=scripts/ci/libraries/_script_init.sh
. "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh"
-function verify_prod_image_dependencies {
- echo
- echo "Checking if Airflow dependencies are non-conflicting in PROD image."
- echo
-
- push_pull_remove_images::pull_image_github_dockerhub "${AIRFLOW_PROD_IMAGE}" \
- "${GITHUB_REGISTRY_AIRFLOW_PROD_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-
- # TODO: remove the | true after we fixed pip check for prod image
- docker run --rm --entrypoint /bin/bash "${AIRFLOW_PROD_IMAGE}" -c 'pip check' || true
-}
-
push_pull_remove_images::check_if_github_registry_wait_for_image_enabled
push_pull_remove_images::check_if_jq_installed
@@ -44,9 +32,3 @@ echo
push_pull_remove_images::wait_for_github_registry_image \
"${AIRFLOW_PROD_IMAGE_NAME}" "${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
-
-echo
-echo "Verifying the ${AIRFLOW_PROD_IMAGE_NAME} image after pulling it"
-echo
-
-verify_prod_image_dependencies
diff --git a/scripts/ci/libraries/_build_images.sh b/scripts/ci/libraries/_build_images.sh
index 8de58db..30e7a85 100644
--- a/scripts/ci/libraries/_build_images.sh
+++ b/scripts/ci/libraries/_build_images.sh
@@ -849,3 +849,64 @@ function build_images::determine_docker_cache_strategy() {
verbosity::print_info "Using ${DOCKER_CACHE} cache strategy for the build."
verbosity::print_info
}
+
+
+# Useful information for people who stumble upon a pip check failure
+function build_images::inform_about_pip_check() {
+ >&2 echo """
+
+The image did not pass 'pip check' verification. This means that there are some conflicting dependencies
+in the image. Usually it means that some setup.py or setup.cfg limits need to be adjusted to fix it.
+
+Usually it happens when one of the dependencies gets upgraded and it has more strict requirements
+than the other dependencies and they are conflicting.
+
+In case you did not update setup.py or any of your dependencies, this error might happen in case
+someone accidentally merges conflicting dependencies in master. This
+should not happen as we are running 'pip check' as dependency before we upgrade the constrained
+dependencies, but we could miss some edge cases (thank you for your patience). Please let committer now
+and apologies for the troubles. You do not have to do anything in this case. You might be asked to
+rebase to the latest master after the problem is fixed.
+
+In case you actually updated setup.py, there are some steps you can take to address that:
+
+* first of all ask the committer to set 'upgrade to newer dependencies' and 'full tests needed' labels
+ for your PR. This will turn your PR in mode where all the dependencies are upgraded to latest matching
+ dependencies and the checks will run for all python versions
+
+* run locally the image that is failing with Breeze - this will make it easy to manually try to update
+ the setup.py and test the consequences of changing constraints. You can do it by checking out your PR
+ and running this command:
+
+ ./breeze ${1}--github-image-id ${GITHUB_REGISTRY_PULL_IMAGE_TAG} --backend ${BACKEND} --python ${PYTHON_MAJOR_MINOR_VERSION}
+
+* your setup.py and setup.cfg will be mounted to the container and you will be able to iterate with
+ different setup.py versions.
+
+* run 'pipdeptree' to figure out where the dependency conflict comes from. Useful commands that can help you
+ to find out dependencies you have are:
+ * 'pipdeptree | less' (you can then search through the dependencies with vim-like shortcuts)
+ * 'pipdeptree > /files/pipdeptree.txt' - this will produce a pipdeptree.txt file in your source
+ 'files' directory and you can open it in editor of your choice,
+ * 'pipdeptree | grep YOUR_DEPENDENCY' - to see all the requirements your dependency has as specified
+ by other packages
+
+* figure out which dependency limits should be upgraded. First try to upgrade them in setup.py extras
+ and run pip to upgrade your dependencies accordingly:
+
+ pip install '.[all]' --upgrade --upgrade-strategy eager
+
+* run pip check to figure out if the dependencies have been fixed (it should let you know which dependencies
+ are conflicting or (hurray!) if there are no conflicts:
+
+ pip check
+
+* in some, rare, cases, pip will not limit the requirement in case you specify it in extras, you might
+ need to add such requirement in 'install_requires' section of setup.cfg in order to have pip take it into
+ account. This will happen if higher version of your dependency is already installed in 'install_requires'
+ section. In such case update 'setup.cfg' and run pip install/pip check from the previous steps
+
+* iterate until all such dependency conflicts are fixed.
+
+"""
+}