You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by as...@apache.org on 2021/04/15 12:07:29 UTC

[airflow] 10/36: Constraints are now parallelized and merged in single job (#15211)

This is an automated email from the ASF dual-hosted git repository.

ash pushed a commit to branch v2-0-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit a51707a956c4ec61c3b608f9e5a84caf1267bddf
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Tue Apr 6 04:08:11 2021 +0200

    Constraints are now parallelized and merged in single job (#15211)
    
    Originally, the constraints were generated in separate jobs and uploaded as
    artifacts and then joined by a separate push job. Thanks to parallel
    processing, we can now do that all in a single job, with both cost and
    time savings.
    
    (cherry picked from commit aebacd74058d01cfecaf913c04c0dbc50bb188ea)
---
 .github/workflows/ci.yml                           | 64 ++++++----------------
 BREEZE.rst                                         | 39 ++++++-------
 CONTRIBUTING.rst                                   | 19 +++++--
 scripts/ci/constraints/ci_commit_constraints.sh    |  2 +-
 .../ci_generate_all_constraints.sh}                | 13 ++++-
 scripts/ci/constraints/ci_generate_constraints.sh  |  8 +++
 .../images/ci_wait_for_and_verify_all_ci_images.sh |  5 +-
 .../ci_wait_for_and_verify_all_prod_images.sh      |  4 +-
 scripts/ci/libraries/_parallel.sh                  | 13 +++++
 9 files changed, 89 insertions(+), 78 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 86bc960..21a5429 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1114,17 +1114,21 @@ ${{ hashFiles('.pre-commit-config.yaml') }}"
     timeout-minutes: 10
     name: "Constraints"
     runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }}
-    strategy:
-      matrix:
-        python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }}
-      fail-fast: false
     needs:
       - build-info
       - ci-images
+      - prod-images
+      - static-checks
+      - static-checks-pylint
+      - tests-sqlite
+      - tests-mysql
+      - tests-postgres
+      - tests-kubernetes
     env:
       RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }}
       PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }}
       GITHUB_REGISTRY: ${{ needs.ci-images.outputs.githubRegistry }}
+      CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING: ${{needs.build-info.outputs.pythonVersionsListAsString}}
     # Only run it for direct pushes
     if: >
       github.ref == 'refs/heads/master' || github.ref == 'refs/heads/v1-10-test' ||
@@ -1140,54 +1144,22 @@ ${{ hashFiles('.pre-commit-config.yaml') }}"
           python-version: ${{ env.PYTHON_MAJOR_MINOR_VERSION }}
       - name: "Free space"
         run: ./scripts/ci/tools/ci_free_space_on_ci.sh
-      - name: "Prepare CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ github.sha }}"
-        run: ./scripts/ci/images/ci_prepare_ci_image_on_ci.sh
+      - name: >
+          Wait for CI images
+          ${{ needs.build-info.outputs.pythonVersions }}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }}
+        run: ./scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh
       - name: "Generate constraints with PyPI providers"
-        run: ./scripts/ci/constraints/ci_generate_constraints.sh
+        run: ./scripts/ci/constraints/ci_generate_all_constraints.sh
         env:
           GENERATE_CONSTRAINTS_MODE: "pypi-providers"
       - name: "Generate constraints with source providers"
-        run: ./scripts/ci/constraints/ci_generate_constraints.sh
+        run: ./scripts/ci/constraints/ci_generate_all_constraints.sh
         env:
           GENERATE_CONSTRAINTS_MODE: "source-providers"
       - name: "Generate constraints without providers"
-        run: ./scripts/ci/constraints/ci_generate_constraints.sh
+        run: ./scripts/ci/constraints/ci_generate_all_constraints.sh
         env:
           GENERATE_CONSTRAINTS_MODE: "no-providers"
-      - name: "Upload constraint artifacts"
-        uses: actions/upload-artifact@v2
-        with:
-          name: 'constraints-${{matrix.python-version}}'
-          path: './files/constraints-${{matrix.python-version}}/constraints-*${{matrix.python-version}}.txt'
-          retention-days: 7
-
-  constraints-push:
-    timeout-minutes: 10
-    name: "Constraints push"
-    runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }}
-    needs:
-      - build-info
-      - constraints
-      - ci-images
-      - prod-images
-      - static-checks
-      - static-checks-pylint
-      - tests-sqlite
-      - tests-mysql
-      - tests-postgres
-      - tests-kubernetes
-    # Only run it for direct pushes
-    if: >
-      github.ref == 'refs/heads/master' || github.ref == 'refs/heads/v1-10-test' ||
-      github.ref == 'refs/heads/v2-0-test'
-    env:
-      RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }}
-    steps:
-      - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
-        uses: actions/checkout@v2
-        with:
-          persist-credentials: false
-          submodules: recursive
       - name: "Set constraints branch name"
         id: constraints-branch
         run: ./scripts/ci/constraints/ci_branch_constraints.sh
@@ -1197,10 +1169,6 @@ ${{ hashFiles('.pre-commit-config.yaml') }}"
           path: "repo"
           ref: ${{ steps.constraints-branch.outputs.branch }}
           persist-credentials: false
-      - name: "Get all artifacts (constraints)"
-        uses: actions/download-artifact@v2
-        with:
-          path: 'artifacts'
       - name: "Commit changed constraint files for ${{needs.build-info.outputs.pythonVersions}}"
         run: ./scripts/ci/constraints/ci_commit_constraints.sh
       - name: "Push changes"
@@ -1223,7 +1191,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}"
       - tests-postgres
       - tests-mysql
       - tests-kubernetes
-      - constraints-push
+      - constraints
       - prepare-test-provider-packages-wheel
       - prepare-test-provider-packages-sdist
     if: github.event_name == 'schedule' &&  github.repository == 'apache/airflow'
diff --git a/BREEZE.rst b/BREEZE.rst
index 2a8a74a..72633e8 100644
--- a/BREEZE.rst
+++ b/BREEZE.rst
@@ -809,38 +809,39 @@ Generating constraints
 
 Whenever setup.py gets modified, the CI master job will re-generate constraint files. Those constraint
 files are stored in separated orphan branches: ``constraints-master``, ``constraints-2-0``
-and ``constraints-1-10``. They are stored separately for each python version and there are separate
-constraints for:
+and ``constraints-1-10``.
+
+Those are constraint files as described in detail in the
+`<CONTRIBUTING.rst#pinned-constraint-files>`_ contributing documentation.
+
+You can use ``./breeze generate-constraints`` command to manually generate constraints for a single python
+version and single constraint mode like this:
+
+.. code-block:: bash
+
+     ./breeze generate-constraints --generate-constraints-mode pypi-providers
+
+
+Constraints are generated separately for each python version and there are separate constraints modes:
 
 * 'constraints' - those are constraints generated by matching the current airflow version from sources
    and providers that are installed from PyPI. Those are constraints used by the users who want to
-   install airflow with pip
+   install airflow with pip. Use ``pypi-providers`` mode for that.
 
 * "constraints-source-providers" - those are constraints generated by using providers installed from
   current sources. While adding new providers their dependencies might change, so this set of providers
   is the current set of the constraints for airflow and providers from the current master sources.
-  Those providers are used by CI system to keep "stable" set of constraints.
+  Those providers are used by CI system to keep "stable" set of constraints. Use
+  ``source-providers`` mode for that.
 
 * "constraints-no-providers" - those are constraints generated from only Apache Airflow, without any
   providers. If you want to manage airflow separately and then add providers individually, you can
-  use those.
-
-Those are constraint files as described in detail in the
-`<CONTRIBUTING.rst#pinned-constraint-files>`_ contributing documentation.
+  use those. Use ``no-providers`` mode for that.
 
 In case someone modifies setup.py, the ``CRON`` scheduled CI build automatically upgrades and
 pushes changed to the constraint files, however you can also perform test run of this locally using
-``generate-constraints`` command of Breeze.
-
-.. code-block:: bash
-
-  for python_version in 3.6 3.7 3.8
-  do
-    ./breeze generate-constraints --generate-constraints-mode source-providers --python ${python_version}
-    ./breeze generate-constraints --generate-constraints-mode pypi-providers --python ${python_version}
-    ./breeze generate-constraints --generate-constraints-mode no-providers --python ${python_version}
-  done
-
+the procedure described in `<CONTRIBUTING.rst#manually-generating-constraint-files>`_ which utilises
+multiple processors on your local machine to generate such constraints faster.
 
 This bumps the constraint files to latest versions and stores hash of setup.py. The generated constraint
 and setup.py hash files are stored in the ``files`` folder and while generating the constraints diff
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 6673ddf..19c4077 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -871,19 +871,26 @@ Manually generating constraint files
 ------------------------------------
 
 The constraint files are generated automatically by the CI job. Sometimes however it is needed to regenerate
-them manually (committers only). For example when master build did not succeed for quite some time). This can be done by
-running this:
+them manually (committers only), for example when the master build did not succeed for quite some time.
+This can be done by running this (it utilizes parallel preparation of the constraints):
 
 .. code-block:: bash
 
-    for python_version in 3.6 3.7 3.8
+    export CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING="3.6 3.7 3.8"
+    for python_version in $(echo "${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}")
     do
-      ./breeze generate-constraints --generate-constraints-mode source-providers --python ${python_version} --build-cache-local
-      ./breeze generate-constraints --generate-constraints-mode pypi-providers --python ${python_version} --build-cache-local
-      ./breeze generate-constraints --generate-constraints-mode no-providers --python ${python_version} --build-cache-local
+      ./breeze build-image --upgrade-to-newer-dependencies --python ${python_version} --build-cache-local
+      ./breeze build-image --upgrade-to-newer-dependencies --python ${python_version} --build-cache-local
+      ./breeze build-image --upgrade-to-newer-dependencies --python ${python_version} --build-cache-local
     done
+
+    GENERATE_CONSTRAINTS_MODE="pypi-providers" ./scripts/ci/constraints/ci_generate_all_constraints.sh
+    GENERATE_CONSTRAINTS_MODE="source-providers" ./scripts/ci/constraints/ci_generate_all_constraints.sh
+    GENERATE_CONSTRAINTS_MODE="no-providers" ./scripts/ci/constraints/ci_generate_all_constraints.sh
+
     AIRFLOW_SOURCES=$(pwd)
 
+
 The constraints will be generated in "files/constraints-PYTHON_VERSION/constraints-*.txt files. You need to
 checkout the right 'constraints-' branch in a separate repository and then you can copy, commit and push the
 generated files:
diff --git a/scripts/ci/constraints/ci_commit_constraints.sh b/scripts/ci/constraints/ci_commit_constraints.sh
index 7c24dc5..c3a7521 100755
--- a/scripts/ci/constraints/ci_commit_constraints.sh
+++ b/scripts/ci/constraints/ci_commit_constraints.sh
@@ -18,7 +18,7 @@
 # shellcheck source=scripts/ci/libraries/_script_init.sh
 . "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh"
 
-cp -v ./artifacts/constraints-*/constraints*.txt repo/
+cp -v ./files/constraints-*/constraints*.txt repo/
 cd repo || exit 1
 git config --local user.email "dev@airflow.apache.org"
 git config --local user.name "Automated GitHub Actions commit"
diff --git a/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh b/scripts/ci/constraints/ci_generate_all_constraints.sh
similarity index 75%
copy from scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh
copy to scripts/ci/constraints/ci_generate_all_constraints.sh
index 7e09b1c..9a7a77e 100755
--- a/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh
+++ b/scripts/ci/constraints/ci_generate_all_constraints.sh
@@ -17,6 +17,7 @@
 # under the License.
 set -euo pipefail
 
+
 # We cannot perform full initialization because it will be done later in the "single run" scripts
 # And some readonly variables are set there, therefore we only selectively reuse parallel lib needed
 LIBRARIES_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/../libraries/" && pwd)
@@ -25,10 +26,18 @@ source "${LIBRARIES_DIR}/_all_libs.sh"
 
 initialization::set_output_color_variables
 
+export CHECK_IMAGE_FOR_REBUILD="false"
+echo
+echo "${COLOR_YELLOW}Skip rebuilding CI images. Assume the one we have is good!${COLOR_RESET}"
+echo "${COLOR_YELLOW}You must run './breeze build-image --upgrade-to-newer-dependencies before for all python versions before running this one!${COLOR_RESET}"
+echo
+
 parallel::make_sure_gnu_parallel_is_installed
 
+parallel::make_sure_python_versions_are_specified
+
 echo
-echo "Waiting for all CI images to appear: ${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}"
+echo "${COLOR_BLUE}Generating all constraint files${COLOR_RESET}"
 echo
 
 parallel::initialize_monitoring
@@ -37,5 +46,5 @@ parallel::monitor_progress
 
 # shellcheck disable=SC2086
 parallel --results "${PARALLEL_MONITORED_DIR}" \
-    "$( dirname "${BASH_SOURCE[0]}" )/ci_wait_for_and_verify_ci_image.sh" ::: \
+    "$( dirname "${BASH_SOURCE[0]}" )/ci_generate_constraints.sh" ::: \
     ${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}
diff --git a/scripts/ci/constraints/ci_generate_constraints.sh b/scripts/ci/constraints/ci_generate_constraints.sh
index 10a4107..7e1cefa 100755
--- a/scripts/ci/constraints/ci_generate_constraints.sh
+++ b/scripts/ci/constraints/ci_generate_constraints.sh
@@ -15,6 +15,14 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+if [[ $1 == "" ]]; then
+  >&2 echo "Requires python MAJOR/MINOR version as first parameter"
+  exit 1
+fi
+
+export PYTHON_MAJOR_MINOR_VERSION=$1
+shift
+
 # shellcheck source=scripts/ci/libraries/_script_init.sh
 . "$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh"
 
diff --git a/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh b/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh
index 7e09b1c..4255374 100755
--- a/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh
+++ b/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh
@@ -27,10 +27,13 @@ initialization::set_output_color_variables
 
 parallel::make_sure_gnu_parallel_is_installed
 
+parallel::make_sure_python_versions_are_specified
+
 echo
-echo "Waiting for all CI images to appear: ${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}"
+echo "${COLOR_BLUE}Waiting for all CI images to appear${COLOR_RESET}"
 echo
 
+
 parallel::initialize_monitoring
 
 parallel::monitor_progress
diff --git a/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh b/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh
index 2d1da54..08ed54b 100755
--- a/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh
+++ b/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh
@@ -27,8 +27,10 @@ initialization::set_output_color_variables
 
 parallel::make_sure_gnu_parallel_is_installed
 
+parallel::make_sure_python_versions_are_specified
+
 echo
-echo "Waiting for all PROD images to appear: ${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}"
+echo "${COLOR_BLUE}Waiting for all PROD images to appear${COLOR_RESET}"
 echo
 
 parallel::initialize_monitoring
diff --git a/scripts/ci/libraries/_parallel.sh b/scripts/ci/libraries/_parallel.sh
index e2f8ad4..7239e82 100644
--- a/scripts/ci/libraries/_parallel.sh
+++ b/scripts/ci/libraries/_parallel.sh
@@ -193,3 +193,16 @@ function parallel::cleanup_runner() {
     parallel::kill_stale_semaphore_locks
     start_end::group_end
 }
+
+
+function parallel::make_sure_python_versions_are_specified() {
+    if [[ -z "${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING=}" ]]; then
+        echo
+        echo "${COLOR_RED}The CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING variable must be set and list python versions to use!${COLOR_RESET}"
+        echo
+        exit 1
+    fi
+    echo
+    echo "${COLOR_BLUE}Running parallel builds for those Python versions: ${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}!${COLOR_RESET}"
+    echo
+}