You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2024/02/14 01:18:56 UTC

(airflow) branch main updated: Force Python 3.9+ version when building reproducible packages (#37401)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4ae096eaf3 Force Python 3.9+ version when building reproducible packages (#37401)
4ae096eaf3 is described below

commit 4ae096eaf3dc5541c3c7bd2e4d8053fca738e0e1
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Wed Feb 14 02:18:48 2024 +0100

    Force Python 3.9+ version when building reproducible packages (#37401)
    
    When building reproducible packages with Python 3.8, they are
    not reproducible: the tarfile module produces slightly different
    output there, so the packages are not binary identical.
    
    This change forces anyone preparing reproducible packages to have
    breeze installed using Python 3.9+.
---
 .github/actions/breeze/action.yml                     | 13 ++++++++++---
 .github/actions/prepare_breeze_and_image/action.yml   |  5 +++++
 .github/workflows/build-images.yml                    |  7 +++++++
 .github/workflows/ci.yml                              | 19 +++++++++++++++++++
 dev/breeze/doc/01_installation.rst                    | 16 ++++++++++++++--
 .../commands/release_candidate_command.py             |  3 +++
 .../commands/release_management_commands.py           |  7 ++++++-
 .../src/airflow_breeze/utils/kubernetes_utils.py      |  3 +--
 .../src/airflow_breeze/utils/python_versions.py       | 11 +++++++++++
 dev/breeze/src/airflow_breeze/utils/reproducible.py   |  3 +++
 scripts/ci/install_breeze.sh                          | 10 +++++++++-
 11 files changed, 88 insertions(+), 9 deletions(-)

diff --git a/.github/actions/breeze/action.yml b/.github/actions/breeze/action.yml
index 50c6da4a6a..ef8a00fc31 100644
--- a/.github/actions/breeze/action.yml
+++ b/.github/actions/breeze/action.yml
@@ -18,6 +18,10 @@
 ---
 name: 'Setup Breeze'
 description: 'Sets up Python and Breeze'
+inputs:
+  python-version:
+    description: 'Python version to use'
+    default: "3.8"
 outputs:
   host-python-version:
     description: Python version used in host
@@ -28,7 +32,7 @@ runs:
     - name: "Setup python"
       uses: actions/setup-python@v5
       with:
-        python-version: 3.8
+        python-version: ${{ inputs.python-version }}
         cache: 'pip'
         cache-dependency-path: ./dev/breeze/setup*
     - name: Cache breeze
@@ -37,11 +41,14 @@ runs:
         path: ~/.local/pipx
         # README has the latest breeze's hash and python location is used to distinguish between
         # different minor versions of python
-        key: "breeze-3.8-${{ env.pythonLocation }}-${{ hashFiles('dev/breeze/README.md') }}"
-        restore-keys: breeze-3.8-${{ env.pythonLocation }}
+        key: "breeze-${{inputs.python-version}}-${{env.pythonLocation}}-\
+          ${{hashFiles('dev/breeze/README.md')}}"
+        restore-keys: breeze-${{inputs.python-version}}-${{ env.pythonLocation }}
     - name: "Install Breeze"
       shell: bash
       run: ./scripts/ci/install_breeze.sh
+      env:
+        PYTHON_VERSION: ${{ inputs.python-version }}
     - name: "Free space"
       shell: bash
       run: breeze ci free-space
diff --git a/.github/actions/prepare_breeze_and_image/action.yml b/.github/actions/prepare_breeze_and_image/action.yml
index 61967e1e52..3690565550 100644
--- a/.github/actions/prepare_breeze_and_image/action.yml
+++ b/.github/actions/prepare_breeze_and_image/action.yml
@@ -19,6 +19,9 @@
 name: 'Prepare breeze && current python image'
 description: 'Installs breeze and pulls current python image'
 inputs:
+  python-version:
+    description: 'Python version to use'
+    default: "3.8"
   pull-image-type:
     description: 'Which image to pull'
     default: CI
@@ -32,6 +35,8 @@ runs:
     - name: "Install Breeze"
       uses: ./.github/actions/breeze
       id: breeze
+      with:
+        python-version: ${{ inputs.python-version }}
     - name: Pull CI image ${{ env.PYTHON_MAJOR_MINOR_VERSION }}:${{ env.IMAGE_TAG }}
       shell: bash
       run: breeze ci-image pull --tag-as-latest
diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml
index c8e0f83ea9..5630c91119 100644
--- a/.github/workflows/build-images.yml
+++ b/.github/workflows/build-images.yml
@@ -42,6 +42,11 @@ env:
   IMAGE_TAG: "${{ github.event.pull_request.head.sha || github.sha }}"
   USE_SUDO: "true"
   INCLUDE_SUCCESS_OUTPUTS: "true"
+  # Version of Python used for reproducibility of the packages built
+  # Python 3.8 tarfile produces different tarballs than Python 3.9+ tarfile that's why we are forcing
+  # Python 3.9 for all release preparation commands to make sure that the tarballs are reproducible
+  # TODO: remove me when we switch to Python 3.9 as minimal version
+  REPRODUCIBLE_PYTHON_VERSION: "3.9"
 
 concurrency:
   group: build-${{ github.event.pull_request.number || github.ref }}
@@ -294,6 +299,8 @@ jobs:
       ####################################################################################################
       - name: "Install Breeze"
         uses: ./.github/actions/breeze
+        with:
+          python-version: ${{ env.REPRODUCIBLE_PYTHON_VERSION }}
       - name: >
           Build PROD Images
           ${{needs.build-info.outputs.all-python-versions-list-as-string}}:${{env.IMAGE_TAG}}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6cefe50bd7..b2d45e5c30 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -52,6 +52,11 @@ env:
   INCLUDE_NOT_READY_PROVIDERS: "true"
   AIRFLOW_ENABLE_AIP_44: "true"
   MOUNT_SOURCES: "skip"
+  # Version of Python used for reproducibility of the packages built
+  # Python 3.8 tarfile produces different tarballs than Python 3.9+ tarfile that's why we are forcing
+  # Python 3.9 for all release preparation commands to make sure that the tarballs are reproducible
+  # TODO: remove me when we switch to Python 3.9 as minimal version
+  REPRODUCIBLE_PYTHON_VERSION: "3.9"
 
 concurrency:
   group: ci-${{ github.event.pull_request.number || github.ref }}
@@ -496,6 +501,8 @@ jobs:
           persist-credentials: false
       - name: "Install Breeze"
         uses: ./.github/actions/breeze
+        with:
+          python-version: ${{ env.REPRODUCIBLE_PYTHON_VERSION }}
       - name: Pull CI images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG }}
         run: breeze ci-image pull --run-in-parallel --tag-as-latest
       - name: Verify CI images ${{ env.PYTHON_VERSIONS }}:${{ env.IMAGE_TAG }}
@@ -849,6 +856,8 @@ jobs:
       - name: >
           Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}}
         uses: ./.github/actions/prepare_breeze_and_image
+        with:
+          python-version: ${{ env.REPRODUCIBLE_PYTHON_VERSION }}
       - name: "Cleanup dist files"
         run: rm -fv ./dist/*
       - name: "Prepare provider documentation"
@@ -908,6 +917,8 @@ jobs:
       - name: >
           Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}}
         uses: ./.github/actions/prepare_breeze_and_image
+        with:
+          python-version: ${{ env.REPRODUCIBLE_PYTHON_VERSION }}
       - name: "Cleanup dist files"
         run: rm -fv ./dist/*
       - name: "Prepare provider packages: sdist"
@@ -975,6 +986,8 @@ jobs:
       - name: >
           Prepare breeze & CI image: ${{matrix.python-version}}:${{env.IMAGE_TAG}}
         uses: ./.github/actions/prepare_breeze_and_image
+        with:
+          python-version: ${{ env.REPRODUCIBLE_PYTHON_VERSION }}
       - name: "Cleanup dist files"
         run: rm -fv ./dist/*
       - name: "Prepare provider packages: wheel"
@@ -1087,6 +1100,8 @@ jobs:
           persist-credentials: false
       - name: "Install Breeze"
         uses: ./.github/actions/breeze
+        with:
+          python-version: ${{ env.REPRODUCIBLE_PYTHON_VERSION }}
       - name: Setup git for tagging
         run: |
           git config --global user.email "name@example.com"
@@ -1853,6 +1868,8 @@ jobs:
         if: >
           needs.build-info.outputs.in-workflow-build == 'true' &&
           needs.build-info.outputs.default-branch == 'main'
+        with:
+          python-version: ${{ env.REPRODUCIBLE_PYTHON_VERSION }}
       - name: >
           Build PROD Images
           ${{needs.build-info.outputs.all-python-versions-list-as-string}}:${{env.IMAGE_TAG}}
@@ -2407,6 +2424,8 @@ jobs:
       - name: >
           Prepare breeze & CI image: ${{needs.build-info.outputs.default-python-version}}:${{env.IMAGE_TAG}}
         uses: ./.github/actions/prepare_breeze_and_image
+        with:
+          python-version: ${{ env.REPRODUCIBLE_PYTHON_VERSION }}
         env:
           # Always use default Python version of CI image for preparing packages
           PYTHON_MAJOR_MINOR_VERSION: "${{needs.build-info.outputs.default-python-version}}"
diff --git a/dev/breeze/doc/01_installation.rst b/dev/breeze/doc/01_installation.rst
index 962d1839fd..4e57983d1b 100644
--- a/dev/breeze/doc/01_installation.rst
+++ b/dev/breeze/doc/01_installation.rst
@@ -324,12 +324,24 @@ that Breeze works on
 
     .. note:: creating pipx virtual env ``apache-airflow-breeze`` with a specific python version
 
-        In ``pipx install --force -e ./dev/breeze`` or ``pipx install --force -e dev\breeze``, ``pipx`` uses default system python version to create virtual env for breeze.
+        In ``pipx install -e ./dev/breeze`` or ``pipx install  -e dev\breeze``, ``pipx`` uses default
+        system python version to create virtual env for breeze.
         We can use a specific version by providing python executable in ``--python``  argument. For example:
 
+
+        If you have breeze installed already with another Python version you can reinstall breeze with reinstall
+        command
+
+        .. code-block:: bash
+
+            pipx reinstall --python /Users/airflow/.pyenv/versions/3.8.16/bin/python apache-airflow-breeze
+
+        Or you can uninstall breeze and install it with a specific python version:
+
         .. code-block:: bash
 
-            pipx install -e ./dev/breeze --force --python /Users/airflow/.pyenv/versions/3.8.16/bin/python
+            pipx uninstall apache-airflow-breeze
+            pipx install -e ./dev/breeze --python /Users/airflow/.pyenv/versions/3.8.16/bin/python
 
 
 Running Breeze for the first time
diff --git a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py
index a7b14234fe..16f3dd18a9 100644
--- a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py
+++ b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py
@@ -26,6 +26,7 @@ from airflow_breeze.commands.release_management_group import release_management
 from airflow_breeze.utils.confirm import confirm_action
 from airflow_breeze.utils.console import console_print
 from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, DIST_DIR, OUT_DIR
+from airflow_breeze.utils.python_versions import check_python_3_9_or_above
 from airflow_breeze.utils.reproducible import get_source_date_epoch, repack_deterministically
 from airflow_breeze.utils.run_utils import run_command
 
@@ -310,6 +311,7 @@ def remove_old_releases(version, repo_root):
     "--version", required=True, help="The release candidate version e.g. 2.4.3rc1", envvar="VERSION"
 )
 def prepare_airflow_tarball(version: str):
+    check_python_3_9_or_above()
     from packaging.version import Version
 
     airflow_version = Version(version)
@@ -335,6 +337,7 @@ def prepare_airflow_tarball(version: str):
 )
 @option_answer
 def publish_release_candidate(version, previous_version, github_token):
+    check_python_3_9_or_above()
     from packaging.version import Version
 
     airflow_version = Version(version)
diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py
index 2506be5c86..e1be056179 100644
--- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py
+++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py
@@ -142,7 +142,7 @@ from airflow_breeze.utils.provider_dependencies import (
     generate_providers_metadata_for_package,
     get_related_providers,
 )
-from airflow_breeze.utils.python_versions import get_python_version_list
+from airflow_breeze.utils.python_versions import check_python_3_9_or_above, get_python_version_list
 from airflow_breeze.utils.reproducible import get_source_date_epoch, repack_deterministically
 from airflow_breeze.utils.run_utils import (
     run_command,
@@ -387,6 +387,7 @@ def prepare_airflow_packages(
     version_suffix_for_pypi: str,
     use_local_hatch: bool,
 ):
+    check_python_3_9_or_above()
     perform_environment_checks()
     fix_ownership_using_docker()
     cleanup_python_generated_files()
@@ -631,6 +632,7 @@ def prepare_provider_packages(
     skip_tag_check: bool,
     version_suffix_for_pypi: str,
 ):
+    check_python_3_9_or_above()
     perform_environment_checks()
     fix_ownership_using_docker()
     cleanup_python_generated_files()
@@ -2472,6 +2474,7 @@ def prepare_helm_chart_tarball(
 ) -> None:
     import yaml
 
+    check_python_3_9_or_above()
     chart_yaml_file_content = CHART_YAML_FILE.read_text()
     chart_yaml_dict = yaml.safe_load(chart_yaml_file_content)
     version_in_chart = chart_yaml_dict["version"]
@@ -2613,6 +2616,8 @@ def prepare_helm_chart_tarball(
 @option_dry_run
 @option_verbose
 def prepare_helm_chart_package(sign_email: str):
+    check_python_3_9_or_above()
+
     import yaml
 
     from airflow_breeze.utils.kubernetes_utils import (
diff --git a/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py b/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py
index dcbb585db1..3812277d4b 100644
--- a/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py
+++ b/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py
@@ -347,8 +347,7 @@ def create_virtualenv(force_venv_setup: bool) -> RunCommandResult:
             "[info]You can uninstall breeze and install it again with earlier Python "
             "version. For example:[/]\n"
         )
-        get_console().print("pipx uninstall apache-airflow-breeze")
-        get_console().print("pipx install --python PYTHON_PATH -e ./dev/breeze\n")
+        get_console().print("pipx reinstall --python PYTHON_PATH apache-airflow-breeze\n")
         get_console().print(
             f"[info]PYTHON_PATH - path to your Python binary(< {higher_python_version_tuple})[/]\n"
         )
diff --git a/dev/breeze/src/airflow_breeze/utils/python_versions.py b/dev/breeze/src/airflow_breeze/utils/python_versions.py
index 3ac3f8be30..b06eb63c0f 100644
--- a/dev/breeze/src/airflow_breeze/utils/python_versions.py
+++ b/dev/breeze/src/airflow_breeze/utils/python_versions.py
@@ -43,3 +43,14 @@ def get_python_version_list(python_versions: str) -> list[str]:
         )
         sys.exit(1)
     return python_version_list
+
+
+def check_python_3_9_or_above():
+    if not sys.version_info >= (3, 9):
+        get_console().print("[error]Python 3.9 or later is required to prepare reproducible archives.\n")
+        get_console().print(
+            "[warning]Please reinstall Breeze in Python3.9+ environment. For example:[/]\n\n"
+            "pipx uninstall apache-airflow-breeze\n\n"
+            "pipx install --python $(which python3.9) -e ./dev/breeze --force\n"
+        )
+        sys.exit(1)
diff --git a/dev/breeze/src/airflow_breeze/utils/reproducible.py b/dev/breeze/src/airflow_breeze/utils/reproducible.py
index cf4005d9dd..418d5b2079 100644
--- a/dev/breeze/src/airflow_breeze/utils/reproducible.py
+++ b/dev/breeze/src/airflow_breeze/utils/reproducible.py
@@ -43,6 +43,7 @@ from pathlib import Path
 from subprocess import CalledProcessError, CompletedProcess
 
 from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, OUT_DIR, REPRODUCIBLE_DIR
+from airflow_breeze.utils.python_versions import check_python_3_9_or_above
 from airflow_breeze.utils.run_utils import run_command
 
 
@@ -90,6 +91,7 @@ def repack_deterministically(
         tarinfo.mtime = timestamp
         return tarinfo
 
+    check_python_3_9_or_above()
     OUT_DIR.mkdir(exist_ok=True)
     shutil.rmtree(REPRODUCIBLE_DIR, ignore_errors=True)
     REPRODUCIBLE_DIR.mkdir(exist_ok=True)
@@ -147,6 +149,7 @@ def repack_deterministically(
 
 
 def main():
+    check_python_3_9_or_above()
     parser = ArgumentParser()
     parser.add_argument("-a", "--archive", help="archive to repack")
     parser.add_argument("-o", "--out", help="archive destination")
diff --git a/scripts/ci/install_breeze.sh b/scripts/ci/install_breeze.sh
index 7a61147f23..c5dadf09ec 100755
--- a/scripts/ci/install_breeze.sh
+++ b/scripts/ci/install_breeze.sh
@@ -19,7 +19,15 @@ set -euxo pipefail
 
 cd "$( dirname "${BASH_SOURCE[0]}" )/../../"
 
+PYTHON_ARG=""
+
+if [[ ${PYTHON_VERSION=} != "" ]]; then
+    PYTHON_ARG="--python=$(which python"${PYTHON_VERSION}") "
+fi
+
 python -m pip install --upgrade pip==24.0
 python -m pip install "pipx>=1.4.1"
-python -m pipx install --editable ./dev/breeze/ --force
+python -m pipx uninstall apache-airflow-breeze >/dev/null 2>&1 || true
+# shellcheck disable=SC2086
+python -m pipx install ${PYTHON_ARG} --editable ./dev/breeze/
 echo '/home/runner/.local/bin' >> "${GITHUB_PATH}"