You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by "amoghrajesh (via GitHub)" <gi...@apache.org> on 2023/08/07 10:59:10 UTC

[GitHub] [airflow] amoghrajesh commented on a diff in pull request #33144: Add update-constraints command that allows to modify released ones

amoghrajesh commented on code in PR #33144:
URL: https://github.com/apache/airflow/pull/33144#discussion_r1285705062


##########
dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md:
##########
@@ -73,6 +74,89 @@ git commit -m "Your commit message here" --no-verify
 git push
 ```
 
+# Manually updating already tagged constraint files
+
+Sometimes - very rarely - we need to fix historical constraint files when Airflow fails to install with the
+constraints that were used in the past. This happened already several times and usually only happens when
+there is a backwards-incompatible change in the build environment in Python installation toolchain
+(pip, setuptools, wheel, Cython etc.). The Python build environment is not controllable by us - by default
+pip uses `build isolation` which means that it will install the latest version of the build tools that
+are matching the expectations of the package being build and it might mean that new version of such tools can

Review Comment:
   nit: `expectations of the package being *built* and it might mean that new version of such tools can`



##########
dev/breeze/src/airflow_breeze/commands/release_management_commands.py:
##########
@@ -1275,3 +1275,160 @@ def generate_providers_metadata(refresh_constraints: bool, python: str | None):
     import json
 
     PROVIDER_METADATA_JSON_FILE_PATH.write_text(json.dumps(metadata_dict, indent=4, sort_keys=True))
+
+
+def fetch_remote(constraints_repo: Path, remote_name: str) -> None:
+    run_command(["git", "fetch", remote_name], cwd=constraints_repo)
+
+
+def checkout_constraint_tag_and_reset_branch(constraints_repo: Path, airflow_version: str) -> None:
+    run_command(
+        ["git", "reset", "--hard"],
+        cwd=constraints_repo,
+    )
+    # Switch to tag
+    run_command(
+        ["git", "checkout", f"constraints-{airflow_version}"],
+        cwd=constraints_repo,
+    )
+    # Create or reset branch to point
+    run_command(
+        ["git", "checkout", "-B", f"constraints-{airflow_version}-fix"],
+        cwd=constraints_repo,
+    )
+    get_console().print(
+        f"[info]Checked out constraints tag: constraints-{airflow_version} and "
+        f"reset branch constraints-{airflow_version}-fix to it.[/]"
+    )
+    result = run_command(
+        ["git", "show", "-s", "--format=%H"],
+        cwd=constraints_repo,
+        text=True,
+        capture_output=True,
+    )
+    get_console().print(f"[info]The hash commit of the tag:[/] {result.stdout}")
+
+
+def modify_single_file_constraints(constraints_file: Path, updated_constraints: tuple[str]) -> bool:
+    constraint_content = constraints_file.read_text()
+    original_content = constraint_content
+    for constraint in updated_constraints:
+        package, version = constraint.split("==")
+        constraint_content = re.sub(
+            rf"^{package}==.*$", f"{package}=={version}", constraint_content, flags=re.MULTILINE
+        )
+    if constraint_content != original_content:
+        if not get_dry_run():
+            constraints_file.write_text(constraint_content)
+        get_console().print("[success]Updated.[/]")
+        return True
+    else:
+        get_console().print("[warning]The file has not been modified.[/]")
+        return False
+
+
+def modify_all_constraint_files(constraints_repo: Path, updated_constraint: tuple[str]) -> bool:
+    get_console().print("[info]Updating constraints files:[/]")
+    modified = False
+    for constraints_file in constraints_repo.glob("constraints-*.txt"):
+        get_console().print(f"[info]Updating {constraints_file.name}")
+        if modify_single_file_constraints(constraints_file, updated_constraint):
+            modified = True
+    return modified
+
+
+def confirm_modifications(constraints_repo: Path) -> bool:
+    run_command(["git", "diff"], cwd=constraints_repo, env={"PAGER": ""})
+    confirm = user_confirm("Do you want to continue?")
+    if confirm == Answer.YES:
+        return True
+    elif confirm == Answer.NO:
+        return False
+    else:
+        sys.exit(1)
+
+
+def commit_constraints_and_tag(constraints_repo: Path, airflow_version: str, message: str) -> None:
+    run_command(
+        ["git", "commit", "-a", "--no-verify", "-m", message],
+        cwd=constraints_repo,
+    )
+    run_command(
+        ["git", "tag", f"constraints-{airflow_version}", "--force", "-s", "-m", message, "HEAD"],
+        cwd=constraints_repo,
+    )
+
+
+def push_constraints_and_tag(constraints_repo: Path, remote_name: str, airflow_version: str) -> None:
+    run_command(
+        ["git", "push", remote_name, f"constraints-{airflow_version}-fix"],
+        cwd=constraints_repo,
+    )
+    run_command(
+        ["git", "push", remote_name, f"constraints-{airflow_version}", "--force"],
+        cwd=constraints_repo,
+    )
+
+
+@release_management.command(
+    name="update-constraints", help="Update released constraints with manual changes."
+)
+@click.option(
+    "--constraints-repo",
+    type=click.Path(file_okay=False, dir_okay=True, path_type=Path, exists=True),
+    required=True,
+    envvar="CONSTRAINTS_REPO",
+    help="Path where airflow repository is checked out, with ``constraints-main`` branch checked out.",
+)
+@click.option(
+    "--remote-name",
+    type=str,
+    default="apache",
+    envvar="REMOTE_NAME",
+    help="Name of the remote to push the changes to.",
+)
+@click.option(
+    "--airflow-versions",
+    type=str,
+    required=True,
+    envvar="AIRFLOW_VERSIONS",
+    help="Comma separated list of Airflow versions to update constraints for.",
+)
+@click.option(
+    "--message",

Review Comment:
   Let's call this `commit-message` instead?



##########
BREEZE.rst:
##########
@@ -2106,13 +2106,30 @@ Those are all available flags of ``generate-constraints`` command:
 
 In case someone modifies setup.py, the scheduled CI Tests automatically upgrades and
 pushes changes to the constraint files, however you can also perform test run of this locally using
-the procedure described in `Refreshing CI Cache <dev/REFRESHING_CI_CACHE.md#manually-generating-constraint-files>`_
+the procedure described in the
+`Manually generating image cache and constraints <dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md>`_
 which utilises multiple processors on your local machine to generate such constraints faster.
 
 This bumps the constraint files to latest versions and stores hash of setup.py. The generated constraint
 and setup.py hash files are stored in the ``files`` folder and while generating the constraints diff
 of changes vs the previous constraint files is printed.
 
+Updating constraints
+""""""""""""""""""""
+
+Sometimes (very rarely) we might want to update individual packages in constraints that we generated and
+tagged already in the past. This can be done using ``breeze release-management update-constraints`` command.
+
+Those are all available flags of ``update-constraints`` command:

Review Comment:
   nit: `These`



##########
dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md:
##########
@@ -73,6 +74,89 @@ git commit -m "Your commit message here" --no-verify
 git push
 ```
 
+# Manually updating already tagged constraint files
+
+Sometimes - very rarely - we need to fix historical constraint files when Airflow fails to install with the
+constraints that were used in the past. This happened already several times and usually only happens when
+there is a backwards-incompatible change in the build environment in Python installation toolchain
+(pip, setuptools, wheel, Cython etc.). The Python build environment is not controllable by us - by default
+pip uses `build isolation` which means that it will install the latest version of the build tools that
+are matching the expectations of the package being built and it might mean that a new version of such tools can
+break installation. This happened for example in July 2023 when a major (3.0.0) version of Cython was
+released and it broke `pymssql` installation - we had to update the constraint file to use `pymssql` 2.2.8
+instead of `2.2.7` because 2.2.7 did not limit the Cython version but also did not work with the new version of Cython.
+
+Breeze has `update-constraints` command in `release-management` group that can be used to update the
+constraints.
+
+How updating constraints works:
+
+1. You need to have "airflow" repository checked out separately from the repository you are working on. For
+   example in `/home/myuser/airflow-constratints` folder.

Review Comment:
   typo:  `/home/myuser/airflow-constraints`



##########
dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md:
##########
@@ -73,6 +74,89 @@ git commit -m "Your commit message here" --no-verify
 git push
 ```
 
+# Manually updating already tagged constraint files
+
+Sometimes - very rarely - we need to fix historical constraint files when Airflow fails to install with the
+constraints that were used in the past. This happened already several times and usually only happens when
+there is a backwards-incompatible change in the build environment in Python installation toolchain
+(pip, setuptools, wheel, Cython etc.). The Python build environment is not controllable by us - by default
+pip uses `build isolation` which means that it will install the latest version of the build tools that
+are matching the expectations of the package being built and it might mean that a new version of such tools can
+break installation. This happened for example in July 2023 when a major (3.0.0) version of Cython was
+released and it broke `pymssql` installation - we had to update the constraint file to use `pymssql` 2.2.8
+instead of `2.2.7` because 2.2.7 did not limit the Cython version but also did not work with the new version of Cython.
+
+Breeze has `update-constraints` command in `release-management` group that can be used to update the
+constraints.
+
+How updating constraints works:
+
+1. You need to have "airflow" repository checked out separately from the repository you are working on. For
+   example in `/home/myuser/airflow-constraints` folder.
+2. You need to checkout `constraints-main` branch in this repository. By default the command expects that
+   there is a remote named "apache" pointing to the official Apache repository. You can override this
+   by passing `--remote-name` option to the command.
+3. You need to run `breeze release-management update-constraints` command. You can pass various options to
+   the command:
+      * path to the "constraints" repository
+      * remote name (optionally - default "apache")
+      * list of airflow versions to update constraints for
+      * list of constraints to update in the form of "package==version" (you can specify more than one)
+      * message to be used in commit message
+
+   Make sure you use exactly the same case for the package to be updated as the one already in the
+   constraints.
+
+   It's a good idea to add `--dry-run` option to the command to see what will be updated before you actually
+   run the command. However, even if you do not use `--dry-run` option, the command will ask you to
+   confirm the updates so you will have a chance to verify it before each version change.
+
+   You can also add `--verbose` instead of `--dry-run` and you will see the git commands being executed by
+   the command while it is doing its job.
+
+   Notes about "potentially breaking" constraints: the command is designed to work in a very safe way
+   and provide ways of coming back to the previous state manually if needed.
+
+   Tags are moved with ``--force`` option - this needs to be done because we are moving already existing tag,
+   however branches are pushed without force so there is no risk of losing history in the repository - you
+   can always see the history and revert the changes and restore old tags manually. Usually the "final" tags
+   are the same as the latest "rc*" tags for the same version so it is easy to find where the tag was
+   pointing before - we also print hash of the commits before attempting to make modifications so you can
+   always see what commit the tag has been pointing to before the command is run.
+
+4. The command will do the following for every Airflow version specified:
+      * checkout "constraints-<version>" tag
+      * reset "constraints-<version>-fix" branch to the tag
+      * update constraints in-place
+      * commit the changes
+      * tag the commit with "constraints-<version>" tag
+      * push the "constraints-<version>-fix" branch with the commit to the remote selected
+      * push the tag to the remote selected
+
+You should verify manually if the change is as expected by inspecting the constraints at
+
+https://github.com/apache/airflow/tree/constraints-<airflow-version>
+
+Example of updating constraints for Airflow 2.5.0 - 2.6.3 and updating `pymssql` constraint to 2.2.8:
+
+```bash
+breeze release-management update-constraints --constraints-repo /home/user/airflow-constraints \
+    --airflow-versions 2.5.0,2.5.1,2.5.2,2.5.3,2.6.0,2.6.1,2.6.2,2.6.3 \
+    --updated-constraint pymssql==2.2.8 \
+    --message "Update pymssql constraint to 2.2.8"
+```
+
+Example of updating multiple constraints:
+
+```bash
+breeze release-management update-constraints --constraints-repo /home/user/airflow-constraints \
+    --airflow-versions 2.5.0,2.5.1,2.5.2,2.5.3,2.6.0,2.6.1,2.6.2,2.6.3 \
+    --updated-constraint pymssql==2.2.8 \
+    --updated-constraint Authlib==1.3.0 \
+    --message "Update pymssql constraint to 2.2.8 and Authlib to 1.3.0"
+```
+

Review Comment:
   Nice description with examples!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org