You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2021/11/15 10:41:06 UTC

[airflow] branch main updated: Add script to generate issue for status of testing of the rc (#19247)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 849a94b  Add script to generate issue for status of testing of the rc (#19247)
849a94b is described below

commit 849a94b5a40c51a7344d158a02a39449ebd720f2
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Mon Nov 15 11:40:33 2021 +0100

    Add script to generate issue for status of testing of the rc (#19247)
---
 dev/ISSUE_TEMPLATE.md.jinja2  |  21 ++++
 dev/README_RELEASE_AIRFLOW.md |  29 +++++
 dev/prepare_release_issue.py  | 286 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 336 insertions(+)

diff --git a/dev/ISSUE_TEMPLATE.md.jinja2 b/dev/ISSUE_TEMPLATE.md.jinja2
new file mode 100644
index 0000000..35be892
--- /dev/null
+++ b/dev/ISSUE_TEMPLATE.md.jinja2
@@ -0,0 +1,21 @@
+
+
+We have a kind request for all the contributors to the latest [Apache Airflow RC {{version}}](https://pypi.org/project/apache-airflow/{{version}}/).
+
+Could you please help us to test the RC versions of Airflow?
+
+Please let us know in the comment if the issue is addressed in the latest RC.
+
+{% for pr_number in pr_list %}
+    {%- set pr = pull_requests[pr_number] -%}
+- [ ] [{{ pr.title }} (#{{ pr.number }})]({{ pr.html_url }}): {{ user_logins[pr_number] }}
+        {%- if linked_issues[pr_number] %}
+     Linked issues:
+            {%- for linked_issue in linked_issues[pr_number] %}
+     - [{{ linked_issue.title }} (#{{ linked_issue.number }})]({{ linked_issue.html_url }})
+            {%- endfor %}
+        {%- endif %}
+{% endfor %}
+
+Thanks to all who contributed to the release (probably not a complete list!):
+{{ all_user_logins }}
diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md
index 2edbc20..99e077d 100644
--- a/dev/README_RELEASE_AIRFLOW.md
+++ b/dev/README_RELEASE_AIRFLOW.md
@@ -27,6 +27,7 @@
   - [[\Optional\] Prepare new release branches and cache](#%5Coptional%5C-prepare-new-release-branches-and-cache)
   - [Prepare PyPI convenience "snapshot" packages](#prepare-pypi-convenience-snapshot-packages)
   - [Prepare production Docker Image](#prepare-production-docker-image)
+  - [Prepare issue for testing status of rc](#prepare-issue-for-testing-status-of-rc)
   - [Prepare Vote email on the Apache Airflow release candidate](#prepare-vote-email-on-the-apache-airflow-release-candidate)
 - [Verify the release candidate by PMCs](#verify-the-release-candidate-by-pmcs)
   - [SVN check](#svn-check)
@@ -337,6 +338,32 @@ Production Docker images should be manually prepared and pushed by the release m
 This will wipe Breeze cache and docker-context-files in order to make sure the build is "clean". It
 also performs image verification before pushing the images.
 
+## Prepare issue for testing status of rc
+
+For now this part works for bugfix releases only, for major/minor ones we will experiment and
+see if there is a way to only extract important/not tested bugfixes and high-level changes to
+make the process manageable.
+
+
+Create an issue for testing status of the RC (PREVIOUS_RELEASE should be the previous release version
+(for example 2.1.0).
+
+```shell script
+cat <<EOF
+Status of testing of Apache Airflow ${VERSION}
+EOF
+```
+
+Content is generated with:
+
+```shell
+./dev/prepare_release_issue.py generate-issue-content --previous-release <PREVIOUS_RELEASE> \
+    --current-release ${VERSION}
+
+```
+
+Copy the URL of the issue.
+
 ## Prepare Vote email on the Apache Airflow release candidate
 
 - Use the dev/airflow-jira script to generate a list of Airflow JIRAs that were closed in the release.
@@ -363,6 +390,8 @@ until Monday, October 11, 2021 at 4:00 pm UTC, or until 3 binding +1 votes have
 
 https://www.timeanddate.com/worldclock/fixedtime.html?msg=8&iso=20211011T1600&p1=1440
 
+Status of testing of the release is kept in TODO:URL_OF_THE_ISSUE_HERE
+
 Consider this my (binding) +1.
 
 Airflow ${VERSION} is available at:
diff --git a/dev/prepare_release_issue.py b/dev/prepare_release_issue.py
new file mode 100755
index 0000000..c98684d
--- /dev/null
+++ b/dev/prepare_release_issue.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import logging
+import os
+import re
+import subprocess
+import textwrap
+from collections import defaultdict
+from typing import Any, Dict, List, NamedTuple, Optional, Set
+
+import click
+from github import Github, Issue, PullRequest, UnknownObjectException
+from rich.console import Console
+from rich.progress import Progress
+
+logger = logging.getLogger(__name__)
+
+console = Console(width=400, color_system="standard")
+
+MY_DIR_PATH = os.path.dirname(__file__)
+SOURCE_DIR_PATH = os.path.abspath(os.path.join(MY_DIR_PATH, os.pardir))
+PR_PATTERN = re.compile(r".*\(#([0-9]+)\)")
+ISSUE_MATCH_IN_BODY = re.compile(r" #([0-9]+)[^0-9]")
+
+
+@click.group(context_settings={'help_option_names': ['-h', '--help'], 'max_content_width': 500})
+def cli():
+    ...
+
+
+option_verbose = click.option(
+    "--verbose",
+    is_flag=True,
+    help="Print verbose information about performed steps",
+)
+
+option_previous_release = click.option(
+    "--previous-release",
+    type=str,
+    required=True,
+    help="commit reference (for example hash or tag) of the previous release.",
+)
+
+option_current_release = click.option(
+    "--current-release",
+    type=str,
+    required=True,
+    help="commit reference (for example hash or tag) of the current release.",
+)
+
+option_github_token = click.option(
+    "--github-token",
+    type=str,
+    required=True,
+    help=textwrap.dedent(
+        """
+        Github token used to authenticate.
+        You can set omit it if you have GITHUB_TOKEN env variable set
+        Can be generated with:
+        https://github.com/settings/tokens/new?description=Read%20sssues&scopes=repo:status"""
+    ),
+    envvar='GITHUB_TOKEN',
+)
+
+option_excluded_pr_list = click.option(
+    "--excluded-pr-list", type=str, default='', help="Coma-separated list of PRs to exclude from the issue."
+)
+
+option_limit_pr_count = click.option(
+    "--limit-pr-count",
+    type=int,
+    default=None,
+    help="Limit PR count processes (useful for testing small subset of PRs).",
+)
+
+
+def get_git_log_command(
+    verbose: bool, from_commit: Optional[str] = None, to_commit: Optional[str] = None
+) -> List[str]:
+    """
+    Get git command to run for the current repo from the current folder (which is the package folder).
+    :param verbose: whether to print verbose info while getting the command
+    :param from_commit: if present - base commit from which to start the log from
+    :param to_commit: if present - final commit which should be the start of the log
+    :return: git command to run
+    """
+    git_cmd = [
+        "git",
+        "log",
+        "--pretty=format:%H %h %cd %s",
+        "--date=short",
+    ]
+    if from_commit and to_commit:
+        git_cmd.append(f"{from_commit}...{to_commit}")
+    elif from_commit:
+        git_cmd.append(from_commit)
+    git_cmd.extend(['--', '.'])
+    if verbose:
+        console.print(f"Command to run: '{' '.join(git_cmd)}'")
+    return git_cmd
+
+
+class Change(NamedTuple):
+    """Stores details about commits"""
+
+    full_hash: str
+    short_hash: str
+    date: str
+    message: str
+    message_without_backticks: str
+    pr: Optional[int]
+
+
+def get_change_from_line(line: str):
+    split_line = line.split(" ", maxsplit=3)
+    message = split_line[3]
+    pr = None
+    pr_match = PR_PATTERN.match(message)
+    if pr_match:
+        pr = pr_match.group(1)
+    return Change(
+        full_hash=split_line[0],
+        short_hash=split_line[1],
+        date=split_line[2],
+        message=message,
+        message_without_backticks=message.replace("`", "'").replace("&39;", "'"),
+        pr=int(pr) if pr else None,
+    )
+
+
+def get_changes(verbose: bool, previous_release: str, current_release: str) -> List[Change]:
+    change_strings = subprocess.check_output(
+        get_git_log_command(verbose, from_commit=previous_release, to_commit=current_release),
+        cwd=SOURCE_DIR_PATH,
+        universal_newlines=True,
+    )
+    return [get_change_from_line(line) for line in change_strings.split("\n")]
+
+
+def render_template(
+    template_name: str,
+    context: Dict[str, Any],
+    autoescape: bool = True,
+    keep_trailing_newline: bool = False,
+) -> str:
+    """
+    Renders template based on it's name. Reads the template from <name>_TEMPLATE.md.jinja2 in current dir.
+    :param template_name: name of the template to use
+    :param context: Jinja2 context
+    :param autoescape: Whether to autoescape HTML
+    :param keep_trailing_newline: Whether to keep the newline in rendered output
+    :return: rendered template
+    """
+    import jinja2
+
+    template_loader = jinja2.FileSystemLoader(searchpath=MY_DIR_PATH)
+    template_env = jinja2.Environment(
+        loader=template_loader,
+        undefined=jinja2.StrictUndefined,
+        autoescape=autoescape,
+        keep_trailing_newline=keep_trailing_newline,
+    )
+    template = template_env.get_template(f"{template_name}_TEMPLATE.md.jinja2")
+    content: str = template.render(context)
+    return content
+
+
+def print_issue_content(
+    current_release: str,
+    pull_requests: Dict[int, PullRequest.PullRequest],
+    linked_issues: Dict[int, List[Issue.Issue]],
+    users: Dict[int, Set[str]],
+):
+    pr_list = list(pull_requests.keys())
+    pr_list.sort()
+    user_logins: Dict[int, str] = {pr: "@" + " @".join(users[pr]) for pr in users}
+    all_users: Set[str] = set()
+    for user_list in users.values():
+        all_users.update(user_list)
+    all_user_logins = "@" + " @".join(all_users)
+    content = render_template(
+        template_name='ISSUE',
+        context={
+            'version': current_release,
+            'pr_list': pr_list,
+            'pull_requests': pull_requests,
+            'linked_issues': linked_issues,
+            'users': users,
+            'user_logins': user_logins,
+            'all_user_logins': all_user_logins,
+        },
+        autoescape=False,
+        keep_trailing_newline=True,
+    )
+    print(content)
+
+
+@cli.command()
+@option_github_token
+@option_previous_release
+@option_current_release
+@option_excluded_pr_list
+@option_verbose
+@option_limit_pr_count
+def generate_issue_content(
+    github_token: str,
+    previous_release: str,
+    current_release: str,
+    excluded_pr_list: str,
+    verbose: bool,
+    limit_pr_count: Optional[int],
+):
+    if excluded_pr_list:
+        excluded_prs = [int(pr) for pr in excluded_pr_list.split(",")]
+    else:
+        excluded_prs = []
+    changes = get_changes(verbose, previous_release, current_release)
+    prs = list(
+        filter(lambda pr: pr is not None and pr not in excluded_prs, [change.pr for change in changes])
+    )
+    g = Github(github_token)
+    repo = g.get_repo("apache/airflow")
+    pull_requests: Dict[int, PullRequest.PullRequest] = {}
+    linked_issues: Dict[int, List[Issue.Issue]] = defaultdict(lambda: [])
+    users: Dict[int, Set[str]] = defaultdict(lambda: set())
+    count_prs = len(prs)
+    if limit_pr_count:
+        count_prs = limit_pr_count
+    with Progress(console=console) as progress:
+        task = progress.add_task(f"Retrieving {count_prs} PRs ", total=count_prs)
+        for i in range(count_prs):
+            pr_number = prs[i]
+            progress.console.print(
+                f"Retrieving PR#{pr_number}: " f"https://github.com/apache/airflow/pull/{pr_number}"
+            )
+            try:
+                pr = repo.get_pull(pr_number)
+            except UnknownObjectException:
+                # Fallback to issue if PR not found
+                try:
+                    pr = repo.get_issue(pr_number)  # (same fields as PR)
+                except UnknownObjectException:
+                    console.print(f"[red]The PR #{pr_number} could not be found[/]")
+                continue
+            pull_requests[pr_number] = pr
+            # GitHub does not have linked issues in PR - but we quite rigorously add Fixes/Closes
+            # Relate so we can find those from the body
+            if pr.body:
+                body = pr.body.replace("\n", " ").replace("\r", " ")
+                for issue_match in ISSUE_MATCH_IN_BODY.finditer(body):
+                    linked_issue_number = int(issue_match.group(1))
+                    progress.console.print(
+                        f"Retrieving Linked issue PR#{linked_issue_number}: "
+                        f"https://github.com/apache/airflow/issue/{linked_issue_number}"
+                    )
+                    try:
+                        linked_issues[pr_number].append(repo.get_issue(linked_issue_number))
+                    except UnknownObjectException:
+                        progress.console.print(
+                            f"Failed to retrieve linked issue #{linked_issue_number}: Unknown Issue"
+                        )
+            users[pr_number].add(pr.user.login)
+            for linked_issue in linked_issues[pr_number]:
+                users[pr_number].add(linked_issue.user.login)
+            progress.advance(task)
+    print_issue_content(current_release, pull_requests, linked_issues, users)
+
+
+if __name__ == "__main__":
+    cli()