You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2023/05/20 12:34:53 UTC
[arrow-datafusion] branch main updated: Add Python script for generating changelog content (#6391)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 19af95253d Add Python script for generating changelog content (#6391)
19af95253d is described below
commit 19af95253d990283be95544bf7d47530524b59d1
Author: Andy Grove <an...@gmail.com>
AuthorDate: Sat May 20 06:34:47 2023 -0600
Add Python script for generating changelog content (#6391)
---
dev/release/README.md | 33 +++++++++--
dev/release/generate-changelog.py | 114 ++++++++++++++++++++++++++++++++++++++
2 files changed, 141 insertions(+), 6 deletions(-)
diff --git a/dev/release/README.md b/dev/release/README.md
index 71ebb27493..48a54a64df 100644
--- a/dev/release/README.md
+++ b/dev/release/README.md
@@ -76,14 +76,35 @@ PyPI.
### Change Log
-We maintain a `CHANGELOG.md` so our users know what has been
-changed between releases.
+We maintain a `CHANGELOG.md` so our users know what has been changed between releases.
-The CHANGELOG is managed automatically using
-[update_change_log.sh](https://github.com/apache/arrow-datafusion/blob/main/dev/release/update_change_log.sh)
+The changelog is generated using a Python script:
-This script creates a changelog using GitHub PRs and issues based on the labels
-associated with them.
+```bash
+$ GITHUB_TOKEN=<TOKEN> ./dev/release/generate-changelog.py apache/arrow-datafusion 24.0.0 HEAD > dev/changelog/25.0.0.md
+```
+
+This script creates a changelog from GitHub PRs based on the labels associated with them, and also recognizes
+titles starting with `feat:`, `fix:`, or `docs:`. The script will produce output similar to:
+
+```
+Fetching list of commits between 24.0.0 and HEAD
+Fetching pull requests
+Categorizing pull requests
+Generating changelog content
+```
+
+This process is not fully automated, so there are some additional manual steps:
+
+- Add the ASF header to the generated file
+- Add a link to this changelog from the top-level `/datafusion/CHANGELOG.md`
+- Add the following content (copy from the previous version's changelog and update as appropriate):
+
+```
+## [24.0.0](https://github.com/apache/arrow-datafusion/tree/24.0.0) (2023-05-06)
+
+[Full Changelog](https://github.com/apache/arrow-datafusion/compare/23.0.0...24.0.0)
+```
## Prepare release commits and PR
diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py
new file mode 100644
index 0000000000..ff9e8d4754
--- /dev/null
+++ b/dev/release/generate-changelog.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import sys
+from github import Github
+import os
+import re
+
+
+def print_pulls(repo_name, title, pulls):
+    """Print one changelog section as Markdown to stdout.
+
+    Args:
+        repo_name: Repository in ``owner/name`` form, used to build PR links.
+        title: Section heading; rendered in bold followed by a colon.
+        pulls: List of ``(pull, commit)`` pairs to list under the heading.
+
+    Nothing is printed when ``pulls`` is empty, so empty sections are omitted
+    from the changelog entirely.
+    """
+    if not pulls:
+        return
+
+    print("**{}:**".format(title))
+    print()
+    for (pull, commit) in pulls:
+        url = "https://github.com/{}/pull/{}".format(repo_name, pull.number)
+        # commit.author is None when GitHub cannot map the git author to an
+        # account; fall back to the name recorded in the git commit itself
+        # instead of crashing with an AttributeError.
+        if commit.author is not None:
+            author = commit.author.login
+        else:
+            author = commit.commit.author.name
+        print("- {} [#{}]({}) ({})".format(pull.title, pull.number, url, author))
+    print()
+
+
+# Conventional Commits title prefix: type, optional "(scope)", optional "!"
+# breaking-change marker, then a colon. The scope accepts any text without
+# parentheses so that scopes such as "sql-parser" are recognised too.
+_CC_TITLE = re.compile(r'^([a-z]+)(\([^()]+\))?(!)?:')
+
+
+def _parse_conventional_commit(title):
+    """Return ``(cc_type, is_breaking)`` parsed from a PR title.
+
+    ``cc_type`` is the Conventional Commits type (e.g. ``fix``, ``feat``,
+    ``docs``) or ``''`` when the title does not follow the convention.
+    ``is_breaking`` is True only when the prefix carries the ``!`` marker.
+    """
+    match = _CC_TITLE.match(title)
+    if match is None:
+        return '', False
+    return match.group(1), match.group(3) == '!'
+
+
+def generate_changelog(repo, repo_name, tag1, tag2):
+    """Print Markdown changelog content for the commits between two refs.
+
+    Progress messages are written to stderr; the changelog itself is written
+    to stdout so that it can be redirected into a file.
+
+    Args:
+        repo: Repository object (as returned by ``Github.get_repo``) to query.
+        repo_name: Repository in ``owner/name`` form, used to build PR links.
+        tag1: Base ref of the comparison (the previous release tag).
+        tag2: Head ref of the comparison (the current release tag).
+    """
+
+    # get a list of commits between two tags
+    print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr)
+    comparison = repo.compare(tag1, tag2)
+
+    # get the pull requests for these commits
+    print("Fetching pull requests", file=sys.stderr)
+    seen_pulls = set()
+    all_pulls = []
+    for commit in comparison.commits:
+        for pull in commit.get_pulls():
+            # there can be multiple commits per PR if squash merge is not being used and
+            # in this case we should get all the author names, but for now just pick one
+            if pull.number not in seen_pulls:
+                seen_pulls.add(pull.number)
+                all_pulls.append((pull, commit))
+
+    # we split the pulls into categories
+    # TODO: make categories configurable
+    breaking = []
+    bugs = []
+    docs = []
+    enhancements = []
+
+    # categorize the pull requests based on GitHub labels, falling back to a
+    # Conventional Commits prefix in the PR title; the first matching category
+    # wins, so a breaking change is never also listed as a bug fix or feature
+    print("Categorizing pull requests", file=sys.stderr)
+    for (pull, commit) in all_pulls:
+        cc_type, cc_breaking = _parse_conventional_commit(pull.title)
+        labels = [label.name for label in pull.labels]
+
+        if 'api change' in labels or cc_breaking:
+            breaking.append((pull, commit))
+        elif 'bug' in labels or cc_type == 'fix':
+            bugs.append((pull, commit))
+        elif 'enhancement' in labels or cc_type == 'feat':
+            enhancements.append((pull, commit))
+        elif 'documentation' in labels or cc_type == 'docs':
+            docs.append((pull, commit))
+
+    # produce the changelog content; every PR also appears in the final
+    # "Merged pull requests" section regardless of category
+    print("Generating changelog content", file=sys.stderr)
+    print_pulls(repo_name, "Breaking changes", breaking)
+    print_pulls(repo_name, "Implemented enhancements", enhancements)
+    print_pulls(repo_name, "Fixed bugs", bugs)
+    print_pulls(repo_name, "Documentation updates", docs)
+    print_pulls(repo_name, "Merged pull requests", all_pulls)
+
+
+def cli(args=None):
+    """Process command line arguments and print the changelog to stdout.
+
+    Args:
+        args: Argument list to parse, without the program name. When omitted
+            or empty, ``sys.argv[1:]`` is used.
+
+    The GitHub API token is read from the ``GITHUB_TOKEN`` environment
+    variable.
+    """
+    if not args:
+        args = sys.argv[1:]
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("project", help="The project name e.g. apache/arrow-datafusion")
+    parser.add_argument("tag1", help="The previous release tag")
+    parser.add_argument("tag2", help="The current release tag")
+    # parse the list we were given; a bare parse_args() always reads sys.argv
+    # and would silently ignore the ``args`` parameter
+    options = parser.parse_args(args)
+
+    token = os.getenv("GITHUB_TOKEN")
+    if not token:
+        # warn on stderr so the changelog written to stdout stays clean
+        print(
+            "Warning: GITHUB_TOKEN is not set; unauthenticated GitHub API "
+            "requests are heavily rate limited",
+            file=sys.stderr,
+        )
+
+    g = Github(token)
+    repo = g.get_repo(options.project)
+    generate_changelog(repo, options.project, options.tag1, options.tag2)
+
+# Run the command line interface only when this file is executed as a script.
+if __name__ == "__main__":
+ cli()
\ No newline at end of file