You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/05/31 21:01:08 UTC
[arrow] branch master updated: ARROW-16602: [Dev] Use GitHub API to merge pull request (#13184)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 01e4ad095a ARROW-16602: [Dev] Use GitHub API to merge pull request (#13184)
01e4ad095a is described below
commit 01e4ad095a7649afc7a7316447bc5cc1fd9679a2
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Wed Jun 1 06:00:59 2022 +0900
ARROW-16602: [Dev] Use GitHub API to merge pull request (#13184)
We use local "git merge" to merge a pull request in
dev/merge_arrow_pr.py.
If we use "git merge" to merge a pull request, GitHub's Web UI shows
"Closed" mark not "Merged" mark in a pull request page. This sometimes
confuses new contributors. "Why was my pull request closed without
merging?" See
https://github.com/apache/arrow/pull/12004#issuecomment-1031619771 for
example.
If we use GitHub API
https://docs.github.com/en/rest/pulls/pulls#merge-a-pull-request to
merge a pull request, GitHub's Web UI shows "Merged" mark not "Closed"
mark. See https://github.com/apache/arrow/pull/13180 for example. I
used GitHub API to merge the pull request.
And we don't need to create a local branch on local repository to
merge a pull request. But we must specify ARROW_GITHUB_API_TOKEN to
run dev/merge_arrow_pr.py.
Authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
dev/README.md | 6 +-
dev/merge.conf.sample | 4 +
dev/merge_arrow_pr.py | 202 ++++++++++++++++++-----------------------
dev/release/post-04-website.sh | 2 +-
dev/test_merge_arrow_pr.py | 18 ++--
5 files changed, 104 insertions(+), 128 deletions(-)
diff --git a/dev/README.md b/dev/README.md
index d861944e1e..267bf008ac 100644
--- a/dev/README.md
+++ b/dev/README.md
@@ -53,9 +53,9 @@ After installed, it runs the merge script.
have to install Python dependencies yourself and then run `dev/merge_arrow_pr.py`
directly)
-The merge script uses the GitHub REST API; if you encounter rate limit issues,
-you may set a `ARROW_GITHUB_API_TOKEN` environment variable to use a Personal
-Access Token.
+The merge script uses the GitHub REST API. You must set a
+`ARROW_GITHUB_API_TOKEN` environment variable to use a Personal Access
+Token. You need to add `workflow` scope to the Personal Access Token.
You can specify the username and the password of your JIRA account in
`APACHE_JIRA_USERNAME` and `APACHE_JIRA_PASSWORD` environment variables.
diff --git a/dev/merge.conf.sample b/dev/merge.conf.sample
index c71b211614..9c0aa41485 100644
--- a/dev/merge.conf.sample
+++ b/dev/merge.conf.sample
@@ -23,3 +23,7 @@
# token credentials. Ensure that the file is properly protected.
username=johnsmith
password=123456
+
+[github]
+# GitHub's personal access token. "workflow" scope is needed.
+api_token=ghp_ABC
diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py
index 012a2ac6e7..117bdda562 100755
--- a/dev/merge_arrow_pr.py
+++ b/dev/merge_arrow_pr.py
@@ -33,8 +33,7 @@
# Configuration environment variables:
# - APACHE_JIRA_USERNAME: your Apache JIRA ID
# - APACHE_JIRA_PASSWORD: your Apache JIRA password
-# - ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests (to
-# avoid rate limiting)
+# - ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests
# - PR_REMOTE_NAME: the name of the remote to the Apache git repo (set to
# 'apache' by default)
# - DEBUG: use for testing to avoid pushing to apache (0 by default)
@@ -71,14 +70,12 @@ if DEBUG:
print("**************** DEBUGGING ****************")
-# Prefix added to temporary branches
-BRANCH_PREFIX = "PR_TOOL"
JIRA_API_BASE = "https://issues.apache.org/jira"
def get_json(url, headers=None):
- req = requests.get(url, headers=headers)
- return req.json()
+ response = requests.get(url, headers=headers)
+ return response.json()
def run_cmd(cmd):
@@ -101,21 +98,6 @@ def run_cmd(cmd):
return output
-original_head = run_cmd("git rev-parse HEAD")[:8]
-
-
-def clean_up():
- print("Restoring head pointer to %s" % original_head)
- run_cmd("git checkout %s" % original_head)
-
- branches = run_cmd("git branch").replace(" ", "").split("\n")
-
- for branch in [x for x in branches
- if x.startswith(BRANCH_PREFIX)]:
- print("Deleting local branch %s" % branch)
- run_cmd("git branch -D %s" % branch)
-
-
_REGEX_CI_DIRECTIVE = re.compile(r'\[[^\]]*\]')
@@ -255,20 +237,48 @@ URL\t\t{}/{}""".format(jira_id, summary, assignee, components, status,
class GitHubAPI(object):
- def __init__(self, project_name):
+ def __init__(self, project_name, cmd):
self.github_api = ("https://api.github.com/repos/apache/{0}"
.format(project_name))
- token = os.environ.get('ARROW_GITHUB_API_TOKEN', None)
- if token:
- self.headers = {'Authorization': 'token {0}'.format(token)}
- else:
- self.headers = None
+ token = None
+ config = load_configuration()
+ if "github" in config.sections():
+ token = config["github"]["api_token"]
+ if not token:
+ token = os.environ.get('ARROW_GITHUB_API_TOKEN')
+ if not token:
+ token = cmd.prompt('Env ARROW_GITHUB_API_TOKEN not set, '
+ 'please enter your GitHub API token '
+ '(GitHub personal access token):')
+ headers = {
+ 'Accept': 'application/vnd.github.v3+json',
+ 'Authorization': 'token {0}'.format(token),
+ }
+ self.headers = headers
def get_pr_data(self, number):
return get_json("%s/pulls/%s" % (self.github_api, number),
headers=self.headers)
+ def get_pr_commits(self, number):
+ return get_json("%s/pulls/%s/commits" % (self.github_api, number),
+ headers=self.headers)
+
+ def merge_pr(self, number, commit_title, commit_message):
+ url = f'{self.github_api}/pulls/{number}/merge'
+ payload = {
+ 'commit_title': commit_title,
+ 'commit_message': commit_message,
+ 'merge_method': 'squash',
+ }
+ response = requests.put(url, headers=self.headers, json=payload)
+ result = response.json()
+ if response.status_code != 200 and 'merged' not in result:
+ result['merged'] = False
+ result['message'] += f': {url}'
+ return result
+
class CommandInput(object):
"""
@@ -276,7 +286,6 @@ class CommandInput(object):
"""
def fail(self, msg):
- clean_up()
raise Exception(msg)
def prompt(self, prompt):
@@ -300,6 +309,7 @@ class PullRequest(object):
def __init__(self, cmd, github_api, git_remote, jira_con, number):
self.cmd = cmd
+ self._github_api = github_api
self.git_remote = git_remote
self.con = jira_con
self.number = number
@@ -358,35 +368,23 @@ class PullRequest(object):
"""
merge the requested PR and return the merge hash
"""
- pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, self.number)
- target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX,
- self.number,
- self.target_ref.upper())
- run_cmd("git fetch %s pull/%s/head:%s" % (self.git_remote,
- self.number,
- pr_branch_name))
- run_cmd("git fetch %s %s:%s" % (self.git_remote, self.target_ref,
- target_branch_name))
- run_cmd("git checkout %s" % target_branch_name)
-
- had_conflicts = False
- try:
- run_cmd(['git', 'merge', pr_branch_name, '--ff', '--squash'])
- except Exception as e:
- msg = ("Error merging: %s\nWould you like to "
- "manually fix-up this merge?" % e)
- self.cmd.continue_maybe(msg)
- msg = ("Okay, please fix any conflicts and 'git add' "
- "conflicting files... Finished?")
- self.cmd.continue_maybe(msg)
- had_conflicts = True
-
- commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
- '--pretty=format:%an <%ae>']).split("\n")
- commit_co_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
- '--pretty=%(trailers:key=Co-authored-by,'
- 'valueonly)']).split("\n")
- commit_co_authors = list(filter(None, commit_co_authors))
+ commits = self._github_api.get_pr_commits(self.number)
+
+ def format_commit_author(commit):
+ author = commit['commit']['author']
+ name = author['name']
+ email = author['email']
+ return f'{name} <{email}>'
+ commit_authors = [format_commit_author(commit) for commit in commits]
+ co_authored_by_re = re.compile(r'^Co-authored-by:\s*(.*)')
+
+ def extract_co_authors(commit):
+ message = commit['commit']['message']
+ return co_authored_by_re.findall(message)
+ commit_co_authors = []
+ for commit in commits:
+ commit_co_authors.extend(extract_co_authors(commit))
+
all_commit_authors = commit_authors + commit_co_authors
distinct_authors = sorted(set(all_commit_authors),
key=lambda x: commit_authors.count(x),
@@ -396,74 +394,51 @@ class PullRequest(object):
print("Author {}: {}".format(i + 1, author))
if len(distinct_authors) > 1:
- primary_author, distinct_authors = get_primary_author(
+ primary_author, distinct_other_authors = get_primary_author(
self.cmd, distinct_authors)
else:
# If there is only one author, do not prompt for a lead author
- primary_author = distinct_authors[0]
-
- merge_message_flags = []
+ primary_author = distinct_authors.pop()
+ distinct_other_authors = []
- merge_message_flags += ["-m", self.title]
+ commit_title = f'{self.title} (#{self.number})'
+ commit_message_chunks = []
if self.body is not None:
- merge_message_flags += ["-m", self.body]
+ commit_message_chunks.append(self.body)
committer_name = run_cmd("git config --get user.name").strip()
committer_email = run_cmd("git config --get user.email").strip()
- authors = ("Authored-by:" if len(distinct_authors) == 1
+ authors = ("Authored-by:" if len(distinct_other_authors) == 0
else "Lead-authored-by:")
- authors += " %s" % (distinct_authors.pop(0))
+ authors += " %s" % primary_author
if len(distinct_authors) > 0:
authors += "\n" + "\n".join(["Co-authored-by: %s" % a
- for a in distinct_authors])
+ for a in distinct_other_authors])
authors += "\n" + "Signed-off-by: %s <%s>" % (committer_name,
committer_email)
+ commit_message_chunks.append(authors)
- if had_conflicts:
- committer_name = run_cmd("git config --get user.name").strip()
- committer_email = run_cmd("git config --get user.email").strip()
- message = ("This patch had conflicts when merged, "
- "resolved by\nCommitter: %s <%s>" %
- (committer_name, committer_email))
- merge_message_flags += ["-m", message]
-
- # The string "Closes #%s" string is required for GitHub to correctly
- # close the PR
- merge_message_flags += [
- "-m",
- "Closes #%s from %s"
- % (self.number, self.description)]
- merge_message_flags += ["-m", authors]
+ commit_message = "\n\n".join(commit_message_chunks)
if DEBUG:
- print("\n".join(merge_message_flags))
-
- run_cmd(['git', 'commit',
- '--no-verify', # do not run commit hooks
- '--author="%s"' % primary_author] +
- merge_message_flags)
-
- self.cmd.continue_maybe("Merge complete (local ref %s). Push to %s?"
- % (target_branch_name, self.git_remote))
+ print(commit_title)
+ print()
+ print(commit_message)
- try:
- push_cmd = ('git push %s %s:%s' % (self.git_remote,
- target_branch_name,
- self.target_ref))
- if DEBUG:
- print(push_cmd)
- else:
- run_cmd(push_cmd)
- except Exception as e:
- clean_up()
- self.cmd.fail("Exception while pushing: %s" % e)
+ if DEBUG:
+ merge_hash = None
+ else:
+ result = self._github_api.merge_pr(self.number,
+ commit_title,
+ commit_message)
+ if not result['merged']:
+ message = result['message']
+ self.cmd.fail(f'Failed to merge pull request: {message}')
+ merge_hash = result['sha']
- merge_hash = run_cmd("git rev-parse %s" % target_branch_name)[:8]
- clean_up()
print("Pull request #%s merged!" % self.number)
print("Merge hash: %s" % merge_hash)
- return merge_hash
def get_primary_author(cmd, distinct_authors):
@@ -475,7 +450,7 @@ def get_primary_author(cmd, distinct_authors):
"\"name <email>\" [%s]: " % distinct_authors[0])
if primary_author == "":
- return distinct_authors[0], distinct_authors
+ return distinct_authors[0], distinct_authors[1:]
if author_pat.match(primary_author):
break
@@ -483,10 +458,9 @@ def get_primary_author(cmd, distinct_authors):
# When primary author is specified manually, de-dup it from
# author list and put it at the head of author list.
- distinct_authors = [x for x in distinct_authors
- if x != primary_author]
- distinct_authors = [primary_author] + distinct_authors
- return primary_author, distinct_authors
+ distinct_other_authors = [x for x in distinct_authors
+ if x != primary_author]
+ return primary_author, distinct_other_authors
def prompt_for_fix_version(cmd, jira_issue):
@@ -581,25 +555,23 @@ def cli():
os.chdir(ARROW_HOME)
- github_api = GitHubAPI(PROJECT_NAME)
+ github_api = GitHubAPI(PROJECT_NAME, cmd)
jira_con = connect_jira(cmd)
pr = PullRequest(cmd, github_api, PR_REMOTE_NAME, jira_con, pr_num)
if pr.is_merged:
- print("Pull request %s has already been merged")
+ print("Pull request %s has already been merged" % pr_num)
sys.exit(0)
if not pr.is_mergeable:
- msg = ("Pull request %s is not mergeable in its current form.\n"
- % pr_num + "Continue? (experts only!)")
- cmd.continue_maybe(msg)
+ print("Pull request %s is not mergeable in its current form" % pr_num)
+ sys.exit(1)
pr.show()
cmd.continue_maybe("Proceed with merging pull request #%s?" % pr_num)
- # merged hash not used
pr.merge()
if pr.jira_issue is None:
diff --git a/dev/release/post-04-website.sh b/dev/release/post-04-website.sh
index d8a9df4249..0f41a97e66 100755
--- a/dev/release/post-04-website.sh
+++ b/dev/release/post-04-website.sh
@@ -67,7 +67,7 @@ rough_n_development_months=$((
git_tag=apache-arrow-${version}
git_range=apache-arrow-${previous_version}..${git_tag}
-committers_command_line="git shortlog -csn ${git_range}"
+committers_command_line="git shortlog -sn --group=trailer:signed-off-by ${git_range}"
contributors_command_line="git shortlog -sn ${git_range}"
committers=$(${committers_command_line})
diff --git a/dev/test_merge_arrow_pr.py b/dev/test_merge_arrow_pr.py
old mode 100644
new mode 100755
index 8fe1883508..5452152828
--- a/dev/test_merge_arrow_pr.py
+++ b/dev/test_merge_arrow_pr.py
@@ -211,22 +211,22 @@ def test_multiple_authors_bad_input():
distinct_authors = [a0, a1]
cmd = FakeCLI(responses=[''])
- primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
- cmd, distinct_authors)
+ primary_author, distinct_other_authors = \
+ merge_arrow_pr.get_primary_author(cmd, distinct_authors)
assert primary_author == a0
- assert new_distinct_authors == [a0, a1]
+ assert distinct_other_authors == [a1]
cmd = FakeCLI(responses=['oops', a1])
- primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
- cmd, distinct_authors)
+ primary_author, distinct_other_authors = \
+ merge_arrow_pr.get_primary_author(cmd, distinct_authors)
assert primary_author == a1
- assert new_distinct_authors == [a1, a0]
+ assert distinct_other_authors == [a0]
cmd = FakeCLI(responses=[a2])
- primary_author, new_distinct_authors = merge_arrow_pr.get_primary_author(
- cmd, distinct_authors)
+ primary_author, distinct_other_authors = \
+ merge_arrow_pr.get_primary_author(cmd, distinct_authors)
assert primary_author == a2
- assert new_distinct_authors == [a2, a0, a1]
+ assert distinct_other_authors == [a0, a1]
def test_jira_already_resolved():