You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2022/04/12 18:37:39 UTC
[GitHub] [tvm] driazati commented on a diff in pull request #10833: [ci] Add GitHub Actions bot to merge PRs on demand

driazati commented on code in PR #10833:
URL: https://github.com/apache/tvm/pull/10833#discussion_r848754423


##########
tests/python/ci/sample_prs/pr10786-badci.json:
##########
@@ -0,0 +1,123 @@
+{

Review Comment:
   is there something more than the result that you were looking for? i.e. the actual usages here https://github.com/apache/tvm/pull/10833/files#diff-0dd95b1adca3c8ab9745a9be219a5683885a9710365a447ee290841b172747a6R72-R91



##########
tests/scripts/github_mergebot.py:
##########
@@ -0,0 +1,501 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import json
+import argparse
+import warnings
+import logging
+import traceback
+from typing import Dict, Any, List, Optional
+from pathlib import Path
+
+from git_utils import git, GitHubRepo, parse_remote
+from cmd_utils import init_log
+
+
+Review = Dict[str, Any]
+CIJob = Dict[str, Any]
+
+
+def js(obj: Any) -> str:
+    return json.dumps(obj, indent=2)
+
+
+PR_QUERY = """
+    query ($owner: String!, $name: String!, $number: Int!) {
+      repository(owner: $owner, name: $name) {
+        pullRequest(number: $number) {
+          title
+          body
+          state
+          comments(last: 100) {
+            pageInfo {
+              hasPreviousPage
+            }
+            nodes {
+              author {
+                login
+              }
+              updatedAt
+              body
+            }
+          }
+          authorCommits:commits(last:100) {
+            nodes {
+              commit {
+                authors(first:100) {
+                  nodes {
+                    name
+                    email
+                  }
+                }
+              }
+            }
+          }
+          commits(last: 1) {
+            nodes {
+              commit {
+                oid
+                statusCheckRollup {
+                  contexts(first: 100) {
+                    pageInfo {
+                      hasNextPage
+                    }
+                    nodes {
+                      ... on CheckRun {
+                        name
+                        checkSuite {
+                          workflowRun {
+                            workflow {
+                              name
+                            }
+                          }
+                        }
+                        status
+                        conclusion
+                        url
+                      }
+                      ... on StatusContext {
+                        state
+                        context
+                        targetUrl
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+          reviewDecision
+          reviews(last: 100) {
+            pageInfo {
+              hasPreviousPage
+            }
+            nodes {
+              body
+              updatedAt
+              authorCanPushToRepository
+              commit {
+                oid
+              }
+              state
+            }
+          }
+        }
+      }
+    }
+    """
+
+
+def walk(obj, visitor, parent_key=None):
+    """
+    Recursively call 'visitor' on all the children of a dictionary
+    """
+    visitor(obj, parent_key)
+    if isinstance(obj, dict):
+        for k, v in obj.items():
+            walk(v, visitor, parent_key=k)
+    elif isinstance(obj, list):
+        for v in obj:
+            walk(v, visitor)
+
+
+class PR:
+    def __init__(
+        self,
+        number: int,
+        owner: str,
+        repo: str,
+        dry_run: bool = False,
+        raw_data: Dict[str, Any] = None,
+    ):
+        self.owner = owner
+        self.number = number
+        self.repo_name = repo
+        self.dry_run = dry_run
+
+        self.github = GitHubRepo(user=owner, repo=repo, token=os.environ["GITHUB_TOKEN"])
+
+        if dry_run and raw_data:
+            # In test mode there is no need to fetch anything
+            self.raw = raw_data
+        else:
+            if os.getenv("DEBUG", "0") == "1":

Review Comment:
   one is for testing, the other is for development, we can probably delete this at some point once we have some more consensus on this PR (i.e. `DEBUG` will fetch + cache automatically, `--testing-pr-json` relies on a file being on disk)



##########
tests/scripts/github_mergebot.py:
##########
@@ -0,0 +1,501 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import json
+import argparse
+import warnings
+import logging
+import traceback
+from typing import Dict, Any, List, Optional
+from pathlib import Path
+
+from git_utils import git, GitHubRepo, parse_remote
+from cmd_utils import init_log
+
+
+Review = Dict[str, Any]
+CIJob = Dict[str, Any]
+
+
+def js(obj: Any) -> str:
+    return json.dumps(obj, indent=2)
+
+
+PR_QUERY = """
+    query ($owner: String!, $name: String!, $number: Int!) {
+      repository(owner: $owner, name: $name) {
+        pullRequest(number: $number) {
+          title
+          body
+          state
+          comments(last: 100) {
+            pageInfo {
+              hasPreviousPage
+            }
+            nodes {
+              author {
+                login
+              }
+              updatedAt
+              body
+            }
+          }
+          authorCommits:commits(last:100) {
+            nodes {
+              commit {
+                authors(first:100) {
+                  nodes {
+                    name
+                    email
+                  }
+                }
+              }
+            }
+          }
+          commits(last: 1) {
+            nodes {
+              commit {
+                oid
+                statusCheckRollup {
+                  contexts(first: 100) {
+                    pageInfo {
+                      hasNextPage
+                    }
+                    nodes {
+                      ... on CheckRun {
+                        name
+                        checkSuite {
+                          workflowRun {
+                            workflow {
+                              name
+                            }
+                          }
+                        }
+                        status
+                        conclusion
+                        url
+                      }
+                      ... on StatusContext {
+                        state
+                        context
+                        targetUrl
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+          reviewDecision
+          reviews(last: 100) {
+            pageInfo {
+              hasPreviousPage
+            }
+            nodes {
+              body
+              updatedAt
+              authorCanPushToRepository
+              commit {
+                oid
+              }
+              state
+            }
+          }
+        }
+      }
+    }
+    """
+
+
+def walk(obj, visitor, parent_key=None):
+    """
+    Recursively call 'visitor' on all the children of a dictionary
+    """
+    visitor(obj, parent_key)
+    if isinstance(obj, dict):
+        for k, v in obj.items():
+            walk(v, visitor, parent_key=k)
+    elif isinstance(obj, list):
+        for v in obj:
+            walk(v, visitor)
+
+
+class PR:
+    def __init__(
+        self,
+        number: int,
+        owner: str,
+        repo: str,
+        dry_run: bool = False,
+        raw_data: Dict[str, Any] = None,
+    ):
+        self.owner = owner
+        self.number = number
+        self.repo_name = repo
+        self.dry_run = dry_run
+
+        self.github = GitHubRepo(user=owner, repo=repo, token=os.environ["GITHUB_TOKEN"])
+
+        if dry_run and raw_data:
+            # In test mode there is no need to fetch anything
+            self.raw = raw_data
+        else:
+            if os.getenv("DEBUG", "0") == "1":
+                # For local runs fill in the requested data but cache it for
+                # later use
+                cached_path = Path("pr.json")
+                if not cached_path.exists():
+                    self.raw = self.fetch_data()
+                    with open(cached_path, "w") as f:
+                        json.dump(self.raw, f, indent=2)
+                else:
+                    with open(cached_path) as f:
+                        self.raw = json.load(f)
+            else:
+                # Usual path, fetch the PR's data based on the number from
+                # GitHub
+                self.raw = self.fetch_data()
+
+        def checker(obj, parent_key):
+            """
+            Verify that any paged results don't have extra data (if so the bot
+            may still work since most relevant comments will be more recent)
+            """
+            if parent_key == "pageInfo":
+                if obj.get("hasPreviousPage", False):
+                    warnings.warn(f"Found {obj} with a previous page, bot may be missing data")
+                if obj.get("hasNextPage", False):
+                    warnings.warn(f"Found {obj} with a next page, bot may be missing data")
+
+        walk(self.raw, checker)
+
+        logging.info(f"Verified data, running with PR {js(self.raw)}")
+
+    def __repr__(self):
+        return json.dumps(self.raw, indent=2)
+
+    def head_commit(self):
+        return self.raw["commits"]["nodes"][0]["commit"]
+
+    def co_authors(self) -> List[str]:
+        authors = []
+        for commit in self.raw["authorCommits"]["nodes"]:
+            # Co-authors always come after the main author according to the
+            # GitHub docs, so ignore the first item
+            for author in commit["commit"]["authors"]["nodes"][1:]:
+                name = author["name"]
+                email = author["email"]
+                authors.append(f"{name} <{email}>")
+
+        return list(set(authors))
+
+    def head_oid(self):
+        return self.head_commit()["oid"]
+
+    def ci_jobs(self) -> List[CIJob]:
+        """
+        Get a list of all CI jobs (GitHub Actions and other) in a unified format
+        """
+        jobs = []
+        for item in self.head_commit()["statusCheckRollup"]["contexts"]["nodes"]:
+            if "checkSuite" in item:
+                # GitHub Actions job, parse separately
+                status = item["conclusion"]
+                if status is None:
+                    # If the 'conclusion' isn't filled out the job hasn't
+                    # finished yet
+                    status = "PENDING"
+                jobs.append(
+                    {
+                        "name": item["checkSuite"]["workflowRun"]["workflow"]["name"]
+                        + " / "
+                        + item["name"],
+                        "url": item["url"],
+                        "status": status.upper(),
+                    }
+                )
+            else:
+                # GitHub Status (e.g. from Jenkins)
+                jobs.append(
+                    {
+                        "name": item["context"],
+                        "url": item["targetUrl"],
+                        "status": item["state"].upper(),
+                    }
+                )
+
+        logging.info(f"Found CI jobs for {self.head_commit()['oid']} {js(jobs)}")
+        return jobs
+
+    def reviews(self) -> List[Review]:
+        return self.raw["reviews"]["nodes"]
+
+    def head_commit_reviews(self) -> List[Review]:
+        """
+        Find reviews associated with the head commit
+        """
+        commits_to_review_status: Dict[str, List[Review]] = {}
+
+        for review in self.reviews():
+            if not review["authorCanPushToRepository"]:
+                # ignore reviews from non-committers
+                continue
+
+            oid = review["commit"]["oid"]
+            if oid in commits_to_review_status:
+                commits_to_review_status[oid].append(review)
+            else:
+                commits_to_review_status[oid] = [review]
+
+        # Only use the data for the head commit of the PR
+        head_reviews = commits_to_review_status.get(self.head_oid(), [])
+        return head_reviews
+
+    def fetch_data(self):
+        """
+        Fetch the data for this PR from GitHub
+        """
+        return self.github.graphql(
+            query=PR_QUERY,
+            variables={
+                "owner": self.owner,
+                "name": self.repo_name,
+                "number": self.number,
+            },
+        )["data"]["repository"]["pullRequest"]
+
+    def comment(self, text: str) -> None:
+        """
+        Leave the comment 'text' on this PR
+        """
+        logging.info(f"Commenting:\n{text}")
+        # TODO: Update latest comment in-place if there has been no activity
+        data = {"body": text}
+        url = f"issues/{self.number}/comments"
+        if self.dry_run:
+            logging.info(
+                f"Dry run, would have commented on url={url} commenting with data={js(data)}"
+            )
+            return
+
+        self.github.post(url, data=data)
+
+    def state(self) -> str:
+        """
+        PR state (OPEN, CLOSED, MERGED, etc)
+        """
+        return self.raw["state"]
+
+    def lint_commit_message(self, subject: str, body: str) -> bool:
+        # TODO: NYI (Add rules as decided in https://discuss.tvm.apache.org/t/commit-message-guideline/12334)
+        return True
+
+    def processed_body(self) -> str:
+        body = self.raw["body"].strip().replace("\r", "")
+        body = body.replace(
+            "Thanks for contributing to TVM!   Please refer to guideline https://tvm.apache.org/docs/contribute/ for useful information and tips. After the pull request is submitted, please request code reviews from [Reviewers](https://github.com/apache/incubator-tvm/blob/master/CONTRIBUTORS.md#reviewers) by @ them in the pull request thread.",
+            "",
+        )
+        return body
+
+    def body_with_co_authors(self) -> str:
+        """
+        Add 'Co-authored-by' strings to the PR body based on the prior commits
+        in the PR
+        """
+        body = self.processed_body()
+        author_lines = self.co_authors()
+        logging.info(f"Found co-authors: author_lines={author_lines}")
+        for author_line in author_lines:
+            if author_line not in body:
+                # If the line isn't already in the PR body (it could have been
+                # added manually), put it in
+                body = f"{body}\n\nCo-authored-by: {author_line}"
+
+        return body
+
+    def merge(self) -> None:
+        """
+        Request a merge of this PR via the GitHub API
+        """
+        url = f"pulls/{self.number}/merge"
+
+        title = self.raw["title"]
+        body = self.body_with_co_authors()
+        self.lint_commit_message(title, body)
+        logging.info(f"Full commit:\n{title}\n\n{body}")
+
+        data = {
+            "commit_title": title,
+            "commit_message": body,
+            # The SHA is necessary in case there was an update right when this
+            # script ran, GitHub will sort out who won
+            "sha": self.head_oid(),
+            "merge_method": "squash",
+        }
+        if self.dry_run:
+            logging.info(f"Dry run, would have merged with url={url} and data={js(data)}")
+            return
+
+        self.github.put(url, data=data)
+
+    def merge_requested(self) -> bool:
+        """
+        Check if this PR has had a merge requested
+        """
+        merge_commands = [
+            "merge",
+            "merge this",
+            "merge this pr",
+        ]
+        cancel_commands = [

Review Comment:
   one thing about cancels is that they'd have to be almost immediate, with GHA queueing + script runtime it's not more than 30 seconds before a merge comment to the merge actually happening. That's what this does, basically the comments are all in a list and if the last thing in the list is a merge command it'll merge, otherwise nothing happens. If someone later comments another merge it'll get merged as normal.
   
   One thing I probably should add is a filter for who can engage a merge, probably should be just the PR author + committers



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org