You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2022/08/26 20:50:56 UTC

[airflow] branch main updated: Add issue stats to PRotM score, enhance terminal output (#25741)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 90aba8afb5 Add issue stats to PRotM score, enhance terminal output (#25741)
90aba8afb5 is described below

commit 90aba8afb5bad02cfcfc4b2bd2a817700415417b
Author: Michael Robinson <68...@users.noreply.github.com>
AuthorDate: Fri Aug 26 16:50:47 2022 -0400

    Add issue stats to PRotM score, enhance terminal output (#25741)
---
 dev/stats/get_important_pr_candidates.py | 112 ++++++++++++++++++++++++++++---
 1 file changed, 101 insertions(+), 11 deletions(-)

diff --git a/dev/stats/get_important_pr_candidates.py b/dev/stats/get_important_pr_candidates.py
index 51a73f7157..614f9e2c41 100755
--- a/dev/stats/get_important_pr_candidates.py
+++ b/dev/stats/get_important_pr_candidates.py
@@ -19,6 +19,7 @@
 import logging
 import math
 import pickle
+import re
 import sys
 import textwrap
 from datetime import datetime
@@ -62,27 +63,44 @@ class PrStat:
     COMMENT_INTERACTION_VALUE = 1.0
     REACTION_INTERACTION_VALUE = 0.5
 
-    def __init__(self, pull_request: PullRequest):
+    def __init__(self, g, pull_request: PullRequest):
+        self.g = g
         self.pull_request = pull_request
         self._users: Set[str] = set()
+        self.issue_nums: List[int] = []
+        self.len_issue_comments = 0
+        self.num_issue_comments = 0
+        self.num_issue_reactions = 0
 
     @property
     def label_score(self) -> float:
+        """Return PROVIDER_SCORE if any label name contains 'provider', else REGULAR_SCORE."""
         for label in self.pull_request.labels:
             if "provider" in label.name:
                 return PrStat.PROVIDER_SCORE
         return PrStat.REGULAR_SCORE
 
     @cached_property
-    def num_comments(self) -> int:
+    def num_comments(self):
+        """Count the comments from pull_request.get_comments(), recording each commenter's login."""
         comments = 0
         for comment in self.pull_request.get_comments():
             self._users.add(comment.user.login)
             comments += 1
         return comments
 
+    @cached_property
+    def num_conv_comments(self) -> int:
+        """Count the PR's conversational comments, recording each commenter's login."""
+        # Collect the logins in one pass so the same list yields both the users and the count.
+        authors = [remark.user.login for remark in self.pull_request.get_issue_comments()]
+        self._users.update(authors)
+        return len(authors)
+
+
     @cached_property
     def num_reactions(self) -> int:
+        """counts reactions to reviewer comments"""
         reactions = 0
         for comment in self.pull_request.get_comments():
             for reaction in comment.get_reactions():
@@ -90,18 +108,78 @@ class PrStat:
                 reactions += 1
         return reactions
 
+    @cached_property
+    def num_conv_reactions(self) -> int:
+        """Count reactions on the PR's conversational comments, recording each reacting login."""
+        total = 0
+        for remark in self.pull_request.get_issue_comments():
+            reactors = [reaction.user.login for reaction in remark.get_reactions()]
+            self._users.update(reactors)
+            total += len(reactors)
+        return total
+
     @cached_property
     def num_reviews(self) -> int:
+        """Count the reviews from pull_request.get_reviews(), recording each reviewer's login."""
         reviews = 0
         for review in self.pull_request.get_reviews():
             self._users.add(review.user.login)
             reviews += 1
         return reviews
 
+    @cached_property
+    def issues(self):
+        """Return five-digit issue numbers cited in the PR body after 'closes: #' / 'elated: #'."""
+        # A missing body is treated as empty text, so the result is always a list (never None).
+        body = self.pull_request.body or ""
+        regex = r'(?<=closes: #|elated: #)\d{5}'
+        # int(), not eval(): the digits come from untrusted PR text, and eval("01234") raises.
+        self.issue_nums = [int(s) for s in re.findall(regex, body)]
+        return self.issue_nums
+
+    @cached_property
+    def issue_reactions(self) -> int:
+        """Count reactions on the issues linked from the PR body (reads self.issues directly)."""
+        if self.issues:
+            repo = self.g.get_repo("apache/airflow")
+            issue_reactions = 0
+            for num in self.issues:
+                issue = repo.get_issue(number=num)
+                for reaction in issue.get_reactions():
+                    self._users.add(reaction.user.login)
+                    issue_reactions += 1
+            self.num_issue_reactions = issue_reactions
+            return issue_reactions
+        return 0
+
+    @cached_property
+    def issue_comments(self) -> int:
+        """Tally comments on the linked issues and accumulate their total body length."""
+        linked = self.issues
+        if not linked:
+            # No linked issues: the stored comment counters are left untouched.
+            return 0
+        repo = self.g.get_repo("apache/airflow")
+        count = 0
+        chars = 0
+        for number in linked:
+            for remark in repo.get_issue(number=number).get_comments():
+                count += 1
+                self._users.add(remark.user.login)
+                # Comments without a body add nothing to the length.
+                chars += len(remark.body) if remark.body is not None else 0
+        self.len_issue_comments = chars
+        self.num_issue_comments = count
+        return count
+
     @property
     def interaction_score(self) -> float:
-        interactions = self.num_comments * PrStat.COMMENT_INTERACTION_VALUE
-        interactions += self.num_reactions * PrStat.REACTION_INTERACTION_VALUE
+        """Weighted sum of the comment, reaction and review counts for this PR."""
+        # Comment totals are gathered before reaction totals, then each is weighted.
+        comment_total = self.num_comments + self.num_conv_comments + self.issue_comments
+        reaction_total = self.num_reactions + self.num_conv_reactions + self.issue_reactions
+        interactions = comment_total * PrStat.COMMENT_INTERACTION_VALUE
+        interactions += reaction_total * PrStat.REACTION_INTERACTION_VALUE
         interactions += self.num_reviews * PrStat.REVIEW_INTERACTION_VALUE
         return interactions
 
@@ -149,6 +227,10 @@ class PrStat:
         for comment in self.pull_request.get_review_comments():
             if comment.body is not None:
                 length += len(comment.body)
+        for conv_comment in self.pull_request.get_issue_comments():
+            if conv_comment.body is not None:
+                length += len(conv_comment.body)
+        length += self.len_issue_comments
         return length
 
     @property
@@ -164,7 +246,7 @@ class PrStat:
             score *= 0.8
         if self.body_length < 20:
             score *= 0.4
-        return score
+        return round(score, 3)
 
     @property
     def score(self):
@@ -185,13 +267,14 @@ class PrStat:
         # If the body contains over 2000 characters, the PR should matter 40% more.
         # If the body contains fewer than 1000 characters, the PR should matter 20% less.
         #
-        return (
+        return round(
             1.0
             * self.interaction_score
             * self.label_score
             * self.length_score
             * self.change_score
-            / (math.log10(self.num_changed_files) if self.num_changed_files > 20 else 1.0)
+            / (math.log10(self.num_changed_files) if self.num_changed_files > 20 else 1.0),
+            3,
         )
 
     def __str__(self) -> str:
@@ -212,7 +295,12 @@ class PrStat:
             f'-- Interaction score: [green]{self.interaction_score}[/] '
             f'(users interacting: {self.num_interacting_users}, '
             f'reviews: {self.num_reviews}, '
-            f'comments: {self.num_comments})\n'
+            f'review comments: {self.num_comments}, '
+            f'review reactions: {self.num_reactions}, '
+            f'non-review comments: {self.num_conv_comments}, '
+            f'non-review reactions: {self.num_conv_reactions}, '
+            f'issue comments: {self.num_issue_comments}, '
+            f'issue reactions: {self.num_issue_reactions})\n'
             f'-- Change score: [green]{self.change_score}[/] '
             f'(changed files: {self.num_changed_files}, '
             f'additions: {self.num_additions}, '
@@ -255,7 +343,9 @@ def main(
     if load:
         console.print("Loading PRs from cache and recalculating scores.")
         selected_prs = pickle.load(load, encoding='bytes')
+        issue_num = 0
         for pr_stat in selected_prs:
+            issue_num += 1
             console.print(
                 f"[green]Loading PR: #{pr_stat.pull_request.number} `{pr_stat.pull_request.title}`.[/]"
                 f" Score: {pr_stat.score}."
@@ -272,7 +362,6 @@ def main(
         pulls = repo.get_pulls(state="closed", sort="created", direction='desc')
         issue_num = 0
         for pr in pulls:
-            issue_num += 1
             if not pr.merged:
                 continue
 
@@ -287,7 +376,8 @@ def main(
                 console.print("[bright_blue]Completed selecting candidates")
                 break
 
-            pr_stat = PrStat(pull_request=pr)  # type: ignore
+            issue_num += 1
+            pr_stat = PrStat(pull_request=pr, g=g)  # type: ignore
             console.print(
                 f"[green]Selecting PR: #{pr.number} `{pr.title}` as candidate.[/]"
                 f" Score: {pr_stat.score}."
@@ -302,7 +392,7 @@ def main(
                 console.print(f'[red]Reached {MAX_PR_CANDIDATES}. Stopping')
                 break
 
-    console.print(f"Top {top_number} PRs:")
+    console.print(f"Top {top_number} out of {issue_num} PRs:")
     for pr_stat in sorted(selected_prs, key=lambda s: -s.score)[:top_number]:
         console.print(f" * {pr_stat}")