You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@arrow.apache.org by "Krisztian Szucs (Jira)" <ji...@apache.org> on 2020/05/13 00:20:00 UTC

[jira] [Updated] (ARROW-8456) [Release] Add python script to help curating JIRA

     [ https://issues.apache.org/jira/browse/ARROW-8456?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Krisztian Szucs updated ARROW-8456:
-----------------------------------
    Issue Type: Improvement  (was: Task)

> [Release] Add python script to help curating JIRA
> -------------------------------------------------
>
>                 Key: ARROW-8456
>                 URL: https://issues.apache.org/jira/browse/ARROW-8456
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: Developer Tools
>            Reporter: Krisztian Szucs
>            Priority: Major
>             Fix For: 1.0.0
>
>
> The following script produces reports like https://gist.github.com/kszucs/9857ef69c92a230ce5a5068551b83ed8
> {code:python}
> import re
> import warnings
> from io import StringIO
>
> import pandas as pd
> import pygit2
> from jira import JIRA
> class Patch:
>     
>     def __init__(self, commit):
>         self.commit = commit
>         self.issue_key, self.msg = self._parse(commit.message)
>         
>     def _parse(self, message):
>         first_line = message.splitlines()[0]
>         
>         m = re.match("(?P<ticket>((ARROW|PARQUET)\-\d+)):?(?P<msg>.*)", first_line)
>         if m is None:
>             return None, ''
>         values = m.groupdict()
>         return values['ticket'], values['msg']
>     
>     @property
>     def shortmessage(self):
>         if not self.msg:
>             return self.commit.message.splitlines()[0]
>         else:
>             return self.msg
>     @property
>     def sha(self):
>         return self.commit.id
>     
>     @property
>     def issue_url(self):
>         return 'https://issues.apache.org/jira/browse/{}'.format(self.issue_key)
>     
>     @property
>     def commit_url(self):
>         return 'https://github.com/apache/arrow/commit/{}'.format(self.sha)
>     
>     def to_markdown(self):
>         if self.issue_key is None:
>             return "[{}]({})\n".format(
>                 self.shortmessage, 
>                 self.commit_url
>             )
>         else:
>             return "[{}]({}): [{}]({})\n".format(
>                 self.issue_key, 
>                 self.issue_url, 
>                 self.shortmessage, 
>                 self.commit_url
>             )
>     
>     
> JIRA_SEARCH_LIMIT = 10000
> # JIRA_SEARCH_LIMIT = 50
> class Release:
>     """Release object for querying issues and commits
>     
>     Usage:
>         jira = JIRA(
>             {'server': 'https://issues.apache.org/jira'}, 
>             basic_auth=(user, password)
>         )
>         repo = pygit2.Repository('path/to/arrow/repo')
>         
>         release = Release(jira, repo, '0.15.1', '0.15.0')
>         # show the commits in application order
>         for commit in release.commits():
>             print(commit.oid)
>         # cherry-pick the patches to a branch
>         release.apply_patches_to('a-branch')
>     """
>     
>     def __init__(self, jira, repo, version, previous_version):
>         self.jira = jira
>         self.repo = repo
>         self.version = version
>         self.previous_version = previous_version
>         self._issues = None
>         self._patches = None
>         
>     def _tag(self, version):
>         return self.repo.revparse_single(f'refs/tags/apache-arrow-{version}')
>     
>     def issues(self):
>         # FIXME(kszucs): paginate instead of maxresults 
>         if self._issues is None:
>             query = f'project=ARROW AND fixVersion={self.version}'
>             self._issues = self.jira.search_issues(query, maxResults=JIRA_SEARCH_LIMIT)
>         return self._issues
>     
>     def patches(self):
>         """Commits belonging to release applied on master branch
>         
>         The returned commits' order corresponds to the output of
>         git log.
>         """
>         if self._patches is None:
>             previous_tag = self._tag(self.previous_version)
>             master = self.repo.branches['master']
>             ordering = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
>             walker = self.repo.walk(master.target, ordering)        
>             walker.hide(previous_tag.oid)
>             self._patches = list(map(Patch, walker))
>         return self._patches
>     
>     def curate(self):
>         issues = self.issues()
>         patches = self.patches()
>         issue_keys = {issue.key for issue in self.issues()}
>         
>         within, outside, nojira = [], [], []
>         for p in patches:
>             if p.issue_key is None:
>                 nojira.append(p)
>             elif p.issue_key in issue_keys:
>                 within.append(p)
>                 issue_keys.remove(p.issue_key)
>             else:
>                 outside.append(p)
>                 
>         # remaining jira tickets
>         nopatch = list(issue_keys)
>                 
>         return within, outside, nojira, nopatch
>     def curation_report(self):
>         out = StringIO()
>         
>         out.write('Total number of JIRA tickets assigned to version {}: {}\n'
>                   .format(self.version, len(self.issues())))
>         out.write('\n')
>         out.write('Total number of applied patches since {}: {}\n'
>                   .format(self.previous_version, len(self.patches())))
>         
>         out.write('\n\n')
>         within, outside, nojira, nopatch = self.curate()
>         
>         out.write('Patches with assigned issue in {}:\n'.format(self.version))
>         for p in within:
>             out.write("- {}".format(p.to_markdown()))
>         
>         out.write('\n\n')
>         
>         out.write('Patches with assigned issue outside of {}:\n'.format(self.version))
>         for p in outside:
>             out.write("- {}".format(p.to_markdown()))
>         
>         out.write('\n\n')
>                       
>         out.write('Patches without assigned issue:\n')
>         for p in nojira:
>             out.write("- {}".format(p.to_markdown()))
>                       
>         out.write('\n\n')
>                       
>         out.write('JIRAs in {} without assigned patch:\n'.format(self.version))
>         for issue_key in nopatch:
>             url = 'https://issues.apache.org/jira/browse/{}'.format(issue_key)
>             out.write("- [{}]({})\n".format(issue_key, url))
>                       
>         return out.getvalue()
>             
>     def apply_patches_to(self, branch_name):
>         previous_tag = self._tag(self.previous_version)
>         branch = repo.create_branch(branch_name, previous_tag.get_object())
>         try:
>             head = branch.target
>             for commit in self.patches():
>                 base = repo.merge_base(commit.oid, head)
>                 parent_tree = commit.parents[0].tree
>                 index = repo.merge_trees(parent_tree, head, commit.oid)
>                 tree_id = index.write_tree(repo)
>                 head = repo.create_commit(
>                     branch.name, 
>                     commit.author, 
>                     commit.committer, 
>                     commit.message,
>                     tree_id, 
>                     [head]
>                 )
>         except pygit2.GitError:
>             repo.branches[branch_name].delete()
>             raise
> import os
> import pygit2
> from jira import JIRA 
> jira = JIRA(
>     {'server': 'https://issues.apache.org/jira'}, 
>     basic_auth=(
>         os.environ.get('APACHE_JIRA_USER'), 
>         os.environ.get('APACHE_JIRA_PASSWORD')
>     )
> )
> repo = pygit2.Repository('.')
> release = Release(jira, repo, version='0.17.0', previous_version='0.16.0')
> report = release.curation_report()
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)