You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@arrow.apache.org by "Krisztian Szucs (Jira)" <ji...@apache.org> on 2020/04/14 18:17:00 UTC

[jira] [Created] (ARROW-8456) [Release] Add python script to help curating JIRA

Krisztian Szucs created ARROW-8456:
--------------------------------------

             Summary: [Release] Add python script to help curating JIRA
                 Key: ARROW-8456
                 URL: https://issues.apache.org/jira/browse/ARROW-8456
             Project: Apache Arrow
          Issue Type: Task
          Components: Developer Tools
            Reporter: Krisztian Szucs
             Fix For: 1.0.0


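The following script cross-references the commits applied to master since the previous release tag with the JIRA tickets assigned to the release's fix version. It produces reports like this example: https://gist.github.com/kszucs/9857ef69c92a230ce5a5068551b83ed8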

{code:python}
import re
import warnings
from io import StringIO

import pandas as pd
import pygit2
from jira import JIRA


class Patch:
    """A commit together with the JIRA ticket named in its subject line.

    The first line of the commit message is matched against
    ``<KEY>[:]<summary>`` where ``<KEY>`` is an ``ARROW-<n>`` or
    ``PARQUET-<n>`` ticket id.

    Attributes:
        commit: the wrapped commit object; this class only reads its
            ``message`` and ``id`` attributes.
        issue_key: the ticket id, or ``None`` when the subject line does
            not start with one.
        msg: the subject line with the ticket prefix and surrounding
            whitespace removed, or ``''`` when there is no ticket prefix.
    """

    # Compiled once for all instances.  Raw string so that ``\d`` is a
    # regex escape rather than an invalid Python string escape.
    _SUBJECT_RE = re.compile(r"(?P<ticket>(ARROW|PARQUET)-\d+):?(?P<msg>.*)")

    def __init__(self, commit):
        self.commit = commit
        self.issue_key, self.msg = self._parse(commit.message)

    @staticmethod
    def _subject(message):
        """Return the first line of ``message`` ('' for an empty message)."""
        lines = message.splitlines()
        return lines[0] if lines else ''

    def _parse(self, message):
        """Split a commit message into ``(issue_key, summary)``.

        Returns ``(None, '')`` when the subject line does not start with a
        ticket id.
        """
        m = self._SUBJECT_RE.match(self._subject(message))
        if m is None:
            return None, ''
        # Strip the blank that conventionally follows "KEY:" so it does not
        # end up inside the markdown link text.
        return m.group('ticket'), m.group('msg').strip()

    @property
    def shortmessage(self):
        """Summary without the ticket prefix, else the full subject line."""
        return self.msg or self._subject(self.commit.message)

    @property
    def sha(self):
        """Identifier of the wrapped commit (``commit.id``)."""
        return self.commit.id

    @property
    def issue_url(self):
        """JIRA browse URL built from ``issue_key``."""
        return 'https://issues.apache.org/jira/browse/{}'.format(self.issue_key)

    @property
    def commit_url(self):
        """GitHub URL of the commit in the apache/arrow repository."""
        return 'https://github.com/apache/arrow/commit/{}'.format(self.sha)

    def to_markdown(self):
        """Render the patch as one newline-terminated markdown line.

        The commit link is prefixed with a link to the JIRA ticket when the
        patch has one.
        """
        commit_link = "[{}]({})".format(self.shortmessage, self.commit_url)
        if self.issue_key is None:
            return "{}\n".format(commit_link)
        return "[{}]({}): {}\n".format(
            self.issue_key, self.issue_url, commit_link
        )
    
    
# Maximum number of issues requested from JIRA in a single search; passed as
# ``maxResults`` by ``Release.issues``.
JIRA_SEARCH_LIMIT = 10000
# JIRA_SEARCH_LIMIT = 50  # NOTE(review): presumably a smaller limit for quick trial runs


class Release:
    """Query the JIRA issues and git commits that make up a release.

    Usage:
        jira = JIRA(
            {'server': 'https://issues.apache.org/jira'},
            basic_auth=(user, password)
        )
        repo = pygit2.Repository('path/to/arrow/repo')

        release = Release(jira, repo, '0.15.1', '0.15.0')
        # show the patches in application order
        for patch in release.patches():
            print(patch.sha)
        # print the curation report
        print(release.curation_report())
        # cherry-pick the patches to a branch
        release.apply_patches_to('a-branch')
    """

    def __init__(self, jira, repo, version, previous_version):
        """Store the collaborators; nothing is queried until first use.

        Args:
            jira: JIRA client, used through ``search_issues``.
            repo: pygit2 repository holding the ``master`` branch and the
                ``apache-arrow-<version>`` tags.
            version: version being released, e.g. ``'0.17.0'``.
            previous_version: version whose tag marks the start of the
                commit range, e.g. ``'0.16.0'``.
        """
        self.jira = jira
        self.repo = repo
        self.version = version
        self.previous_version = previous_version
        # Lazily filled caches for issues() and patches().
        self._issues = None
        self._patches = None

    def _tag(self, version):
        """Resolve ``refs/tags/apache-arrow-<version>`` in the repository."""
        return self.repo.revparse_single(f'refs/tags/apache-arrow-{version}')

    @staticmethod
    def _issue_sort_key(key):
        """Sort key ordering ticket ids by project, then ticket number."""
        project, _, number = key.rpartition('-')
        if number.isdigit():
            return (project, int(number))
        return (key, 0)

    def issues(self):
        """Return the ARROW issues whose fix version is this release.

        The search result is cached after the first call.
        """
        # FIXME(kszucs): paginate instead of maxresults
        if self._issues is None:
            query = f'project=ARROW AND fixVersion={self.version}'
            self._issues = self.jira.search_issues(
                query, maxResults=JIRA_SEARCH_LIMIT
            )
        return self._issues

    def patches(self):
        """Return the commits on master since the previous release tag.

        Each commit is wrapped in a ``Patch``.  The walk is topological
        with ``GIT_SORT_REVERSE`` set, so the list is in the reverse of the
        walker's default order.  The result is cached after the first
        call.
        """
        if self._patches is None:
            previous_tag = self._tag(self.previous_version)
            master = self.repo.branches['master']
            ordering = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
            walker = self.repo.walk(master.target, ordering)
            # Exclude everything already reachable from the previous tag.
            walker.hide(previous_tag.id)
            self._patches = [Patch(commit) for commit in walker]

        return self._patches

    def curate(self):
        """Cross-reference the release's patches with its JIRA issues.

        Returns:
            A ``(within, outside, nojira, nopatch)`` tuple:

            * ``within``: patches whose ticket is assigned to this release
            * ``outside``: patches whose ticket is not assigned to it
            * ``nojira``: patches without a ticket id
            * ``nopatch``: keys of the release's tickets that no patch
              refers to, sorted by project and ticket number
        """
        issue_keys = {issue.key for issue in self.issues()}

        within, outside, nojira = [], [], []
        # Tracked separately instead of shrinking ``issue_keys`` so that
        # several patches for the same ticket all count as "within".
        patched_keys = set()
        for p in self.patches():
            if p.issue_key is None:
                nojira.append(p)
            elif p.issue_key in issue_keys:
                within.append(p)
                patched_keys.add(p.issue_key)
            else:
                outside.append(p)

        # remaining jira tickets, in a deterministic order
        nopatch = sorted(issue_keys - patched_keys, key=self._issue_sort_key)

        return within, outside, nojira, nopatch

    def curation_report(self):
        """Render the result of ``curate()`` as a markdown-flavoured string."""
        within, outside, nojira, nopatch = self.curate()

        def bullets(patches):
            # to_markdown() already terminates each entry with a newline
            return ''.join("- {}".format(p.to_markdown()) for p in patches)

        separator = '\n\n'
        parts = [
            'Total number of JIRA tickets assigned to version {}: {}\n'
            .format(self.version, len(self.issues())),
            '\n',
            'Total number of applied patches since {}: {}\n'
            .format(self.previous_version, len(self.patches())),
            separator,
            'Patches with assigned issue in {}:\n'.format(self.version),
            bullets(within),
            separator,
            'Patches with assigned issue outside of {}:\n'
            .format(self.version),
            bullets(outside),
            separator,
            'Patches without assigned issue:\n',
            bullets(nojira),
            separator,
            'JIRAs in {} without assigned patch:\n'.format(self.version),
        ]
        for issue_key in nopatch:
            url = 'https://issues.apache.org/jira/browse/{}'.format(issue_key)
            parts.append("- [{}]({})\n".format(issue_key, url))

        return ''.join(parts)

    def apply_patches_to(self, branch_name):
        """Replay the release's patches onto a new branch.

        The branch is created at the previous release tag and every patch
        from ``patches()`` is applied to it in order.

        Args:
            branch_name: name of the branch to create.

        Raises:
            pygit2.GitError: re-raised after deleting the newly created
                branch when applying a patch fails.
        """
        repo = self.repo
        previous_tag = self._tag(self.previous_version)
        branch = repo.create_branch(branch_name, previous_tag.get_object())
        try:
            head = branch.target
            for patch in self.patches():
                # patches() yields Patch wrappers; git operations need the
                # underlying commit.
                commit = patch.commit
                # Three-way merge of the commit onto the current head with
                # the commit's first parent as the common ancestor.
                parent_tree = commit.parents[0].tree
                index = repo.merge_trees(parent_tree, head, commit.id)
                tree_id = index.write_tree(repo)
                head = repo.create_commit(
                    branch.name,
                    commit.author,
                    commit.committer,
                    commit.message,
                    tree_id,
                    [head]
                )
        except pygit2.GitError:
            repo.branches[branch_name].delete()
            raise


import os
import pygit2
from jira import JIRA 

# Connect to the Apache JIRA. Credentials are read from the environment;
# either value is None when its variable is unset.
jira = JIRA(
    {'server': 'https://issues.apache.org/jira'},
    basic_auth=tuple(
        os.getenv(variable)
        for variable in ('APACHE_JIRA_USER', 'APACHE_JIRA_PASSWORD')
    ),
)

# The script works on the repository in the current working directory.
repo = pygit2.Repository('.')

# Build the curation report for 0.17.0, covering the commits since 0.16.0.
release = Release(
    jira=jira,
    repo=repo,
    version='0.17.0',
    previous_version='0.16.0',
)
report = release.curation_report()
{code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)