You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jm...@apache.org on 2022/05/23 15:55:09 UTC

[cassandra-builds] branch trunk updated: Add jenkins->jira integration script

This is an automated email from the ASF dual-hosted git repository.

jmckenzie pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra-builds.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 1f99e6e  Add jenkins->jira integration script
1f99e6e is described below

commit 1f99e6efd765b6b3f3f383f6178bac55666b3a48
Author: Josh McKenzie <jm...@apache.org>
AuthorDate: Thu Feb 17 14:03:49 2022 -0500

    Add jenkins->jira integration script
    
    This script parses the test failures of a Jenkins job and formats a comment
    to be added to JIRA describing the build run and its test failures, attempting
    to link each failure to an existing JIRA ticket. Functionality exists in the
    script (as yet untested) to automate updating tickets with the results of this
    parsing; we'll need to determine where and in what contexts this is going to
    run, as well as set up an account for it to post from (JenkinsBot?), and then
    test and commit that change in tandem.
---
 .../jenkins_jira_integration.py                    | 361 +++++++++++++++++++++
 jenkins-jira-integration/requirements.txt          |   2 +
 2 files changed, 363 insertions(+)

diff --git a/jenkins-jira-integration/jenkins_jira_integration.py b/jenkins-jira-integration/jenkins_jira_integration.py
new file mode 100644
index 0000000..66c2160
--- /dev/null
+++ b/jenkins-jira-integration/jenkins_jira_integration.py
@@ -0,0 +1,361 @@
+#!/usr/bin/python
+
+import argparse
+import json
+import os
+import re
+import sys
+
+from typing import Dict, Set
+from jenkins import Jenkins
+from jira import JIRA
+
+import jenkins
+
+# Module-wide switch read by log(); main() sets it to True when --verbose is passed
+VERBOSE = False
+
+# Sentinel values and dict keys shared by the functions below
+NO_PREVIOUS_BUILD = '-1'  # marks the end of the previous-build chain; a str, like the build numbers used as cache keys
+NUMBER = 'number'  # key for a build's own number (cache entries and Jenkins' previousBuild dict)
+PREVIOUS_NUMBER = 'previous_number'  # cache-entry key holding the number of the preceding build
+UNKNOWN = 'unknown'  # stored in BuildData.JIRA when no CASSANDRA-<digits> is found in the commit message
+
+
+def main() -> None:
+    """
+    Script entry point.
+
+    Parses the command line, refreshes the local per-branch cache of CI history, builds a JIRA-formatted report for
+    the requested build and hands that report to post_results_to_jira.
+    """
+    global VERBOSE
+
+    parser = argparse.ArgumentParser(description='Parse Jenkins build output and optionally update JIRA tickets with results for a single branch')
+    # Remove the last of argparse's built-in groups so our options are listed under the two groups created below
+    parser._action_groups.pop()
+    required = parser.add_argument_group('required arguments')
+    optional = parser.add_argument_group('optional arguments')
+
+    required.add_argument('--jenuser', type=str, required=True, help='Jenkins username')
+    required.add_argument('--jenpass', type=str, required=True, help='Jenkins password')
+    required.add_argument('--jiraurl', type=str, required=True, help='JIRA server url')
+    required.add_argument('--jirauser', type=str, required=True, help='JIRA username')
+    required.add_argument('--jirapass', type=str, required=True, help='JIRA password')
+    required.add_argument('--branch', metavar='b', type=str, required=True,
+                          help='Branch Versions to pull Jenkins results for')
+    # Everything below concatenates the build number into strings, so a missing value must be rejected up front
+    required.add_argument('--buildnum', metavar='n', type=str, required=True,
+                          help='Build number to process for CI status')
+
+    # Never enforced as required; defaults to the server that used to be hard-coded here
+    optional.add_argument('--jenurl', type=str, default='https://ci-cassandra.apache.org', help='Jenkins server url')
+    # The defaults must be the bool False: the string 'False' is truthy and would switch the flags on permanently
+    optional.add_argument('--verbose', action='store_true', default=False, help='Verbose logging')
+    optional.add_argument('--auto', action='store_true', default=False, help='Update Jira tickets with CI information automatically')
+
+    args = parser.parse_args()
+    VERBOSE = bool(args.verbose)
+
+    log('Connecting to jenkins server...')
+    server = Jenkins(args.jenurl, username=args.jenuser, password=args.jenpass)
+
+    # We store a per-branch JSON file with cached data of test number, previous number, and all failures so we don't
+    # have to query Jenkins for each build's data every time we run the script
+    ci_cache = build_local_cache(server, args.branch, args.buildnum)
+
+    log('Retrieving build. Branch: ' + args.branch + '. Build number: ' + args.buildnum)
+    build_data = retrieve_build_details(server, args.branch, args.buildnum)
+
+    jira = JIRA(args.jiraurl, basic_auth=(args.jirauser, args.jirapass))
+
+    # report_parts collects the text we're going to post to the JIRA ticket about this CI run and test histories
+    report_parts = [
+        '[CI Results]\n',
+        build_data.string_detailed(),
+        # Add a space between CI meta and test details for aesthetics
+        '\n',
+        # Putting this in a table goes a long way towards making it parseable; header cells are delimited by '||'
+        '||Test||Failures||JIRA||\n',
+    ]
+
+    # Sorted so the same build always renders the same report (sets have no stable iteration order)
+    for test_name in sorted(build_data.test_failures):
+        failures, total, jiralink = get_test_failure_details(jira, test_name, ci_cache)
+        row = '|' + test_name + '|' + str(failures) + ' of ' + str(total) + '|' + jiralink
+        # Close the last cell ourselves if the link text did not already end with the cell delimiter
+        if not row.endswith('|'):
+            row += '|'
+        report_parts.append(row + '\n')
+
+    jira_results = ''.join(report_parts)
+
+    # Next, we want to query JIRA for the cassandra ticket in question and see if we've given it an update yet on build status; protect against spamming
+    post_results_to_jira(jira, build_data.JIRA, args.branch, args.auto, jira_results)
+
+
+class BuildData:
+    """
+    Details of one build of the 'Cassandra-<branch>' Jenkins job: commit metadata taken from its change sets, the
+    build result, pass/fail counts and the names of the failed tests.
+    """
+
+    def __init__(self, server: Jenkins, branch: str, build_num: str) -> None:
+        """
+        A lot of the guts of this are painfully parsed out of the nested data structures in the Python API of a Jenkins build
+        For example, see the Python API link on the following build on trunk: https://ci-cassandra.apache.org/job/Cassandra-trunk/959/testReport/api/
+        (note: you may need to replace 959 w/a more recent build as they fall off the history to see it)
+        :param server: jenkins server to query; expected to already be authenticated and connected
+        :param branch: str of the branch name to pull from
+        :param build_num: str of the build number to query
+        """
+        self.branch = branch
+        self.number = build_num
+
+        # Defaults for builds that carry no change sets. Without them these attributes would never be created and
+        # any later read (string_detailed, the JIRA check in retrieve_build_details) would raise AttributeError.
+        self.paths = []
+        self.sha = UNKNOWN
+        self.commit_msg = ''
+        self.JIRA = UNKNOWN
+
+        job_name = 'Cassandra-' + branch
+
+        # TODO: Determine if depth=5 is necessary or in any way meaningful in this API Query. If so, document why.
+        build = server.get_build_info(job_name, int(build_num), depth=5)
+
+        # The data structure and nesting in here _seems_ like it's going to be brittle and bite us in the future.
+        # Should be able to access whatever the latest format of the Python API linked above is in order to correct
+        # drifts and changes in the data structures, assuming things aren't completely dropped.
+        # Note: when a build contains several commits, the last one visited determines sha / commit_msg / JIRA,
+        # while affected paths accumulate across all of them.
+        for change_set in build['changeSets']:
+            for items in change_set['items']:
+                self.sha = items['commitId']
+                self.paths.extend(items['affectedPaths'])
+
+                # cache this so if it doesn't match a C* JIRA we can let the user know details
+                self.commit_msg = items['comment']
+
+                # We need to catch anyone that left bound the ticket # instead of following the idiom.
+                # The greedy prefix means the last CASSANDRA-<digits> in the message is the one captured.
+                matches = re.match(r'[\S\s^]*CASSANDRA-([0-9]+)', self.commit_msg)
+                # We don't need to say anything here about a miss; we let whomever asked about this deal with the consequences
+                self.JIRA = matches.group(1) if matches is not None else UNKNOWN
+
+        self.url = build['url']
+
+        previous_build = build['previousBuild']
+        if previous_build is None:
+            self.previous_number = NO_PREVIOUS_BUILD
+        else:
+            # Kept as a str so it has the same type as NO_PREVIOUS_BUILD and the cache keys
+            self.previous_number = str(previous_build[NUMBER])
+
+        self.result = build['result']
+
+        tests = server.get_build_test_report(job_name, int(build_num), depth=5)
+
+        self.passcount = 0
+        self.failcount = 0
+        self.test_failures: Set[str] = set()
+
+        # If we had a bad build, it's possible we have no test run results and should just be done with it
+        if not tests:
+            return
+
+        self.passcount = tests['passCount']
+        self.failcount = tests['failCount']
+
+        for suite in tests['suites']:
+            for case in suite['cases']:
+                # We treat failures and regressions the same as we're going to rely on our history cache to provide per-test failure context
+                if case['status'] in ('FAILED', 'REGRESSION'):
+                    self.test_failures.add(case['className'] + '.' + case['name'])
+
+    def string_detailed(self) -> str:
+        """
+        Builds out detailed job runs for posting on JIRA. Whitespace is lost but it looks clean enough w/things left
+        aligned excepting the affected paths as a bullet list.
+        :return: multi-line str describing branch, build number, urls, JIRA ticket, affected paths and test counts
+        """
+        prefix = '' if self.branch == 'trunk' else 'Cassandra-'
+
+        lines = [
+            'Branch: ' + self.branch + ', build number: ' + str(self.number),
+            '   butler url: https://butler.cassandra.apache.org/#/ci/upstream/compare/Cassandra-' + self.branch + '/' + prefix + self.branch,
+            '   jenkins url: ' + str(self.url),
+            '   JIRA: CASSANDRA-' + str(self.JIRA),
+            '   commit url: https://git-wip-us.apache.org/repos/asf?p=cassandra.git;a=commit;h=' + str(self.sha),
+            '   affected paths:',
+        ]
+        lines.extend('* ' + path for path in self.paths)
+        # The leading newline leaves a blank line between the path list and the result summary.
+        # str() because the result read from Jenkins may be None, which cannot be concatenated to a str.
+        lines.append('\n   Build Result: ' + str(self.result))
+        lines.append('   Passing Tests: ' + str(self.passcount))
+        lines.append('   Failing Tests: ' + str(self.failcount))
+        return '\n'.join(lines) + '\n'
+
+
+def retrieve_build_details(server: Jenkins, branch: str, build_num: int) -> BuildData:
+    """
+    Fetch the requested build from Jenkins and make sure it can be tied to a CASSANDRA ticket.
+
+    :param server: jenkins server to query; expected to already be authenticated and connected
+    :param branch: str of the branch name to pull from
+    :param build_num: build number to query; converted with str() before use, so a str works as well
+    :return: the populated BuildData for the build
+    Exits the process with status -1 when no CASSANDRA ticket number could be parsed for the build.
+    """
+    log('Retrieving requested build: ' + str(build_num) + '. Branch: ' + branch + '. build_num: ' + str(build_num))
+    result = BuildData(server, branch, str(build_num))
+
+    # A build without any change sets may never have had these attributes assigned; treat that like "no ticket"
+    ticket = getattr(result, 'JIRA', UNKNOWN)
+    commit_msg = getattr(result, 'commit_msg', '')
+
+    # Some commit messages aren't JIRA related (ninjas, deb release changes, etc)
+    if ticket == UNKNOWN:
+        print('No related CASSANDRA-NNNNN Jira found for build: ' + str(result.number) + '. Commit Message: ' + commit_msg)
+        print('Exiting processing; nothing to be done for this build if we can\'t determine the JIRA ticket it\'s associated with.')
+        sys.exit(-1)
+
+    log('Parsed JIRA number: ' + ticket + ' for build: ' + str(result.number))
+    return result
+
+
+def post_results_to_jira(jira: JIRA, ticket_number: str, branch: str, post_to_jira: bool, ci_results: str) -> None:
+    """
+    Update the bot's existing comment for this branch on the ticket, or report the comment that should be added.
+
+    :param jira: Connected and authenticated JIRA instance
+    :param ticket_number: numeric part of the ticket key, i.e. NNNN in CASSANDRA-NNNN
+    :param branch: CI branch this job was for
+    :param post_to_jira: Whether we should actively update Jira with the comment or print it to local output
+            for a manual run / debugging. Only the bool True enables writes; any other value just prints.
+    :param ci_results: str to post to jira ticket
+    :return: None
+    """
+    issue_key = 'CASSANDRA-' + ticket_number
+
+    log('Attempting to connect to jira and get issue: ' + issue_key)
+    issue = jira.issue(issue_key)
+    log('Checking comments on ' + str(issue))
+    comments = issue.fields.comment.comments
+
+    # There's a little nuance here. We have two motions we could potentially need to go through
+    #   1) We don't have anything on this ticket for this branch by JenkinsBot, so we want to add
+    #   2) We already have an entry on this ticket for this branch by JenkinsBot, so we want to update
+
+    # TODO put the correct account name here
+    my_name = 'JenkinsBot'
+    branch_marker = 'Branch: ' + branch
+
+    # Walk the comments for any we authored with the branch name of what we're processing. If we find it, update it
+    for comment in comments:
+        if str(comment.author) != my_name:
+            continue
+
+        # We know we wrote it; now we need to determine if this comment was for the branch we're currently
+        # processing to determine if we need to update it. JIRA.comment() looks a comment up by its id.
+        comment_text = jira.comment(issue_key, comment.id)
+        if branch_marker not in comment_text.body:
+            continue
+
+        # This is an update
+        if post_to_jira is True:
+            comment.update(body=ci_results)
+        else:
+            print('[UPDATE] comment to manually post to Jira for ' + issue_key)
+            print(ci_results)
+        return
+
+    # Nothing of ours for this branch yet. Adding new comments from the bot is still disabled, so in both modes
+    # we only print what would be added.
+    if post_to_jira is True:
+        print('Posting to JIRA from the bot is not yet tested and enabled.')
+        # issue.add_comment('CASSANDRA-' + ticket_number, ci_results)
+    print('[ADD] the following comment to Jira for ' + issue_key)
+    print(ci_results)
+
+
+def build_local_cache(server: Jenkins, branch: str, buildnum: str) -> Dict:
+    """
+    For the input build back as far as we have history, we want to cache the following (k/v):
+        NUMBER:             str number of this build
+        PREVIOUS_NUMBER     str build number of the previous build to walk to when walking the cache
+        failures:           set of test failures seen for this build (note: sets aren't serializable in JSON so we cast to list)
+
+        We use the build number and previous number as strings since they're going to get cast to that in the JSON
+        ser/deser anyway, so best not to mix.
+
+    The cache lives in the file 'cache/<branch>' relative to the current working directory. It is written back even
+    when a Jenkins query raises part-way through the walk, so builds fetched before the failure are not lost.
+
+    :param server: jenkins server to query; expected to already be authenticated and connected
+    :param branch: str of the branch name; also the name of the cache file
+    :param buildnum: build number to start walking backwards from
+    :return: dict mapping str build number -> cache entry as described above
+    :raises ValueError: if buildnum is None
+    """
+    if buildnum is None:
+        raise ValueError('A build number is required to build the local cache')
+    # All cache keys are strings; normalise so a caller passing an int still hits the cache
+    buildnum = str(buildnum)
+
+    if not os.path.isdir('cache'):
+        os.mkdir('cache')
+
+    cached_data = {}
+    json_file = 'cache/' + branch
+    if os.path.exists(json_file):
+        with open(json_file, 'r', encoding='utf-8') as infile:
+            cached_data = json.load(infile)
+        for build in cached_data:
+            log('Loaded build: [' + str(build) + '] from cache')
+
+    try:
+        while buildnum != NO_PREVIOUS_BUILD:
+            log('Processing cache for build_number ' + buildnum + ' on branch [' + branch + ']')
+            if buildnum in cached_data:
+                previous = str(cached_data[buildnum][PREVIOUS_NUMBER])
+                log('   Already found data for ' + buildnum + ' cached locally. Continuing to ' + previous)
+                buildnum = previous
+                continue
+
+            print('   Did not find build_number: ' + buildnum + ' in the json cache. Populating...')
+            # Pull the data from the Jenkins server as we don't have it in our cache
+            build_data = BuildData(server, branch, buildnum)
+
+            # The data in our JSON cache is a pretty simple subset of what we have in BuildData for a full CI report
+            newcache_data = {
+                NUMBER: build_data.number,
+                PREVIOUS_NUMBER: build_data.previous_number,
+                # Can't serialize sets to JSON
+                'failures': list(build_data.test_failures),
+            }
+
+            log('   Got data for ' + str(newcache_data[NUMBER]) + '. Caching and setting previous build pointer to: ' + str(newcache_data[PREVIOUS_NUMBER]))
+            log('   build number: ' + str(newcache_data[NUMBER]) + ' with failure count: ' + str(len(newcache_data['failures'])))
+
+            # JSON comes back out as a string; need to follow suit here so the cache is all the same data type
+            cached_data[str(build_data.number)] = newcache_data
+
+            # The previous number may be an int (it is read straight from Jenkins' data), hence the cast
+            buildnum = str(newcache_data[PREVIOUS_NUMBER])
+
+        log('   Hit limit of builds. Saving JSON.')
+    finally:
+        # Runs on success and on failure alike; any exception still propagates after the file is written
+        with open(json_file, 'w', encoding='utf-8') as outfile:
+            log('Updating the cache with recently queried results')
+            json.dump(cached_data, outfile)
+
+    return cached_data
+
+
+def get_test_failure_details(jira: JIRA, test_name: str, data: Dict) -> tuple[int, int, str]:
+    """
+    Count how often a test failed across the cached builds and look for an open JIRA ticket that mentions it.
+
+    :param jira: Connected and authenticated JIRA instance
+    :param test_name: '.' delimited fully qualified test name (class name followed by test name)
+    :param data: Dict representing JSON cached data
+    :return: failures, total, test failure JIRA url if found, link to test board if not. The link is JIRA wiki
+        markup and always ends with the '|' that closes its table cell.
+    Exits the process with status -1 if the JIRA search raises.
+    """
+    # Imported here so this function does not depend on a module-level import being present
+    from urllib.parse import quote
+
+    total = len(data)
+    fail_count = sum(1 for build_data in data.values() if test_name in build_data['failures'])
+
+    no_jira_link = '[No JIRA found|https://issues.apache.org/jira/secure/RapidBoard.jspa?rapidView=496&quickFilter=2252]|'
+
+    # See if we have a ticket for this failure already; pull out last 2 tokens from the . delimited test FQN
+    tokens = test_name.split('.')
+
+    # Take the class name only; we're not looking for a perfect match, just something of a jumping off point.
+    # A name without any '.' has a single token, so fall back to it rather than raising IndexError.
+    class_name = tokens[-2] if len(tokens) > 1 else tokens[0]
+
+    # And split out the first token if there are underscores... /sigh
+    # https://jira.atlassian.com/browse/JRASERVER-31882
+    class_name = class_name.split('_')[0]
+
+    # If we have a runtime configured test name with braces in it, we search on the text following the first brace
+    # since JQL is not fond of braces
+    # NOTE(review): the original comment speaks of the "root test class name"; confirm index 1 is the intended part
+    if '[' in class_name:
+        class_name = class_name.split('[')[1]
+
+    # Nothing usable left to search for (e.g. the class name started with an underscore)
+    if not class_name:
+        return fail_count, total, no_jira_link
+
+    query = 'project = CASSANDRA and resolution = unresolved and summary ~ "*' + class_name + '*"'
+    log('About to query via the following JQL: ' + query)
+    try:
+        log('Running query: ' + str(query))
+        has_jira = jira.search_issues(query)
+        log('Result: ' + str(has_jira))
+    except Exception as e:
+        print('ERROR! Got an exception attempting to get Jira for test failure.')
+        print('Query that raised the exception: ' + query)
+        print('Exception received: ' + str(e))
+        print('Aborting.')
+        sys.exit(-1)
+
+    # We have a few states here:
+    #   1) Empty result; didn't find anything w/this name. Point to the test board w/link
+    #   2) We have *too many* results, or > 1. Link to the JQL that queries that so someone can check it out
+    #   3) We have a single ticket whose summary matches our failure name.
+    if not has_jira:
+        result = no_jira_link
+    elif len(has_jira) > 1:
+        # Percent-encode the whole query so spaces, quotes, wildcards, '|' and ']' can't break the URL or the
+        # surrounding wiki link markup
+        result = '[Multiple JIRAs found|https://issues.apache.org/jira/issues/?jql=' + quote(query, safe='') + ']|'
+    else:
+        result = '[' + str(has_jira[0]) + '?|https://issues.apache.org/jira/browse/' + str(has_jira[0]) + ']|'
+
+    return fail_count, total, result
+
+
+def log(to_log: str) -> None:
+    """Print to_log to stdout, but only while the module-level VERBOSE flag is truthy."""
+    if not VERBOSE:
+        return
+    print(to_log)
+
+
+# Only run when executed as a script, so importing this module has no side effects
+if __name__ == '__main__':
+    main()
diff --git a/jenkins-jira-integration/requirements.txt b/jenkins-jira-integration/requirements.txt
new file mode 100644
index 0000000..5e92ef5
--- /dev/null
+++ b/jenkins-jira-integration/requirements.txt
@@ -0,0 +1,2 @@
+jira
+python-jenkins


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org