You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by hu...@apache.org on 2019/08/06 23:16:23 UTC
svn commit: r1864571 - in /comdev/reporter.apache.org/trunk/scripts:
pdata.py wsgi.py
Author: humbedooh
Date: Tue Aug 6 23:16:23 2019
New Revision: 1864571
URL: http://svn.apache.org/viewvc?rev=1864571&view=rev
Log:
Start work on a WSGI edition of getjson.py, for maximum speed.
Added:
comdev/reporter.apache.org/trunk/scripts/pdata.py (with props)
comdev/reporter.apache.org/trunk/scripts/wsgi.py
Added: comdev/reporter.apache.org/trunk/scripts/pdata.py
URL: http://svn.apache.org/viewvc/comdev/reporter.apache.org/trunk/scripts/pdata.py?rev=1864571&view=auto
==============================================================================
--- comdev/reporter.apache.org/trunk/scripts/pdata.py (added)
+++ comdev/reporter.apache.org/trunk/scripts/pdata.py Tue Aug 6 23:16:23 2019
@@ -0,0 +1,405 @@
+#!/usr/bin/env python
+"""
+ WSGI script to return data to the reporter.a.o wizard
+
+ It also populates various json files from JIRA if they are stale:
+ data/JIRA/jira_projects.json - list of all JIRA projects
+ data/JIRA/%s.json - for each JIRA project
+
+ Reads the following:
+ data/jirapass.txt
+ data/JIRA/jira_projects.json
+ data/JIRA/%s.json
+ data/health.json
+ data/releases/%s.json
+ data/pmcs.json
+ data/projects.json
+ data/bugzillastats.json
+ data/pmcdates.json
+ data/mailinglists.json
+ data/maildata_extended.json
+ https://whimsy.apache.org/public/member-info.json
+ https://whimsy.apache.org/public/public_ldap_projects.json
+ https://whimsy.apache.org/public/public_ldap_people.json
+ https://projects.apache.org/json/foundation/committees.json
+ https://checker.apache.org/json/ (falls back to data/cache/checker.json)
+
+"""
+
+import os, sys, re, json, subprocess, time
+import base64, requests
+
+# Seconds for which cached inputs are treated as fresh: passed to UrlCache as its
+# interval and compared against time.time() before re-reading the JSON inputs.
+CACHE_TIMEOUT = 3600
+
+import committee_info
+from urlutils import UrlCache
+
+# This script may be called frequently, so don't just rely on IfNewer checks
+uc = UrlCache(cachedir='../data/cache', interval=CACHE_TIMEOUT, silent=True)
+
+# Home directory of the site. This is an absolute path; RAOHOME is an alias of
+# RAOHOME_FULL and is the prefix used for every data/... file read or written here.
+RAOHOME_FULL = '/var/www/reporter.apache.org/'
+RAOHOME = RAOHOME_FULL
+
+# Remote JSON documents, fetched through the URL cache by loadJson()
+COMMITTER_INFO = 'https://whimsy.apache.org/public/public_ldap_people.json'
+MEMBER_INFO = 'https://whimsy.apache.org/public/member-info.json'
+PROJECTS = 'https://whimsy.apache.org/public/public_ldap_projects.json'
+DESCRIPTIONS = 'https://projects.apache.org/json/foundation/committees.json'
+
+# PMC name -> list of names passed to getJIRAS()
+# (presumably the JIRA project keys used by that PMC - TODO confirm)
+jmap = {
+ 'trafficserver': ['TS'],
+ 'cordova': ['CB'],
+ 'corinthia': ['COR']
+}
+
+pmap = {# convert mailing list name to PMC name
+ 'community': 'comdev',
+ 'ws': 'webservices',
+ 'hc': 'httpcomponents',
+ 'whimsical': 'whimsy',
+ 'empire': 'empire-db'
+}
+
+# PMC name -> key used to look the PMC up in data/pmcs.json and data/projects.json
+# (presumably the LDAP group name - TODO confirm)
+ldapmap = {
+ 'webservices': 'ws'
+}
+
+# Password for the 'githubbot' JIRA account, read once at import time.
+# A missing data/jirapass.txt makes the import itself fail.
+jirapass = ""
+with open(RAOHOME+"data/jirapass.txt", "r") as f:
+ jirapass = f.read().strip()
+ # redundant: the with block already closes f
+ f.close()
+
+def readJson(filename, *default):
+    """Read and parse a JSON file.
+
+    filename -- path of the file to read
+    default  -- optional fallback value; only the first extra argument is used
+
+    Returns the parsed JSON. If reading or parsing fails, the fallback is
+    returned when one was supplied (None is a valid fallback); otherwise the
+    original exception is re-raised.
+    """
+    try:
+        with open(filename, "r") as f:
+            return json.load(f)
+    except Exception:
+        # 'default' is the tuple of extra positional arguments, so it is never
+        # None; test for emptiness to detect "no fallback supplied".
+        if not default:
+            raise
+        return default[0]  # only want first arg
+
+def loadJson(url):
+    """Fetch url through the shared UrlCache (uc) and return the parsed JSON.
+
+    The response object is closed even when parsing fails. Whatever uc.get()
+    or json.load() raises is propagated to the caller.
+    """
+    resp = uc.get(url, name=None, encoding='utf-8', errors=None)
+    try:
+        return json.load(resp)
+    finally:
+        resp.close()
+
+# Remote data sets, loaded once at import time through the URL cache.
+# projects: consulted by getPMCs() (per-group 'pmc' / 'owners' entries)
+projects = loadJson(PROJECTS)['projects']
+# members: consulted by isASFMember()
+members = loadJson(MEMBER_INFO)['members']
+# committers: indexed by uid to fill the 'you' entry of generate()'s result
+committers = loadJson(COMMITTER_INFO)['people']
+# charters: iterated by getJIRAProjects() and getProjectData(), which read the
+# 'name', 'id' and 'charter' fields of each entry
+charters = loadJson(DESCRIPTIONS)
+
+def getPMCs(uid):
+    """Return the committee groups (other than incubator) that uid owns.
+
+    A group counts when its entry in 'projects' has a 'pmc' key and uid is
+    listed in its 'owners'. Each group name is translated through pmap.
+    """
+    return [
+        pmap.get(name, name)
+        for name, info in projects.items()
+        if name != "incubator" and 'pmc' in info and uid in info['owners']
+    ]
+
+
+def isASFMember(uid):
+    """Tell whether uid is present in the loaded ASF member data."""
+    is_member = uid in members
+    return is_member
+
+def getJIRAProjects(project, tlpid):
+    """Return the JIRA project keys that belong to a project.
+
+    Reads data/JIRA/jira_projects.json, re-creating it from the JIRA REST API
+    when it is missing or more than a day old.
+
+    project -- display name, e.g. "Apache Foo"; compared with each JIRA
+               project category after trimming "Apache " and spaces and
+               lower-casing
+    tlpid   -- committee id; a JIRA project whose name matches a committee
+               name is only used when that committee's id equals tlpid
+
+    Returns a list of JIRA project keys (possibly empty). Failures to read or
+    refresh the project list are swallowed and yield an empty list.
+    """
+    project = project.replace("Apache ", "").strip().lower()
+    cachefile = RAOHOME + "data/JIRA/jira_projects.json"
+    x = []
+    jiras = []
+    try:
+        mtime = os.stat(cachefile).st_mtime
+    except OSError:
+        mtime = 0  # no cache file yet: force a refresh
+    try:
+        if mtime >= (time.time() - 86400):
+            x = readJson(cachefile)
+        else:
+            # Let requests build the Basic auth header; formatting the raw
+            # base64 bytes into a string yields "Basic b'...'" on Python 3.
+            resp = requests.get("https://issues.apache.org/jira/rest/api/2/project.json",
+                                auth=('githubbot', jirapass))
+            resp.raise_for_status()
+            fetched = resp.json()
+            # Only keep (and cache) a real project list, never an error document
+            if isinstance(fetched, list):
+                x = fetched
+                with open(cachefile, "w") as f:
+                    json.dump(x, f, indent=1)
+    except Exception:
+        # Best effort: without a usable project list there is nothing to match
+        pass
+
+    for entry in x:
+        # Check if this is actually a TLP not ours
+        entryname = fixProjectCategory(entry['name'])
+        mayuse = True
+        for xtlp in charters:
+            if fixProjectCategory(xtlp['name']) == entryname:
+                # The first committee with the same name decides: ours -> use
+                # the key, somebody else's -> ignore this JIRA project.
+                if xtlp['id'] == tlpid:
+                    jiras.append(entry['key'])
+                mayuse = False
+                break
+        if mayuse and 'projectCategory' in entry and fixProjectCategory(entry['projectCategory']['name']) == project:
+            jiras.append(entry['key'])
+    return jiras
+
+def fixProjectCategory(cat):
+    """Normalise a project or category name for comparison.
+
+    Removes every "Apache " and " Framework" substring, then strips the
+    surrounding whitespace and lower-cases the result.
+    """
+    trimmed = cat
+    for noise in ("Apache ", " Framework"):
+        trimmed = trimmed.replace(noise, "")
+    return trimmed.strip().lower()
+
+def getJIRAS(project):
+    """Return (created, resolved, project) issue counts for a JIRA project.
+
+    Uses data/JIRA/<project>.json when that file is less than two days old.
+    Otherwise asks JIRA how many issues were created and resolved in the last
+    91 days and rewrites the file.
+
+    Returns (0, 0, None) when the name is unusable or the counts cannot be
+    obtained.
+    """
+    # The name is interpolated into a file path and a JQL query, so accept
+    # only plain key characters.
+    if not project or not re.match(r"[-A-Za-z0-9_]+\Z", project):
+        return 0, 0, None
+
+    cachefile = RAOHOME + "data/JIRA/%s.json" % project
+    try:
+        if os.stat(cachefile).st_mtime >= (time.time() - (2*86400)):
+            x = readJson(cachefile)
+            return x[0], x[1], x[2]
+    except Exception:
+        # Missing or unreadable cache file: fall through and ask JIRA
+        pass
+
+    try:
+        totals = []
+        for field in ('created', 'resolved'):
+            jql = "project = '%s' AND %s >= -91d" % (project, field)
+            resp = requests.get("https://issues.apache.org/jira/rest/api/2/search",
+                                params={'jql': jql},
+                                auth=('githubbot', jirapass))
+            totals.append(resp.json()['total'])
+        created, resolved = totals
+        with open(cachefile, "w") as f:
+            json.dump([created, resolved, project], f, indent=1)
+        return created, resolved, project
+    except Exception:
+        # Don't create an empty file if the request fails. The likely cause is that the project does not use JIRA,
+        # or getjson has been invoked with an invalid pmc name. Invalid files will cause the refresh script to
+        # retry the requests unnecessarily.
+        # Furthermore, if there is a temporary issue, creating an empty file will prevent a retry for 48hours.
+        return 0, 0, None
+# Time at which pmcSummary was last refreshed by getProjectData()
+lastPSummary = 0
+
+def getProjectData(project = None):
+    """Collect summary data for one PMC plus the list of all PMC names.
+
+    Reads:
+     - committee_info.PMCsummary() (re-fetched at most once per CACHE_TIMEOUT)
+     - the module-level dataHealth list (filled from data/health.json by generate())
+     - the module-level charters list
+
+    @return: a tuple (x, y, z)
+     - x: dict with the pmc 'name' and 'chair' extracted from
+          committee_info.PMCsummary() plus, when a committee with that id
+          exists, its 'charter'; empty when project is None or unknown
+     - y: list of all project names in PMCsummary()
+     - z: health entry for project from data/health.json, or {} if none
+    """
+    global lastPSummary, pmcSummary
+    if lastPSummary < (time.time() - CACHE_TIMEOUT):
+        pmcSummary = committee_info.PMCsummary()
+        lastPSummary = time.time()
+    x = {}
+    y = list(pmcSummary)
+    z = {}
+    if project:
+        if project in pmcSummary:
+            x['name'] = pmcSummary[project]['name']
+            x['chair'] = pmcSummary[project]['chair']
+        # dataHealth stays None until generate() has loaded it
+        for entry in dataHealth or []:
+            if entry['group'] == project:
+                z = entry
+        for xtlp in charters:
+            if xtlp.get('id') == project:
+                x['charter'] = xtlp.get('charter', '')
+    return x, y, z
+
+def getReleaseData(project):
+    """Return the parsed data/releases/<project>.json, using {} as the readJson fallback."""
+    releasefile = RAOHOME + "data/releases/%s.json" % project
+    return readJson(releasefile, {})
+
+# NOTE(review): generate() assigns its own local 'groups'; nothing visible in this
+# file reads this module-level list - confirm whether it is still needed.
+groups = []
+
+# Inputs cached between calls. generate() (re)loads all of them when lastRead is
+# older than CACHE_TIMEOUT; getProjectData() also refreshes pmcSummary.
+pmcSummary = None # committee_info.PMCsummary()
+dataHealth = None # data/health.json
+pchanges = None # data/pmcs.json
+cchanges = None # data/projects.json
+bugzillastats = None # data/bugzillastats.json
+lastRead = 0 # time.time() of the last reload done by generate()
+ml = None # data/mailinglists.json
+mld = None # data/maildata_extended.json
+pmcdates = None # data/pmcdates.json
+checker_json = None # https://checker.apache.org/json/ or data/cache/checker.json
+
+def generate(user, project, runkibble):
+    """Build the reporter wizard data for one project.
+
+    user      -- authenticated user id; must match ^[-a-zA-Z0-9_.]+$
+    project   -- PMC name to report on
+    runkibble -- when true, also run site/wizard/kibble.py and include its
+                 parsed JSON output
+
+    Returns a dict with the keys count, delivery, jira, bugzilla, changes,
+    pmcdates, pdata, releases, keys, health, checker, you and kibble.
+    Returns {'okay': False, 'error': ...} when the user id is malformed and
+    None when the kibble helper exits with an error.
+
+    The JSON inputs are re-read at most once per CACHE_TIMEOUT seconds and
+    kept in module-level globals between calls.
+    """
+    global lastRead, pmcSummary, dataHealth, pchanges, cchanges
+    global bugzillastats, ml, mld, pmcdates, checker_json
+
+    if not re.match(r"^[-a-zA-Z0-9_.]+$", user):
+        return {'okay': False, 'error': "Invalid user credentials!"}
+
+    # Check if we need to re-read json inputs...
+    if lastRead < (time.time() - CACHE_TIMEOUT):
+        pmcSummary = committee_info.PMCsummary()
+        dataHealth = readJson(RAOHOME+"data/health.json", [])
+        pchanges = readJson(RAOHOME+"data/pmcs.json")
+        cchanges = readJson(RAOHOME+"data/projects.json")
+        bugzillastats = readJson(RAOHOME+"data/bugzillastats.json", {})
+        mld = readJson(RAOHOME+"data/maildata_extended.json")
+        ml = readJson(RAOHOME+"data/mailinglists.json")
+        pmcdates = readJson(RAOHOME+"data/pmcdates.json")
+        # fetch checker_json from checker.apache.org ; use cache as fallback
+        try:
+            checker_json = requests.get("https://checker.apache.org/json/", timeout = 1.0).json()
+        except Exception:
+            checker_json = readJson(RAOHOME+"data/cache/checker.json", None)
+        lastRead = time.time()
+
+    # Mailing list data for the lists that belong to this project
+    emails = {}
+    for entry in mld:  # e.g. hc-dev, ant-users, ws-dev
+        tlp = entry.split("-")[0]
+        nentry = entry
+        if tlp == "empire":
+            tlp = "empire-db"
+            nentry = entry.replace("empire-", "empire-db-")
+        if tlp in pmap:  # convert ml prefix to PMC internal name
+            tlp = pmap[tlp]
+        if tlp == project:
+            emails[nentry] = mld[entry]
+
+    group = project
+    xgroup = ldapmap.get(group, group)
+
+    # Number of entries recorded for the PMC and for its committers
+    count = [0, 0]
+    if xgroup in pchanges:
+        count[0] = len(pchanges[xgroup])
+    if xgroup in cchanges:
+        count[1] = len(cchanges[xgroup])
+
+    ddata, allpmcs, health = getProjectData(group)
+    rdata = getReleaseData(group)
+
+    # Only this group's Bugzilla statistics, not the whole stats file
+    if group in bugzillastats:
+        bdata = bugzillastats[group]
+    else:
+        bdata = [0,0,{}]
+
+    # a PMC may have projects using Bugzilla *and* JIRA - e.g. Tomcat - (or neither)
+    keys = []
+    jdata = [0, 0, None]
+    jiraname = group.upper()
+    if group in jmap:
+        jdata[2] = []
+        # Walk the names mapped for this group, not the keys of jmap itself
+        for jiraname in jmap[group]:
+            x, y, p = getJIRAS(jiraname)
+            jdata[0] += x
+            jdata[1] += y
+            if x > 0 or y > 0:
+                jdata[2].append(p)
+            keys.append(jiraname)
+    # NOTE(review): ddata only ever holds 'name', 'chair' and 'charter', so
+    # "group in ddata" looks always false and this branch unreachable;
+    # condition kept as-is - confirm the intent before enabling it.
+    elif group in ddata and 'name' in ddata:
+        keys = getJIRAProjects(ddata['name'], group)
+        jdata[2] = []
+        for jiraname in keys:
+            x, y, p = getJIRAS(jiraname)
+            jdata[0] += x
+            jdata[1] += y
+            if x > 0 or y > 0:
+                jdata[2].append(p)
+    elif jiraname:
+        keys = [jiraname]
+        x, y, p = getJIRAS(jiraname)
+        jdata[0] += x
+        jdata[1] += y
+        jdata[2] = p
+
+    # Keep only the PMC / committer entries whose second field is positive
+    cdata = {'pmc': {}, 'committer': {}}
+    for member, change in pchanges.get(xgroup, {}).items():
+        if change[1] > 0:
+            cdata['pmc'][member] = change
+    for member, change in cchanges.get(xgroup, {}).items():
+        if change[1] > 0:
+            cdata['committer'][member] = change
+
+    dates = {}
+    if group in pmcdates:  # Make sure we have this PMC in the JSON, so as to not bork
+        dates = pmcdates[group]  # only send the groups we want
+
+    checker = {}
+    if checker_json and 'meta' in checker_json and 'projects' in checker_json:
+        meta = checker_json['meta']
+        prjs = checker_json['projects']
+        checker = prjs[group] if group in prjs else { 'errors': 0 }
+        checker['meta'] = meta
+
+    # Add in kibble data if called with only= OR only one project...
+    kibble = None
+    if runkibble:
+        try:
+            xenv = os.environ.copy()
+            xenv.pop('SCRIPT_NAME', None)
+            cmd = ('%s/site/wizard/kibble.py' % RAOHOME_FULL, project)
+            jirakeys = jdata[2]
+            if jirakeys:
+                # A single key arrives as a plain string; wrap it so it is
+                # passed as one argument instead of one per character.
+                if not isinstance(jirakeys, (list, tuple)):
+                    jirakeys = [jirakeys]
+                cmd += tuple(jirakeys)
+            txt = subprocess.check_output(cmd, env = xenv)
+            kibble = json.loads(txt)
+        except subprocess.CalledProcessError:
+            return None
+
+    return {
+        'count': count,
+        'delivery': emails,
+        'jira': jdata,
+        'bugzilla': bdata,
+        'changes': cdata,
+        'pmcdates': dates,
+        'pdata': ddata,
+        'releases': rdata,
+        'keys': keys,
+        'health': health,
+        'checker': checker,
+        'you': committers[user],
+        'kibble': kibble,
+    }
Propchange: comdev/reporter.apache.org/trunk/scripts/pdata.py
------------------------------------------------------------------------------
svn:executable = *
Added: comdev/reporter.apache.org/trunk/scripts/wsgi.py
URL: http://svn.apache.org/viewvc/comdev/reporter.apache.org/trunk/scripts/wsgi.py?rev=1864571&view=auto
==============================================================================
--- comdev/reporter.apache.org/trunk/scripts/wsgi.py (added)
+++ comdev/reporter.apache.org/trunk/scripts/wsgi.py Tue Aug 6 23:16:23 2019
@@ -0,0 +1,57 @@
+#!/usr/bin/env python2.7
+import os
+import cgi
+import json
+import pdata
+import time
+import re
+
+# Maximum age, in seconds, of a /tmp/pdata-*.json file that app() will reuse
+CACHE_TIMEOUT = 3600
+
+
+def app(environ, start_fn):
+    """WSGI entry point: return the wizard data for the authenticated user as JSON.
+
+    environ  -- WSGI environment. HTTP_X_AUTHENTICATED_USER carries the user
+                id; QUERY_STRING optionally names a single project, otherwise
+                every PMC returned by pdata.getPMCs(user) is reported
+    start_fn -- WSGI start_response callable
+
+    Per-project results are cached in /tmp for CACHE_TIMEOUT seconds. Always
+    answers "200 OK"; without a user or any usable project data the body is an
+    {'okay': False, 'error': ...} object.
+    """
+    committers = pdata.loadJson(pdata.COMMITTER_INFO)['people']
+    project = environ.get('QUERY_STRING')
+    user = environ.get('HTTP_X_AUTHENTICATED_USER')
+
+    output = {'okay': False, 'error': 'Unknown user ID provided!'}
+
+    dumps = {}
+    groups = []
+    if user:
+        groups = pdata.getPMCs(user)
+    # The project name becomes part of a file name below, so the whole string
+    # must match, not just a prefix (\Z also rejects a trailing newline).
+    if project and user and re.match(r"[-a-z0-9]+\Z", project):
+        groups = [project]
+
+    for xproject in groups:
+        kibbled = (xproject == project)
+
+        # Try cache first? (max CACHE_TIMEOUT seconds old)
+        if kibbled:
+            wanted_file = "/tmp/pdata-kibbled-%s.json" % xproject
+        else:
+            wanted_file = "/tmp/pdata-%s.json" % xproject
+        mpdata = None
+        if (os.path.exists(wanted_file) and os.path.getmtime(wanted_file) > (time.time() - CACHE_TIMEOUT)):
+            try:
+                with open(wanted_file, "r") as f:
+                    mpdata = json.load(f)
+            except (IOError, OSError, ValueError):
+                mpdata = None  # unreadable cache entry: regenerate below
+        # If cache failed, generate from scratch
+        if not isinstance(mpdata, dict):
+            mpdata = pdata.generate(user, xproject, kibbled)
+            # generate() gives None when kibble fails and an 'okay': False
+            # object for a bad user id; never cache or merge those.
+            if not isinstance(mpdata, dict) or mpdata.get('okay') is False:
+                continue
+            with open(wanted_file, "w") as f:
+                f.write(json.dumps(mpdata))
+        # Weave results into combined object, mindful of kibble data
+        for k, v in mpdata.items():
+            if k == 'kibble':
+                if v:
+                    dumps['kibble'] = v
+                else:
+                    dumps.setdefault('kibble', {})
+            else:
+                dumps.setdefault(k, {})[xproject] = v
+
+    # Set personalized vars, dump
+    if dumps and user:
+        ddata, allpmcs, health = pdata.getProjectData()
+        dumps['you'] = committers[user]
+        dumps['all'] = allpmcs
+        dumps['pmcs'] = groups
+        output = dumps
+
+    start_fn('200 OK', [('Content-Type', 'application/json')])
+
+    return [json.dumps(output, indent = 2).encode('ascii')]