You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by hu...@apache.org on 2015/01/14 20:29:37 UTC
svn commit: r1651780 - in /comdev/projects.apache.org/scripts: ./ cronjobs/
import/
Author: humbedooh
Date: Wed Jan 14 19:29:36 2015
New Revision: 1651780
URL: http://svn.apache.org/r1651780
Log:
Rearrange script locations, adjust scripts for said change.
axe age.py, that's....rich's stuff
Added:
comdev/projects.apache.org/scripts/cronjobs/
comdev/projects.apache.org/scripts/cronjobs/parsechairs.py
comdev/projects.apache.org/scripts/cronjobs/parsecommitters.py
comdev/projects.apache.org/scripts/cronjobs/podlings.py
comdev/projects.apache.org/scripts/import/
comdev/projects.apache.org/scripts/import/parsecommittees.py
comdev/projects.apache.org/scripts/import/parsepmcs.py
comdev/projects.apache.org/scripts/import/rdfparse.py
Removed:
comdev/projects.apache.org/scripts/age.py
comdev/projects.apache.org/scripts/parsechairs.py
comdev/projects.apache.org/scripts/parsecommittees.py
comdev/projects.apache.org/scripts/parsecommitters.py
comdev/projects.apache.org/scripts/parsepmcs.py
comdev/projects.apache.org/scripts/podlings.py
comdev/projects.apache.org/scripts/rdfparse.py
Added: comdev/projects.apache.org/scripts/cronjobs/parsechairs.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/cronjobs/parsechairs.py?rev=1651780&view=auto
==============================================================================
--- comdev/projects.apache.org/scripts/cronjobs/parsechairs.py (added)
+++ comdev/projects.apache.org/scripts/cronjobs/parsechairs.py Wed Jan 14 19:29:36 2015
@@ -0,0 +1,23 @@
"""Scrape the ASF foundation page and write chairs.json.

Fetches http://www.apache.org/foundation/ and extracts the
"V.P., Apache <Project>" rows of the officers table, producing a
{project name: chair name} mapping saved as JSON for the site.
"""
import json
import re
import urllib.request

FOUNDATION_URL = "http://www.apache.org/foundation/"
CHAIRS_JSON = "../../site/json/foundation/chairs.json"

# One <tr> per officer; only rows naming a project V.P. are of interest.
ROW_RE = re.compile(r"<tr>[\s\S]+?V\.P\., Apache [\s\S]+?</tr>",
                    re.MULTILINE | re.UNICODE)
# First cell: the office ("V.P., Apache Foo"); a later cell: the person.
# The dots in "V.P." are escaped here; the original pattern used bare
# dots, which match any character.
CELL_RE = re.compile(r"<td>V\.P\., (Apache [\s\S]+?)</td>[\s\S]*?<td>([\s\S]+?)</td>",
                     re.MULTILINE | re.UNICODE)


def parse_chairs(html):
    """Return {project: chair} parsed from the foundation page HTML."""
    chairs = {}
    for row in ROW_RE.findall(html):
        m = CELL_RE.search(row)
        if m:
            chairs[m.group(1)] = m.group(2)
    return chairs


def main():
    data = urllib.request.urlopen(FOUNDATION_URL).read().decode("utf-8")
    with open(CHAIRS_JSON, "w") as f:
        # json.dump replaces write(dumps(...)) plus the redundant close()
        # the original called inside the with block.
        json.dump(parse_chairs(data), f)


if __name__ == "__main__":
    main()
Added: comdev/projects.apache.org/scripts/cronjobs/parsecommitters.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/cronjobs/parsecommitters.py?rev=1651780&view=auto
==============================================================================
--- comdev/projects.apache.org/scripts/cronjobs/parsecommitters.py (added)
+++ comdev/projects.apache.org/scripts/cronjobs/parsecommitters.py Wed Jan 14 19:29:36 2015
@@ -0,0 +1,42 @@
"""Scrape the ASF committer index and write people.json / committers.json.

Parses http://people.apache.org/committer-index.html into
  people.json     -- {committer id: {name, member, projects}}
  committers.json -- {project: [committer ids]}
"""
import json
import re
import urllib.request
from collections import defaultdict

COMMITTER_INDEX_URL = "http://people.apache.org/committer-index.html"
PEOPLE_JSON = "../../site/json/foundation/people.json"
COMMITTERS_JSON = "../../site/json/foundation/committers.json"

# One <tr> per committer row.
ROW_RE = re.compile(r"<tr>([\S\s]+?)</tr>", re.MULTILINE | re.UNICODE)
# Anchor id (apache uid), display-name cell, then the projects cell.
FIELDS_RE = re.compile(r"<a id='(.+?)'>[\s\S]+?<td.+?>(.+?)</td>[\s\S]+?>(.+)</td>",
                       re.MULTILINE | re.UNICODE)
# Project names appear as "#name" fragment links in the projects cell.
PROJECT_RE = re.compile(r"#([-a-z0-9._]+)")


def parse_committers(html):
    """Return (people, pmcs) parsed from the committer index HTML.

    people: {uid: {'name': str, 'member': bool, 'projects': [str]}}
    pmcs:   {project: [uid, ...]}
    """
    people = {}
    # defaultdict replaces the manual "create key if missing" dance.
    pmcs = defaultdict(list)
    for row in ROW_RE.findall(html):
        m = FIELDS_RE.search(row)
        if not m:
            continue
        uid = m.group(1)
        # Strip up to four inline markup tags (e.g. <b>...</b>) from the name.
        name = re.sub(r"<.+?>", "", m.group(2), count=4)
        projects = PROJECT_RE.findall(m.group(3))
        for project in projects:
            pmcs[project].append(uid)
        people[uid] = {
            'name': name,
            # ASF members are rendered in bold (<b>) on the index page.
            'member': re.search(r"<b", row, re.MULTILINE | re.UNICODE) is not None,
            'projects': projects,
        }
    return people, dict(pmcs)


def main():
    html = urllib.request.urlopen(COMMITTER_INDEX_URL).read().decode("utf-8")
    people, pmcs = parse_committers(html)
    with open(PEOPLE_JSON, "w") as f:
        json.dump(people, f)
    with open(COMMITTERS_JSON, "w") as f:
        json.dump(pmcs, f)


if __name__ == "__main__":
    main()
\ No newline at end of file
Added: comdev/projects.apache.org/scripts/cronjobs/podlings.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/cronjobs/podlings.py?rev=1651780&view=auto
==============================================================================
--- comdev/projects.apache.org/scripts/cronjobs/podlings.py (added)
+++ comdev/projects.apache.org/scripts/cronjobs/podlings.py Wed Jan 14 19:29:36 2015
@@ -0,0 +1,112 @@
"""Build incubator podling statistics from podlings.xml.

Writes:
  podlings.csv    -- per-month counts of new/graduated/retired podlings
  evolution.json  -- the same counts plus a running "current" total
  podlings.json   -- metadata for podlings currently in incubation
"""
from xml.dom import minidom
import csv
import json
import re
import urllib.request
from collections import defaultdict
from datetime import datetime

PODLINGS_URL = "http://incubator.apache.org/podlings.xml"
FIRST_YEAR = 2003   # the Incubator's first year of data
LAST_YEAR = 2015    # inclusive; bump when newer data arrives
FIELDNAMES = ['month', 'new', 'graduated', 'retired']


def month_keys(first=FIRST_YEAR, last=LAST_YEAR):
    """All 'YYYY-MM' keys from January of *first* through December of *last*."""
    return ["%04u-%02u" % (y, m) for y in range(first, last + 1) for m in range(1, 13)]


def _to_month(datestr):
    """Normalize a date string to zero-padded 'YYYY-MM' (else return unchanged)."""
    m = re.match(r"(\d{4})-(\d+)", datestr or "")
    if m:
        return "%04u-%02u" % (int(m.group(1)), int(m.group(2)))
    return datestr


def summarize(xmldata):
    """Parse podlings.xml content.

    Returns (new, grads, ret, current, cpods): new/grads/ret map
    'YYYY-MM' -> count, current is the number of active podlings, and
    cpods maps podling resource id -> site metadata dict.
    """
    # defaultdict avoids the KeyError the original raised when a podling's
    # date fell outside the pre-seeded 2003-2015 tables (or was not
    # zero-padded); _to_month handles the padding.
    new = defaultdict(int)
    grads = defaultdict(int)
    ret = defaultdict(int)
    current = 0
    cpods = {}

    for pod in minidom.parseString(xmldata).getElementsByTagName('podling'):
        name = pod.getAttribute('name')
        uname = pod.getAttribute('resource')
        status = pod.getAttribute('status')
        started = _to_month(pod.getAttribute('startdate'))
        # getAttribute returns "" when the attribute is absent; treat that
        # as "no end date" (avoids the NamedNodeMap membership test).
        ended = _to_month(pod.getAttribute('enddate') or None)
        desc = "No description"
        for child in pod.childNodes:
            if child.nodeType == child.ELEMENT_NODE and child.tagName == 'description':
                if child.childNodes:
                    desc = child.childNodes[0].data
                break
        new[started] += 1
        if status == "graduated":
            if not ended:
                ended = started
                print("%s did not specify a graduation date, assuming %s!" % (name, ended))
            grads[ended] += 1
        elif status == "retired":
            if not ended:
                ended = started
                print("%s did not specify a retirement date, assuming %s!" % (name, ended))
            ret[ended] += 1
        elif status == "current":
            current += 1
            cpods[uname] = {
                'started': started,
                'name': "Apache %s (Incubating)" % name,
                'description': desc,
                'homepage': "http://%s.incubator.apache.org/" % uname,
                'podling': True,
            }
    return new, grads, ret, current, cpods


def main():
    data = urllib.request.urlopen(PODLINGS_URL).read()
    new, grads, ret, current, cpods = summarize(data)
    months = month_keys()

    # CSV now covers the same year range as the JSON below; the original
    # stopped the CSV at 2014 while the JSON went through 2015.
    with open('podlings.csv', 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
        writer.writeheader()
        for m in months:
            writer.writerow({'month': m, 'new': new[m],
                             'graduated': grads[m], 'retired': ret[m]})

    now = datetime.now()
    # Only months up to (and including) the current one go into the chart.
    js = [{'month': m, 'new': new[m], 'graduated': grads[m],
           'retired': ret[m], 'current': 0}
          for m in months
          if (int(m[:4]), int(m[5:7])) <= (now.year, now.month)]

    # Walk backwards from the newest month, deriving how many podlings
    # were in incubation at each point in time.
    js.reverse()
    for entry in js:
        entry['current'] = current
        current -= entry['new']
        current += entry['graduated']
        current += entry['retired']

    with open('../../site/json/foundation/evolution.json', 'w') as f:
        json.dump(js, f)

    with open('../../site/json/foundation/podlings.json', 'w') as f:
        json.dump(cpods, f)

    print("Done!")


if __name__ == "__main__":
    main()
+
\ No newline at end of file
Added: comdev/projects.apache.org/scripts/import/parsecommittees.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parsecommittees.py?rev=1651780&view=auto
==============================================================================
--- comdev/projects.apache.org/scripts/import/parsecommittees.py (added)
+++ comdev/projects.apache.org/scripts/import/parsecommittees.py Wed Jan 14 19:29:36 2015
@@ -0,0 +1,51 @@
"""Import PMC founding dates from committee-info.txt.

Reads a locally downloaded copy of committee-info.txt, groups
committees by their establishment month, writes committees.json and
cross-checks the known chairs against the file.
"""
import json
import re
import sys
from collections import defaultdict

CHAIRS_JSON = "../../site/json/foundation/chairs.json"
COMMITTEES_JSON = "../../site/json/foundation/committees.json"

# "* Project Name    (est. MM/YYYY) ..." lines from committee-info.txt.
PMC_LINE_RE = re.compile(r"\* .+?\s+\(est\. [0-9/]+[^\r\n]+", re.UNICODE | re.IGNORECASE)
# Escaped dot in "est\." -- the original search pattern used a bare dot.
EST_RE = re.compile(r"\* (.+?)\s+\(est\. ([0-9]+)/([0-9]+)", re.IGNORECASE | re.UNICODE)


def parse_committees(data):
    """Return (committees, seen, count) from committee-info.txt content.

    committees: {'YYYY-MM': [project, ...]} keyed by establishment month
    seen:       {project: True} for every entry found (STLP or not)
    count:      number of top-level projects (non-"commit*" entries)
    """
    # Seed every month so the JSON still lists empty months, but use a
    # defaultdict so dates outside the seeded range no longer raise
    # KeyError (the original crashed on anything past 2014).
    committees = defaultdict(list)
    for year in range(1995, 2015):
        for month in range(1, 13):
            committees["%04u-%02u" % (year, month)] = []
    seen = {}
    count = 0
    for pmc in PMC_LINE_RE.findall(data):
        m = EST_RE.search(pmc)
        if not m:
            continue
        project = "Apache %s" % m.group(1)
        month, year = m.group(2), m.group(3)
        # Lines mentioning "commit" (e.g. "... Committers") are reported
        # but not counted as top-level projects.
        if not re.search(r"[Cc]ommit", pmc, re.IGNORECASE):
            count += 1
            committees["%s-%s" % (year, month)].append(project)
        else:
            print("%s was found but is not an STLP" % project)
        seen[project] = True
    return committees, seen, count


def main():
    with open(CHAIRS_JSON) as f:
        chairs = json.load(f)
    try:
        with open("committee-info.txt", "rb") as f:
            data = f.read().decode('utf-8')
    except OSError:
        print("You will need to download committee-info.txt and place it in this folder first")
        # BUG FIX: the original called os.exit(1), which does not exist
        # (AttributeError); sys.exit is the correct call.
        sys.exit(1)

    committees, seen, count = parse_committees(data)

    with open(COMMITTEES_JSON, "w") as f:
        json.dump(committees, f)

    for chair in chairs:
        if chair not in seen:
            print("%s is not in committee-info!" % chair)

    print(count)


if __name__ == "__main__":
    main()
\ No newline at end of file
Added: comdev/projects.apache.org/scripts/import/parsepmcs.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parsepmcs.py?rev=1651780&view=auto
==============================================================================
--- comdev/projects.apache.org/scripts/import/parsepmcs.py (added)
+++ comdev/projects.apache.org/scripts/import/parsepmcs.py Wed Jan 14 19:29:36 2015
@@ -0,0 +1,63 @@
"""Import script: fetch every checked-in DOAP (*.rdf) data file and
aggregate the flattened contents into site/json/foundation/pmcs.json."""
from xml.dom import minidom  # NOTE(review): appears unused in this script
import xml.etree.ElementTree as ET
import re, urllib.request
import csv  # NOTE(review): appears unused in this script
import json
import os  # NOTE(review): appears unused in this script

# SVN directory listing of the checked-in DOAP data files.
data = urllib.request.urlopen("https://svn.apache.org/repos/asf/infrastructure/site-tools/trunk/projects/data_files/").read().decode('utf-8')
# Every "<id>.rdf" link in the listing yields one project id.
itemlist = re.findall(r">([a-z0-9]+)\.rdf<", data)

# project id -> flattened DOAP dict, filled by the loop below.
projects = {}
+
def handleChild(el):
    """Flatten one DOAP/RDF element into a (tag, value) pair.

    Leaf elements yield their rdf:resource-style attribute if present,
    otherwise their text.  Elements with children yield a dict of the
    recursively flattened children (a later duplicate child tag
    overwrites an earlier one).  Namespace prefixes ({...}) are stripped
    from tags and attribute names.
    """
    # The original walked list(el) once just to set a hasKids flag, and
    # computed the attribute/text value even when it was discarded.
    children = list(el)
    tag = re.sub(r"\{.+\}", "", el.tag)
    if not children:
        # Leaf: prefer an rdf:resource attribute over the element text.
        value = el.text
        for key, attr_value in el.attrib.items():
            if re.sub(r"\{.+\}", "", key) == 'resource':
                value = attr_value
        return tag, value
    # Branch: recurse into every child.
    flattened = {}
    for child in children:
        child_tag, child_value = handleChild(child)
        flattened[child_tag] = child_value
    return tag, flattened
+
# Fetch and flatten each project's DOAP file; accumulate into pmcs.json.
for s in itemlist :
    url = "https://svn.apache.org/repos/asf/infrastructure/site-tools/trunk/projects/data_files/%s.rdf" % s
    print(url)
    try:
        rdf = urllib.request.urlopen(url).read()
        rdfxml = ET.fromstring(rdf)
        # The RDF root's first child is the actual project description.
        project = rdfxml[0]
        pjson = {

        }
        prname = None
        for el in project:
            k, v = handleChild(el)
            if k in pjson:
                # Duplicate tag: concatenate string values, merge dicts.
                if type(pjson[k]) is str:
                    pjson[k] = "%s, %s" % (pjson[k], v)
                else:
                    # NOTE(review): if the earlier value is a dict but the new
                    # one is a string, this iterates the string's characters --
                    # presumably duplicate tags always share a shape; verify
                    # against the actual DOAP data.
                    for xk in v:
                        pjson[k][xk] = v[xk]
            else:
                pjson[k] = v
        projects[s] = pjson

    # Best-effort import: a malformed or unreachable RDF file is logged
    # and skipped rather than aborting the whole run.
    except Exception as err:
        print(err)

# Write the combined mapping (project id -> flattened DOAP) in one file.
with open ("../../site/json/foundation/pmcs.json", "w") as f:
    f.write(json.dumps(projects))
    # NOTE(review): close() is redundant inside a with block.
    f.close()
print("Done!")
+
\ No newline at end of file
Added: comdev/projects.apache.org/scripts/import/rdfparse.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/rdfparse.py?rev=1651780&view=auto
==============================================================================
--- comdev/projects.apache.org/scripts/import/rdfparse.py (added)
+++ comdev/projects.apache.org/scripts/import/rdfparse.py Wed Jan 14 19:29:36 2015
@@ -0,0 +1,91 @@
"""Import script: read files.xml for DOAP locations, flatten each DOAP
and write per-project JSON plus site/json/foundation/projects.json."""
from xml.dom import minidom
import xml.etree.ElementTree as ET
import re, urllib.request
import csv  # NOTE(review): appears unused in this script
import json
import os

# files.xml lists one <location> element per project DOAP URL.
data = urllib.request.urlopen("https://svn.apache.org/repos/asf/infrastructure/site-tools/trunk/projects/files.xml").read()
xmldoc = minidom.parseString(data)
itemlist = xmldoc.getElementsByTagName('location')

# short project name -> flattened DOAP dict, filled by the loop below.
projects = {}
+
def handleChild(el):
    """Flatten one DOAP/RDF element into a (tag, value) pair.

    Leaf elements yield their rdf:resource-style attribute if present,
    otherwise their text.  Elements with children yield a dict of the
    recursively flattened children -- except that a nested <location>
    child collapses the whole element to just that location's value
    (remaining siblings are not processed).  Namespace prefixes ({...})
    are stripped from tags and attribute names.
    """
    # The original walked list(el) once just to set a hasKids flag, and
    # computed the attribute/text value even when it was discarded.
    children = list(el)
    tag = re.sub(r"\{.+\}", "", el.tag)
    if not children:
        # Leaf: prefer an rdf:resource attribute over the element text.
        value = el.text
        for key, attr_value in el.attrib.items():
            if re.sub(r"\{.+\}", "", key) == 'resource':
                value = attr_value
        return tag, value
    # Branch: recurse into every child.
    flattened = {}
    for child in children:
        child_tag, child_value = handleChild(child)
        flattened[child_tag] = child_value
        if child_tag == "location":
            # A release <location> replaces the whole dict with its URL.
            return tag, child_value
    return tag, flattened
+
# Fetch each DOAP listed in files.xml, flatten it, and write one JSON
# file per project plus a combined projects.json.
for s in itemlist :
    url = s.childNodes[0].data
    try:
        rdf = urllib.request.urlopen(url).read()
        rdfxml = ET.fromstring(rdf)
        # The RDF root's first child is the actual project description.
        project = rdfxml[0]
        pjson = {
            'doap': url
        }
        prname = None
        for el in project:
            k, v = handleChild(el)
            # Duplicate tags are merged (strings joined with ", ", dicts
            # key-merged) -- except 'name' and 'homepage', which are
            # simply overwritten by the later value.
            if k in pjson and not k in ['name','homepage']:
                if type(pjson[k]) is str:
                    pjson[k] = "%s, %s" % (pjson[k], v)
                else:
                    # NOTE(review): assumes the new value v is a dict here; a
                    # string would be iterated char-by-char -- verify against
                    # the actual DOAP data.
                    for xk in v:
                        pjson[k][xk] = v[xk]
            else:
                pjson[k] = v

        # Derive a short project name (prname) from the homepage
        # subdomain, e.g. http://foo.apache.org/ -> "foo".
        if pjson['homepage']:
            m = re.match(r"https?://([^.]+)\.", pjson['homepage'], re.IGNORECASE)
            if m:
                prname = m.group(1)
        # Normalized display name, e.g. "Apache Foo Bar" -> "foo-bar".
        nn = pjson['name'].replace("Apache ", "").replace(" ", "-").lower()
        # Reduce the pmc URL to its subdomain as well.
        m = re.search(r"http://([-a-z0-9]+)\.", pjson['pmc'])
        if m:
            pjson['pmc'] = m.group(1)
        # Sub-projects (a homepage with a trailing path component, or a
        # clash with an already-written JSON file) get a compound
        # "pmc-name" file name instead.
        # NOTE(review): this existence check uses "../site/..." while the
        # one further down uses "../../site/..." -- one of the two
        # relative paths is probably wrong; confirm the working directory.
        if re.search(r"/[a-z0-9+A-Z]+/?$", pjson['homepage']) or os.path.isfile("../site/json/projects/%s.json" % prname):
            prname = "%s-%s" % (prname, nn)
        if prname:
            # Hoist nested dict values up to the top level; the original
            # keys are blanked and remain in the JSON as nulls.
            add = {}
            for k in pjson:
                if type(pjson[k]) is not str:
                    # NOTE(review): handleChild can return None for empty
                    # elements; iterating None raises here and the whole
                    # project is skipped via the except below -- confirm
                    # that is intended.
                    for e in pjson[k]:
                        add[e] = pjson[k][e]
                    pjson[k] = None

            for e in add:
                pjson[e] = add[e]
            # Only add to the combined index if no file existed already.
            if not os.path.isfile("../../site/json/projects/%s.json" % prname):
                projects[prname] = pjson
            print("Writing %s.json..." % prname)
            with open ("../site/json/projects/%s.json" % prname, "w") as f:
                f.write(json.dumps(pjson))
                # NOTE(review): close() is redundant inside a with block.
                f.close()
    # Best-effort: log and skip any project whose DOAP cannot be processed.
    except Exception as err:
        print("Error: %s" % err)

# Combined mapping of short project name -> flattened DOAP.
with open ("../../site/json/foundation/projects.json", "w") as f:
    f.write(json.dumps(projects))
    f.close()
print("Done!")
+
\ No newline at end of file