You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by hb...@apache.org on 2015/06/28 22:49:20 UTC
svn commit: r1688067 - in /comdev/projects.apache.org: scripts/README.txt
scripts/import/parsecommittees.py scripts/import/parsepmcs.py
site/json/foundation/HEADER.html
Author: hboutemy
Date: Sun Jun 28 20:49:19 2015
New Revision: 1688067
URL: http://svn.apache.org/r1688067
Log:
merged parsepmcs.py into parsecommittees.py
Removed:
comdev/projects.apache.org/scripts/import/parsepmcs.py
Modified:
comdev/projects.apache.org/scripts/README.txt
comdev/projects.apache.org/scripts/import/parsecommittees.py
comdev/projects.apache.org/site/json/foundation/HEADER.html
Modified: comdev/projects.apache.org/scripts/README.txt
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/README.txt?rev=1688067&r1=1688066&r2=1688067&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/README.txt (original)
+++ comdev/projects.apache.org/scripts/README.txt Sun Jun 28 20:49:19 2015
@@ -30,16 +30,13 @@ various sources:
2. importing data (import)
-- parsecommittees.py: Parses committee-info.txt to detect new and retired committees
+- parsecommittees.py: Parses committee-info.txt to detect new and retired committees and imports PMC data (RDF) from
+ PMC data files
in: site/json/foundation/committees.json + site/json/foundation/committees-retired.json
+ data/board/committee-info.txt (https://svn.apache.org/repos/private/committers/board/committee-info.txt)
+ data/committees.xml + PMC data data/committees/*.rdf
out: site/json/foundation/committees.json + site/json/foundation/committees-retired.json + site/doap/{committeeId}/pmc.rdf
-
-- parsepmcs.py: imports PMC data (RDF) from PMC data files. No need
- to run that more than once?
- in: data/committees.xml + PMC data data/committees/*.rdf
- out: site/json/foundation/pmcs.json + site/doap/{committeeId}/pmc-doap.rdf
+ + site/json/foundation/pmcs.json + site/doap/{committeeId}/pmc-doap.rdf
- parseprojects.py: Parses existing projects RDF(DOAP) files and turns them into JSON objects.
in: data/projects.xml + projects' DOAP files
Modified: comdev/projects.apache.org/scripts/import/parsecommittees.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parsecommittees.py?rev=1688067&r1=1688066&r2=1688067&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/import/parsecommittees.py (original)
+++ comdev/projects.apache.org/scripts/import/parsecommittees.py Sun Jun 28 20:49:19 2015
@@ -46,9 +46,30 @@ with open("../../site/json/foundation/pe
people = json.loads(f.read())
f.close()
+def handleChild(el):
+ retval = None
+ hasKids = False
+ for child in list(el):
+ hasKids = True
+ attribs = {}
+ for key in el.attrib:
+ xkey = re.sub(r"\{.+\}", "", key)
+ attribs[xkey] = el.attrib[key]
+ tag = re.sub(r"\{.+\}", "", el.tag)
+ value = attribs['resource'] if 'resource' in attribs else el.text
+ if not hasKids:
+ retval = value
+ else:
+ retval = {}
+ for child in list(el):
+ k, v = handleChild(child)
+ retval[k] = v
+ return tag, retval
+
# get PMC Data from /data/committees.xml
print("reading PMC Data (/data/committees.xml)")
-pmcData = {} # id -> url
+pmcs = {}
+pmcDataUrls = {} # id -> url
with open("../../data/committees.xml", "r") as f:
xmldoc = minidom.parseString(f.read())
f.close()
@@ -57,17 +78,44 @@ for s in itemlist :
url = s.childNodes[0].data
try:
if url.startswith('http'):
- data = urllib.request.urlopen(url).read()
+ print(url)
+ rdf = urllib.request.urlopen(url).read()
else:
- data = open("../../data/%s" % url, 'r').read()
+ rdf = open("../../data/%s" % url, 'r').read()
url = "https://svn.apache.org/repos/asf/comdev/projects.apache.org/data/%s" % url
- rdfxml = ET.fromstring(data)
+ rdfxml = ET.fromstring(rdf)
data = rdfxml[0]
- if data.tag == '{http://projects.apache.org/ns/asfext#}pmc':
- id = data.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about']
- pmcData[id] = url
+ committeeId = data.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about']
+ pmcDataUrls[committeeId] = url
+
+ # transform PMC data RDF to json
+ pmcjson = {
+ 'rdf': url
+ }
+ pmcname = None
+ for el in data:
+ k, v = handleChild(el)
+ if k in pmcjson:
+ # merge multiple values
+ if type(pmcjson[k]) is str:
+ pmcjson[k] = "%s, %s" % (pmcjson[k], v)
+ else:
+ for xk in v:
+ pmcjson[k][xk] = v[xk]
+ else:
+ pmcjson[k] = v
+
+ pmcs[committeeId] = pmcjson
+
+ # copy PMC RDF data to /doap/{committeeId}/pmc-doap.rdf
+ if type(rdf) is str:
+ mode = "w"
else:
- print("WARN: unexpected content in " % url)
+ mode = "wb"
+ with open("../../site/doap/%s/pmc-doap.rdf" % committeeId, mode) as f:
+ f.write(rdf)
+ f.close()
+
except Exception as err:
print(err)
@@ -200,10 +248,10 @@ for pmc in re.findall(r"\* .+?\s+\(est\.
print("WARN: %s (%s) missing from http://www.apache.org/#projects-list" % (committeeShortName, homepage))
# TODO committee['description'] (or charter) not in committee-info.txt
# TODO committee['retired'] not in committee-info.txt
- if committeeId in pmcData:
- committee['rdf'] = pmcData[committeeId]
+ if committeeId in pmcDataUrls:
+ committee['rdf'] = pmcDataUrls[committeeId]
else:
- print("WARN: %s (%s) missing from pmc_list.xml" % (committeeShortName, committeeId))
+ print("WARN: %s (%s) missing from committees.xml" % (committeeShortName, committeeId))
committeesList.append(committee)
committeesMap[committeeId] = committee;
@@ -264,6 +312,10 @@ with open("../../site/json/foundation/co
f.write(json.dumps(committeesRetired, sort_keys=True, indent=0))
f.close()
+with open ("../../site/json/foundation/pmcs.json", "w") as f:
+ f.write(json.dumps(pmcs, sort_keys=True, indent=0))
+ f.close()
+
# compare with chairs, for consistency checking
chairs = json.load(open("../../site/json/foundation/chairs.json"))
for chair in chairs:
Modified: comdev/projects.apache.org/site/json/foundation/HEADER.html
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/site/json/foundation/HEADER.html?rev=1688067&r1=1688066&r2=1688067&view=diff
==============================================================================
--- comdev/projects.apache.org/site/json/foundation/HEADER.html (original)
+++ comdev/projects.apache.org/site/json/foundation/HEADER.html Sun Jun 28 20:49:19 2015
@@ -6,10 +6,10 @@ for more information.
<li><a href="accounts-evolution.json"><code>accounts-evolution.json</code></a> generated by <code>cronjobs/countaccounts.py</code> from LDAP</li>
<li><i><a href="chairs.json"><code>chairs.json</code></a> generated by <code>cronjobs/parsechairs.py</code> from <a href="http://www.apache.org/foundation/">http://www.apache.org/foundation/</a> (not used any more, replaced by committees.json)</i></li>
<li><a href="groups.json"><code>groups.json</code></a>, <a href="people.json"><code>people.json</code></a> and <a href="people_name.json"><code>people_name.json</code></a> generated by <code>cronjobs/parsecommitters.py</code> from <a href="http://people.apache.org/committer-index.html">http://people.apache.org/committer-index.html</a></li>
-<li><i><a href="pmcs.json"><code>pmcs.json</code></a> imported with <code>import/parsepmcs.py</code> from <a href='https://svn.apache.org/repos/asf/infrastructure/site-tools/trunk/projects/pmc_list.xml'>PMC DOAP files</a> (not used any more, replaced by committees.json)</i></li>
<li><a href="podlings.json"><code>podlings.json</code></a> and <a href="podlings-evolution.json"><code>podlings-evolution.json</code></a> generated by <code>cronjobs/podlings.py</code> from <a href="http://incubator.apache.org/podlings.xml">http://incubator.apache.org/podlings.xml</a></li>
<li><a href="projects.json"><code>projects.json</code></a> imported with <code>import/rdfparse.py</code> from <a href='https://svn.apache.org/repos/asf/infrastructure/site-tools/trunk/projects/files.xml'>projects' DOAP files</a></li>
<li><a href="releases.json"><code>releases.json</code></a> generated by <code>cronjobs/parsereleases.py</code> from <a href="http://www.apache.org/dist/">http://www.apache.org/dist/</a></li>
<li><a href="committees.json"><code>committees.json</code></a> and <a href="committees-retired.json"><code>committees-retired.json</code></a> imported with <code>import/parsecommittees.py</code> from <code>https://svn.apache.org/repos/private/committers/board/committee-info.txt</code></li>
+<li><i><a href="pmcs.json"><code>pmcs.json</code></a> imported with <code>import/parsecommittees.py</code> from <a href='https://svn.apache.org/repos/asf/comdev/projects.apache.org/data/committees.xml'>PMC DOAP files</a> (not used any more, replaced by committees.json)</i></li>
</ul>
<code><a href="http://svn.apache.org/viewvc/comdev/projects.apache.org/site/json/foundation/">site/json/foundation</a></code>
\ No newline at end of file