You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by hb...@apache.org on 2015/06/28 22:49:20 UTC
svn commit: r1688067 - in /comdev/projects.apache.org: scripts/README.txt scripts/import/parsecommittees.py scripts/import/parsepmcs.py site/json/foundation/HEADER.html

Author: hboutemy
Date: Sun Jun 28 20:49:19 2015
New Revision: 1688067

URL: http://svn.apache.org/r1688067
Log:
merged parsepmcs.py into parsecommittees.py

Removed:
    comdev/projects.apache.org/scripts/import/parsepmcs.py
Modified:
    comdev/projects.apache.org/scripts/README.txt
    comdev/projects.apache.org/scripts/import/parsecommittees.py
    comdev/projects.apache.org/site/json/foundation/HEADER.html

Modified: comdev/projects.apache.org/scripts/README.txt
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/README.txt?rev=1688067&r1=1688066&r2=1688067&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/README.txt (original)
+++ comdev/projects.apache.org/scripts/README.txt Sun Jun 28 20:49:19 2015
@@ -30,16 +30,13 @@ various sources:
 
 2. importing data (import)
 
-- parsecommittees.py: Parses committee-info.txt to detect new and retired committees
+- parsecommittees.py: Parses committee-info.txt to detect new and retired committees and imports PMC data (RDF) from
+  PMC data files
   in: site/json/foundation/committees.json + site/json/foundation/committees-retired.json
       + data/board/committee-info.txt (https://svn.apache.org/repos/private/committers/board/committee-info.txt)
       + data/committees.xml + PMC data data/committees/*.rdf
   out: site/json/foundation/committees.json + site/json/foundation/committees-retired.json + site/doap/{committeeId}/pmc.rdf
-
-- parsepmcs.py: imports PMC data (RDF) from PMC data files. No need
-  to run that more than once?
-  in: data/committees.xml + PMC data data/committees/*.rdf
-  out: site/json/foundation/pmcs.json + site/doap/{committeeId}/pmc-doap.rdf
+      + site/json/foundation/pmcs.json + site/doap/{committeeId}/pmc-doap.rdf
 
 - parseprojects.py: Parses existing projects RDF(DOAP) files and turns them into JSON objects.
   in: data/projects.xml + projects' DOAP files

Modified: comdev/projects.apache.org/scripts/import/parsecommittees.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parsecommittees.py?rev=1688067&r1=1688066&r2=1688067&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/import/parsecommittees.py (original)
+++ comdev/projects.apache.org/scripts/import/parsecommittees.py Sun Jun 28 20:49:19 2015
@@ -46,9 +46,30 @@ with open("../../site/json/foundation/pe
     people = json.loads(f.read())
     f.close()
 
+def handleChild(el):
+    retval = None
+    hasKids = False
+    for child in list(el):
+        hasKids = True
+    attribs = {}
+    for key in el.attrib:
+        xkey = re.sub(r"\{.+\}", "", key)
+        attribs[xkey] = el.attrib[key]
+    tag = re.sub(r"\{.+\}", "", el.tag)
+    value = attribs['resource'] if 'resource' in attribs else el.text
+    if not hasKids:
+        retval = value
+    else:
+        retval = {}
+        for child in list(el):
+            k, v = handleChild(child)
+            retval[k] = v
+    return tag, retval
+
 # get PMC Data from /data/committees.xml
 print("reading PMC Data (/data/committees.xml)")
-pmcData = {} # id -> url
+pmcs = {}
+pmcDataUrls = {} # id -> url
 with open("../../data/committees.xml", "r") as f:
     xmldoc = minidom.parseString(f.read())
     f.close()
@@ -57,17 +78,44 @@ for s in itemlist :
     url = s.childNodes[0].data
     try:
         if url.startswith('http'):
-            data = urllib.request.urlopen(url).read()
+            print(url)
+            rdf = urllib.request.urlopen(url).read()
         else:
-            data = open("../../data/%s" % url, 'r').read()
+            rdf = open("../../data/%s" % url, 'r').read()
             url = "https://svn.apache.org/repos/asf/comdev/projects.apache.org/data/%s" % url
-        rdfxml = ET.fromstring(data)
+        rdfxml = ET.fromstring(rdf)
         data = rdfxml[0]
-        if data.tag == '{http://projects.apache.org/ns/asfext#}pmc':
-            id = data.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about']
-            pmcData[id] = url
+        committeeId = data.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about']
+        pmcDataUrls[committeeId] = url
+
+        # transform PMC data RDF to json
+        pmcjson = {
+            'rdf': url
+        }
+        pmcname = None
+        for el in data:
+            k, v = handleChild(el)
+            if k in pmcjson:
+                # merge multiple values
+                if type(pmcjson[k]) is str:
+                    pmcjson[k] = "%s, %s" % (pmcjson[k], v)
+                else:
+                    for xk in v:
+                        pmcjson[k][xk] = v[xk]
+            else:
+                pmcjson[k] = v
+
+        pmcs[committeeId] = pmcjson
+
+        # copy PMC RDF data to /doap/{committeeId}/pmc-doap.rdf
+        if type(rdf) is str:
+            mode = "w"
         else:
-            print("WARN: unexpected content in " % url)
+            mode = "wb"
+        with open("../../site/doap/%s/pmc-doap.rdf" % committeeId, mode) as f:
+            f.write(rdf)
+            f.close()
+
     except Exception as err:
         print(err)
 
@@ -200,10 +248,10 @@ for pmc in re.findall(r"\* .+?\s+\(est\.
                 print("WARN: %s (%s) missing from http://www.apache.org/#projects-list" % (committeeShortName, homepage))
             # TODO committee['description'] (or charter) not in committee-info.txt
             # TODO committee['retired'] not in committee-info.txt
-            if committeeId in pmcData:
-                committee['rdf'] = pmcData[committeeId]
+            if committeeId in pmcDataUrls:
+                committee['rdf'] = pmcDataUrls[committeeId]
             else:
-                print("WARN: %s (%s) missing from pmc_list.xml" % (committeeShortName, committeeId))
+                print("WARN: %s (%s) missing from committees.xml" % (committeeShortName, committeeId))
             committeesList.append(committee)
             committeesMap[committeeId] = committee;
 
@@ -264,6 +312,10 @@ with open("../../site/json/foundation/co
     f.write(json.dumps(committeesRetired, sort_keys=True, indent=0))
     f.close()
 
+with open ("../../site/json/foundation/pmcs.json", "w") as f:
+    f.write(json.dumps(pmcs, sort_keys=True, indent=0))
+    f.close()
+
 # compare with chairs, for consistency checking
 chairs = json.load(open("../../site/json/foundation/chairs.json"))
 for chair in chairs:

Modified: comdev/projects.apache.org/site/json/foundation/HEADER.html
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/site/json/foundation/HEADER.html?rev=1688067&r1=1688066&r2=1688067&view=diff
==============================================================================
--- comdev/projects.apache.org/site/json/foundation/HEADER.html (original)
+++ comdev/projects.apache.org/site/json/foundation/HEADER.html Sun Jun 28 20:49:19 2015
@@ -6,10 +6,10 @@ for more information.
 <li><a href="accounts-evolution.json"><code>accounts-evolution.json</code></a> generated by <code>cronjobs/countaccounts.py</code> from LDAP</li>
 <li><i><a href="chairs.json"><code>chairs.json</code></a> generated by <code>cronjobs/parsechairs.py</code> from <a href="http://www.apache.org/foundation/">http://www.apache.org/foundation/</a> (not used any more, replaced by committees.json)</i></li>
 <li><a href="groups.json"><code>groups.json</code></a>, <a href="people.json"><code>people.json</code></a> and <a href="people_name.json"><code>people_name.json</code></a> generated by <code>cronjobs/parsecommitters.py</code> from <a href="http://people.apache.org/committer-index.html">http://people.apache.org/committer-index.html</a></li>
-<li><i><a href="pmcs.json"><code>pmcs.json</code></a> imported with <code>import/parsepmcs.py</code> from <a href='https://svn.apache.org/repos/asf/infrastructure/site-tools/trunk/projects/pmc_list.xml'>PMC DOAP files</a> (not used any more, replaced by committees.json)</i></li>
 <li><a href="podlings.json"><code>podlings.json</code></a> and <a href="podlings-evolution.json"><code>podlings-evolution.json</code></a> generated by <code>cronjobs/podlings.py</code> from <a href="http://incubator.apache.org/podlings.xml">http://incubator.apache.org/podlings.xml</a></li>
 <li><a href="projects.json"><code>projects.json</code></a> imported with <code>import/rdfparse.py</code> from <a href='https://svn.apache.org/repos/asf/infrastructure/site-tools/trunk/projects/files.xml'>projects' DOAP files</a></li>
 <li><a href="releases.json"><code>releases.json</code></a> generated by <code>cronjobs/parsereleases.py</code> from <a href="http://www.apache.org/dist/">http://www.apache.org/dist/</a></li>
 <li><a href="committees.json"><code>committees.json</code></a> and <a href="committees-retired.json"><code>committees-retired.json</code></a> imported with <code>import/parsecommittees.py</code> from <code>https://svn.apache.org/repos/private/committers/board/committee-info.txt</code></li>
+<li><i><a href="pmcs.json"><code>pmcs.json</code></a> imported with <code>import/parsecommittees.py</code> from <a href='https://svn.apache.org/repos/asf/comdev/projects.apache.org/data/committees.xml'>PMC DOAP files</a> (not used any more, replaced by committees.json)</i></li>
 </ul>
 <code><a href="http://svn.apache.org/viewvc/comdev/projects.apache.org/site/json/foundation/">site/json/foundation</a></code>
\ No newline at end of file