You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by se...@apache.org on 2015/09/15 23:44:36 UTC
svn commit: r1703296 - /comdev/projects.apache.org/scripts/import/parseprojects.py

Author: sebb
Date: Tue Sep 15 21:44:36 2015
New Revision: 1703296

URL: http://svn.apache.org/r1703296
Log:
COMDEV-156 parseprojects.py: Calculation of projectJsonFilename is flawed

Modified:
    comdev/projects.apache.org/scripts/import/parseprojects.py

Modified: comdev/projects.apache.org/scripts/import/parseprojects.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parseprojects.py?rev=1703296&r1=1703295&r2=1703296&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/import/parseprojects.py (original)
+++ comdev/projects.apache.org/scripts/import/parseprojects.py Tue Sep 15 21:44:36 2015
@@ -54,6 +54,37 @@ for r in committeesRetired:
 projects = {}
 failures = []
 
+# Convert project name to unique file name of the form "<pmc>-<name>"
+#   s   - project name, e.g. "Apache Commons IO"; pmc - PMC id, e.g. "commons"
+#   Returns pmc alone when the reduced name equals pmc, e.g. ("Apache Commons", "commons") => "commons"
+def name2fileName(s, pmc):
+    # Normalise case and surrounding whitespace, then remove the PMC name embedded mid-title
+    base = s.strip().lower().replace(" %s " % pmc, " ")
+    # Incubating projects are filed under the incubator PMC anyway, so the marker is redundant
+    base = base.replace(' (incubating)', '')
+    # Strip the leading "apache " and the trailing " library" decorations
+    base = re.sub(' library$', '', re.sub('^apache ', '', base))
+    # Spell out ".net", then map every character outside [a-z0-9+-] to "_"
+    base = re.sub("[^a-z0-9+-]", "_", base.replace('.net', 'dotnet'))
+    # A project named exactly after its PMC keeps the bare PMC name
+    if base == pmc:
+        return pmc
+    return "%s-%s" % (pmc, base)
+
+# Process external PMC descriptor file: return rdf:about of its first asfext:pmc element, else warn and return 'Unknown'
+def getPMC(url):
+    print("Parsing PMC descriptor file %s" % url)
+    rdf = urllib.request.urlopen(url).read()
+    md = minidom.parseString(rdf)
+    # Check the node list before indexing: an empty result must reach the WARN fallback, not raise IndexError
+    pmcs = md.getElementsByTagName('asfext:pmc') or md.getElementsByTagName('asfext:PMC')
+    a = pmcs[0].getAttribute('rdf:about') if pmcs else None
+    md.unlink() # release the DOM once the attribute value has been copied out
+    if a is not None:
+        return a
+    print("WARN: could not find asfext:pmc in %s " % url)
+    return 'Unknown'
+
 def handleChild(el):
     retval = None
     hasKids = False
@@ -113,20 +144,28 @@ for s in itemlist :
             m = re.match(r"https?://([^.]+)\.", homepage, re.IGNORECASE)
             if m:
                 siteId = site2committee(m.group(1))
-            nn = re.sub("http.+\.apache\.org/?", "", homepage)
-            if (nn == ""):
-                projectJsonFilename = siteId
-            else:
-                nn = nn.replace('/', ' ').strip().split().pop().replace('-project', '')
-                if nn.startswith("%s-" % siteId):
-                    projectJsonFilename = nn
-                else:
-                    projectJsonFilename = "%s-%s" % (siteId, nn)
         else:
             print("WARN: no homepage defined in %s, pmc = %s" % (url, pjson['pmc']))
 
+        pmc = 'Unknown'
         if not 'pmc' in pjson:
             print("WARN: no asfext:pmc in %s" % url)
+        else:
+            pmcrdf = pjson['pmc']
+            pmcrdf = pmcrdf.replace('/anakia', '').replace('/texen', '') # temporary hack
+            # Extract the pmc name if it is a shortcut
+            m = re.match(r"https?://([^.]+)\.apache\.org/?$", pmcrdf, re.IGNORECASE)
+            if m:
+                pmc = m.group(1)
+            else:
+                # Not a shortcut, so read the descriptor file
+                pmc = getPMC(pmcrdf)
+            
+        if pjson['name']:
+            projectJsonFilename = name2fileName(pjson['name'], pmc)
+        else:
+            print("WARN: no name defined in %s, pmc = %s" % (url, pjson['pmc']))
+
         if 'pmc' in pjson and pjson['pmc'].startswith('http://attic.apache.org'):
             committeeId = 'attic'
         elif '.incubator.' in homepage:
@@ -158,7 +197,7 @@ for s in itemlist :
             projects[projectJsonFilename] = pjson
             #for e in add:
             #    pjson[e] = add[e]
-            print("Writing projects/%s.json \t+ copying source DOAP to doap/%s/%s.rdf..." % (projectJsonFilename, committeeId, projectJsonFilename))
+            print("Writing projects/%-40s + copying source DOAP to doap/%s/%s.rdf..." % (projectJsonFilename+".json", committeeId, projectJsonFilename))
             with open ("../../site/json/projects/%s.json" % projectJsonFilename, "w") as f:
                 f.write(json.dumps(pjson, sort_keys=True, indent=0))
                 f.close()