You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by se...@apache.org on 2015/09/15 23:44:36 UTC
svn commit: r1703296 -
/comdev/projects.apache.org/scripts/import/parseprojects.py
Author: sebb
Date: Tue Sep 15 21:44:36 2015
New Revision: 1703296
URL: http://svn.apache.org/r1703296
Log:
COMDEV-156 parseprojects.py: Calculation of projectJsonFilename is flawed
Modified:
comdev/projects.apache.org/scripts/import/parseprojects.py
Modified: comdev/projects.apache.org/scripts/import/parseprojects.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parseprojects.py?rev=1703296&r1=1703295&r2=1703296&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/import/parseprojects.py (original)
+++ comdev/projects.apache.org/scripts/import/parseprojects.py Tue Sep 15 21:44:36 2015
@@ -54,6 +54,37 @@ for r in committeesRetired:
projects = {}
failures = []
# Convert project name to unique file name
def name2fileName(s, pmc):
    """Derive the JSON/RDF base file name for a project.

    The project name *s* is trimmed and lower-cased, then reduced by:
      * dropping the PMC name when it appears as a whole word surrounded
        by spaces,
      * dropping ' (incubating)',
      * dropping a leading 'apache ' and a trailing ' library',
      * spelling '.net' as 'dotnet',
      * replacing every character outside [a-z0-9+-] with '_'.

    Returns *pmc* itself when the reduced name equals the PMC name,
    otherwise '<pmc>-<reduced name>'.
    """
    # The project name is lower-cased below, so the PMC id must be compared
    # in lower case too; otherwise a mixed-case id is never recognised.
    pmc_lc = pmc.lower()
    fn = s.strip().lower()
    fn = fn.replace(" %s " % pmc_lc, " ")  # drop PMC name
    fn = fn.replace(' (incubating)', '')  # will be under the incubator PMC anyway
    fn = re.sub(r'^apache ', '', fn)  # Drop leading Apache
    fn = re.sub(r' library$', '', fn)  # Drop trailing Library
    fn = fn.replace('.net', 'dotnet')
    fn = re.sub(r"[^a-z0-9+-]", "_", fn)  # sanitise the name
    if fn == pmc_lc:
        return pmc
    return "%s-%s" % (pmc, fn)
+
# Process external PMC descriptor file to extract the PMC name
def getPMC(url):
    """Fetch a PMC descriptor (RDF/XML) and return its PMC identifier.

    The document at *url* is downloaded and parsed with minidom. The value
    of the 'rdf:about' attribute of the first 'asfext:pmc' element (or,
    if there is none, of the first 'asfext:PMC' element) is returned.

    Returns 'Unknown', after printing a warning, when the document holds
    neither element. Download and XML parsing errors propagate to the caller.
    """
    print("Parsing PMC descriptor file %s" % url)
    # Close the response deterministically rather than leaking the handle.
    with urllib.request.urlopen(url) as response:
        rdf = response.read()
    md = minidom.parseString(rdf)
    try:
        # An empty NodeList is falsy, so this falls through to the
        # upper-case spelling only when the lower-case one is absent.
        nodes = (md.getElementsByTagName('asfext:pmc')
                 or md.getElementsByTagName('asfext:PMC'))
        if nodes:
            # Read the attribute before the finally clause unlinks the DOM.
            return nodes[0].getAttribute('rdf:about')
    finally:
        md.unlink()
    print("WARN: could not find asfext:pmc in %s " % url)
    return 'Unknown'
+
def handleChild(el):
retval = None
hasKids = False
@@ -113,20 +144,28 @@ for s in itemlist :
m = re.match(r"https?://([^.]+)\.", homepage, re.IGNORECASE)
if m:
siteId = site2committee(m.group(1))
- nn = re.sub("http.+\.apache\.org/?", "", homepage)
- if (nn == ""):
- projectJsonFilename = siteId
- else:
- nn = nn.replace('/', ' ').strip().split().pop().replace('-project', '')
- if nn.startswith("%s-" % siteId):
- projectJsonFilename = nn
- else:
- projectJsonFilename = "%s-%s" % (siteId, nn)
else:
print("WARN: no homepage defined in %s, pmc = %s" % (url, pjson['pmc']))
+ pmc = 'Unknown'
if not 'pmc' in pjson:
print("WARN: no asfext:pmc in %s" % url)
+ else:
+ pmcrdf = pjson['pmc']
+ pmcrdf = pmcrdf.replace('/anakia', '').replace('/texen', '') # temporary hack
+ # Extract the pmc name if it is a shortcut
+ m = re.match(r"https?://([^.]+)\.apache\.org/?$", pmcrdf, re.IGNORECASE)
+ if m:
+ pmc = m.group(1)
+ else:
+ # Not a shortcut, so read the descriptor file
+ pmc = getPMC(pmcrdf)
+
+ if pjson['name']:
+ projectJsonFilename = name2fileName(pjson['name'], pmc)
+ else:
+ print("WARN: no name defined in %s, pmc = %s" % (url, pjson['pmc']))
+
if 'pmc' in pjson and pjson['pmc'].startswith('http://attic.apache.org'):
committeeId = 'attic'
elif '.incubator.' in homepage:
@@ -158,7 +197,7 @@ for s in itemlist :
projects[projectJsonFilename] = pjson
#for e in add:
# pjson[e] = add[e]
- print("Writing projects/%s.json \t+ copying source DOAP to doap/%s/%s.rdf..." % (projectJsonFilename, committeeId, projectJsonFilename))
+ print("Writing projects/%-40s + copying source DOAP to doap/%s/%s.rdf..." % (projectJsonFilename+".json", committeeId, projectJsonFilename))
with open ("../../site/json/projects/%s.json" % projectJsonFilename, "w") as f:
f.write(json.dumps(pjson, sort_keys=True, indent=0))
f.close()