You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by hb...@apache.org on 2015/05/15 00:22:39 UTC

svn commit: r1679469 [1/4] - in /comdev/projects.apache.org: scripts/import/ site/json/foundation/ site/json/projects/

Author: hboutemy
Date: Thu May 14 22:22:37 2015
New Revision: 1679469

URL: http://svn.apache.org/r1679469
Log:
new algorithm to guess project id

Added:
    comdev/projects.apache.org/site/json/projects/ant-compress.json
    comdev/projects.apache.org/site/json/projects/ant-dotnet.json
    comdev/projects.apache.org/site/json/projects/ant-proper.html.json
    comdev/projects.apache.org/site/json/projects/ant-props.json
    comdev/projects.apache.org/site/json/projects/axis-core.json
    comdev/projects.apache.org/site/json/projects/axis-sandesha.json
    comdev/projects.apache.org/site/json/projects/commons-bcel.json
    comdev/projects.apache.org/site/json/projects/commons-bsf.json
    comdev/projects.apache.org/site/json/projects/commons-chain.json
    comdev/projects.apache.org/site/json/projects/commons-cli.json
    comdev/projects.apache.org/site/json/projects/commons-codec.json
    comdev/projects.apache.org/site/json/projects/commons-collections.json
    comdev/projects.apache.org/site/json/projects/commons-compress.json
    comdev/projects.apache.org/site/json/projects/commons-configuration.json
    comdev/projects.apache.org/site/json/projects/commons-daemon.json
    comdev/projects.apache.org/site/json/projects/commons-dbcp.json
    comdev/projects.apache.org/site/json/projects/commons-dbutils.json
    comdev/projects.apache.org/site/json/projects/commons-digester.json
    comdev/projects.apache.org/site/json/projects/commons-discovery.json
    comdev/projects.apache.org/site/json/projects/commons-el.json
    comdev/projects.apache.org/site/json/projects/commons-email.json
    comdev/projects.apache.org/site/json/projects/commons-exec.json
    comdev/projects.apache.org/site/json/projects/commons-fileupload.json
    comdev/projects.apache.org/site/json/projects/commons-functor.json
    comdev/projects.apache.org/site/json/projects/commons-index.html.json
    comdev/projects.apache.org/site/json/projects/commons-io.json
    comdev/projects.apache.org/site/json/projects/commons-jci.json
    comdev/projects.apache.org/site/json/projects/commons-jcs.json
    comdev/projects.apache.org/site/json/projects/commons-jelly.json
    comdev/projects.apache.org/site/json/projects/commons-jexl.json
    comdev/projects.apache.org/site/json/projects/commons-jxpath.json
    comdev/projects.apache.org/site/json/projects/commons-lang.json
    comdev/projects.apache.org/site/json/projects/commons-launcher.json
    comdev/projects.apache.org/site/json/projects/commons-logging.json
    comdev/projects.apache.org/site/json/projects/commons-math.json
    comdev/projects.apache.org/site/json/projects/commons-modeler.json
    comdev/projects.apache.org/site/json/projects/commons-ognl.json
    comdev/projects.apache.org/site/json/projects/commons-pool.json
    comdev/projects.apache.org/site/json/projects/commons-primitives.json
    comdev/projects.apache.org/site/json/projects/commons-proxy.json
    comdev/projects.apache.org/site/json/projects/commons-scxml.json
    comdev/projects.apache.org/site/json/projects/commons-validator.json
    comdev/projects.apache.org/site/json/projects/commons-vfs.json
    comdev/projects.apache.org/site/json/projects/commons-weaver.json
    comdev/projects.apache.org/site/json/projects/directory-1.5.json
    comdev/projects.apache.org/site/json/projects/directory-studio.json
    comdev/projects.apache.org/site/json/projects/hc-httpclient-3.x.json
    comdev/projects.apache.org/site/json/projects/hc-httpcomponents-client.json
    comdev/projects.apache.org/site/json/projects/jakarta-cactus.json
    comdev/projects.apache.org/site/json/projects/jmeter-index.html.json
    comdev/projects.apache.org/site/json/projects/logging-chainsaw.html.json
    comdev/projects.apache.org/site/json/projects/lucene-core.json
    comdev/projects.apache.org/site/json/projects/orc.json
    comdev/projects.apache.org/site/json/projects/parquet.json
    comdev/projects.apache.org/site/json/projects/tomcat-rdc.json
    comdev/projects.apache.org/site/json/projects/velocity-devel.json
    comdev/projects.apache.org/site/json/projects/velocity-tools.json
    comdev/projects.apache.org/site/json/projects/xalan-xalan-c.json
    comdev/projects.apache.org/site/json/projects/xalan-xalan-j.json
    comdev/projects.apache.org/site/json/projects/xerces-external.json
    comdev/projects.apache.org/site/json/projects/xerces-resolver.json
    comdev/projects.apache.org/site/json/projects/xerces-xerces-c.json
    comdev/projects.apache.org/site/json/projects/xerces-xerces-p.json
    comdev/projects.apache.org/site/json/projects/xerces-xerces2-j.json
    comdev/projects.apache.org/site/json/projects/xmlgraphics-commons.json
    comdev/projects.apache.org/site/json/projects/zest.json
Removed:
    comdev/projects.apache.org/site/json/projects/ant-.net-ant-library.json
    comdev/projects.apache.org/site/json/projects/ant-apacheeasyant.json
    comdev/projects.apache.org/site/json/projects/ant-compress-ant-library.json
    comdev/projects.apache.org/site/json/projects/ant-props-ant-library.json
    comdev/projects.apache.org/site/json/projects/ant-vss-ant-library.json
    comdev/projects.apache.org/site/json/projects/attic-click.json
    comdev/projects.apache.org/site/json/projects/axis-axis2.json
    comdev/projects.apache.org/site/json/projects/axis-sandesha2.json
    comdev/projects.apache.org/site/json/projects/commons-commons-bcel.json
    comdev/projects.apache.org/site/json/projects/commons-commons-bsf.json
    comdev/projects.apache.org/site/json/projects/commons-commons-chain.json
    comdev/projects.apache.org/site/json/projects/commons-commons-cli.json
    comdev/projects.apache.org/site/json/projects/commons-commons-codec.json
    comdev/projects.apache.org/site/json/projects/commons-commons-collections.json
    comdev/projects.apache.org/site/json/projects/commons-commons-compress.json
    comdev/projects.apache.org/site/json/projects/commons-commons-configuration.json
    comdev/projects.apache.org/site/json/projects/commons-commons-daemon.json
    comdev/projects.apache.org/site/json/projects/commons-commons-dbcp.json
    comdev/projects.apache.org/site/json/projects/commons-commons-dbutils.json
    comdev/projects.apache.org/site/json/projects/commons-commons-digester.json
    comdev/projects.apache.org/site/json/projects/commons-commons-discovery.json
    comdev/projects.apache.org/site/json/projects/commons-commons-el.json
    comdev/projects.apache.org/site/json/projects/commons-commons-email.json
    comdev/projects.apache.org/site/json/projects/commons-commons-exec.json
    comdev/projects.apache.org/site/json/projects/commons-commons-fileupload.json
    comdev/projects.apache.org/site/json/projects/commons-commons-functor.json
    comdev/projects.apache.org/site/json/projects/commons-commons-io.json
    comdev/projects.apache.org/site/json/projects/commons-commons-jci.json
    comdev/projects.apache.org/site/json/projects/commons-commons-jcs.json
    comdev/projects.apache.org/site/json/projects/commons-commons-jelly.json
    comdev/projects.apache.org/site/json/projects/commons-commons-jexl.json
    comdev/projects.apache.org/site/json/projects/commons-commons-jxpath.json
    comdev/projects.apache.org/site/json/projects/commons-commons-lang.json
    comdev/projects.apache.org/site/json/projects/commons-commons-launcher.json
    comdev/projects.apache.org/site/json/projects/commons-commons-logging.json
    comdev/projects.apache.org/site/json/projects/commons-commons-math.json
    comdev/projects.apache.org/site/json/projects/commons-commons-modeler.json
    comdev/projects.apache.org/site/json/projects/commons-commons-ognl.json
    comdev/projects.apache.org/site/json/projects/commons-commons-pool.json
    comdev/projects.apache.org/site/json/projects/commons-commons-primitives.json
    comdev/projects.apache.org/site/json/projects/commons-commons-proxy.json
    comdev/projects.apache.org/site/json/projects/commons-commons-scxml.json
    comdev/projects.apache.org/site/json/projects/commons-commons-validator.json
    comdev/projects.apache.org/site/json/projects/commons-commons-vfs.json
    comdev/projects.apache.org/site/json/projects/commons-commons-weaver.json
    comdev/projects.apache.org/site/json/projects/commons.json
    comdev/projects.apache.org/site/json/projects/corinthia.json
    comdev/projects.apache.org/site/json/projects/directory-directory-studio.json
    comdev/projects.apache.org/site/json/projects/directory-directory.json
    comdev/projects.apache.org/site/json/projects/hc-commons-httpclient.json
    comdev/projects.apache.org/site/json/projects/hc.json
    comdev/projects.apache.org/site/json/projects/hivemind.json
    comdev/projects.apache.org/site/json/projects/incubator-droids-(incubating).json
    comdev/projects.apache.org/site/json/projects/incubator.json
    comdev/projects.apache.org/site/json/projects/jakarta-jakarta-cactus.json
    comdev/projects.apache.org/site/json/projects/jmeter.json
    comdev/projects.apache.org/site/json/projects/logging.json
    comdev/projects.apache.org/site/json/projects/lucene-lucene-core.json
    comdev/projects.apache.org/site/json/projects/marmotta.json
    comdev/projects.apache.org/site/json/projects/sling.json
    comdev/projects.apache.org/site/json/projects/tez.json
    comdev/projects.apache.org/site/json/projects/tinkerpop.json
    comdev/projects.apache.org/site/json/projects/tomcat-reusable-dialog-components-(rdc)-taglib.json
    comdev/projects.apache.org/site/json/projects/uima.json
    comdev/projects.apache.org/site/json/projects/velocity-velocity-dvsl.json
    comdev/projects.apache.org/site/json/projects/velocity-velocity-tools.json
    comdev/projects.apache.org/site/json/projects/xalan-xalan-for-java-xslt-processor.json
    comdev/projects.apache.org/site/json/projects/xalan.json
    comdev/projects.apache.org/site/json/projects/xerces-xerces-for-java-xml-parser.json
    comdev/projects.apache.org/site/json/projects/xerces-xerces-for-perl-xml-parser.json
    comdev/projects.apache.org/site/json/projects/xerces-xml-commons-external.json
    comdev/projects.apache.org/site/json/projects/xerces-xml-commons-resolver.json
    comdev/projects.apache.org/site/json/projects/xerces.json
    comdev/projects.apache.org/site/json/projects/xmlgraphics-xml-graphics-commons.json
Modified:
    comdev/projects.apache.org/scripts/import/rdfparse.py
    comdev/projects.apache.org/site/json/foundation/projects.json
    comdev/projects.apache.org/site/json/projects/allura.json
    comdev/projects.apache.org/site/json/projects/ant-antunit.json
    comdev/projects.apache.org/site/json/projects/ant-ivy.json
    comdev/projects.apache.org/site/json/projects/ant-ivyde.json
    comdev/projects.apache.org/site/json/projects/ant.json
    comdev/projects.apache.org/site/json/projects/apr.json
    comdev/projects.apache.org/site/json/projects/archiva.json
    comdev/projects.apache.org/site/json/projects/bloodhound.json
    comdev/projects.apache.org/site/json/projects/chemistry.json
    comdev/projects.apache.org/site/json/projects/cloudstack.json
    comdev/projects.apache.org/site/json/projects/directory.json
    comdev/projects.apache.org/site/json/projects/flex.json
    comdev/projects.apache.org/site/json/projects/flink.json
    comdev/projects.apache.org/site/json/projects/forrest.json
    comdev/projects.apache.org/site/json/projects/httpd.json
    comdev/projects.apache.org/site/json/projects/jena.json
    comdev/projects.apache.org/site/json/projects/mina-ftpserver.json
    comdev/projects.apache.org/site/json/projects/mina-sshd.json
    comdev/projects.apache.org/site/json/projects/mina-vysper.json
    comdev/projects.apache.org/site/json/projects/mina.json
    comdev/projects.apache.org/site/json/projects/mrunit.json
    comdev/projects.apache.org/site/json/projects/myfaces-tobago.json
    comdev/projects.apache.org/site/json/projects/ofbiz.json
    comdev/projects.apache.org/site/json/projects/oltu.json
    comdev/projects.apache.org/site/json/projects/onami.json
    comdev/projects.apache.org/site/json/projects/oodt.json
    comdev/projects.apache.org/site/json/projects/pdfbox.json
    comdev/projects.apache.org/site/json/projects/samza.json
    comdev/projects.apache.org/site/json/projects/servicemix.json
    comdev/projects.apache.org/site/json/projects/sis.json
    comdev/projects.apache.org/site/json/projects/subversion.json
    comdev/projects.apache.org/site/json/projects/taverna.json
    comdev/projects.apache.org/site/json/projects/tcl-rivet.json
    comdev/projects.apache.org/site/json/projects/tomcat.json
    comdev/projects.apache.org/site/json/projects/trafficserver.json
    comdev/projects.apache.org/site/json/projects/vxquery.json
    comdev/projects.apache.org/site/json/projects/wicket.json
    comdev/projects.apache.org/site/json/projects/xmlgraphics-batik.json
    comdev/projects.apache.org/site/json/projects/xmlgraphics-fop.json

Modified: comdev/projects.apache.org/scripts/import/rdfparse.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/rdfparse.py?rev=1679469&r1=1679468&r2=1679469&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/import/rdfparse.py (original)
+++ comdev/projects.apache.org/scripts/import/rdfparse.py Thu May 14 22:22:37 2015
@@ -1,7 +1,6 @@
 from xml.dom import minidom
 import xml.etree.ElementTree as ET
 import re, urllib.request
-import csv
 import json
 import os
 import traceback
@@ -45,6 +44,8 @@ for s in itemlist :
             'doap': url
         }
         prname = None
+        tlp = None
+        id = None
         for el in project:
             k, v = handleChild(el)
             if k in pjson and not k in ['name','homepage']:
@@ -57,19 +58,23 @@ for s in itemlist :
                 pjson[k] = v
 
         if pjson['homepage']:
-            m = re.match(r"https?://([^.]+)\.", pjson['homepage'], re.IGNORECASE)
+            homepage = pjson['homepage']
+            m = re.match(r"https?://([^.]+)\.", homepage, re.IGNORECASE)
             if m:
-                prname = m.group(1)
-        nn = pjson['name'].replace("Apache ", "").replace(" ", "-").lower()
+                tlp = m.group(1)
+            nn = re.sub("http.+\.apache\.org/?", "", homepage)
+            if (nn == ""):
+                id = tlp
+            else:
+                nn = nn.replace('/', ' ').strip().split().pop().replace('commons-', '').replace('-project', '')
+                id = "%s-%s" % (tlp, nn)
         m = re.search(r"http://([-a-z0-9]+)\.", pjson['pmc'])
         if m:
             pjson['pmc'] = m.group(1)
-        if re.search(r"/[a-z0-9+A-Z]+/?$", pjson['homepage']) or os.path.isfile("../site/json/projects/%s.json" % prname):
-            prname = "%s-%s" % (prname, nn)
         # replace category url with id, by removing http://projects.apache.org/category/
         if pjson['category']:
             pjson['category'] = pjson['category'].replace("http://projects.apache.org/category/", "")
-        if prname:
+        if id:
             add = {}
             for k in pjson:
                 if type(pjson[k]) is not str:
@@ -77,22 +82,23 @@ for s in itemlist :
                         add[e] = pjson[k][e]
                     pjson[k] = None
 
-            projects[prname] = pjson
+            projects[id] = pjson
             for e in add:
                 pjson[e] = add[e]
-            if not os.path.isfile("../../site/json/projects/%s.json" % prname):
-                print("Writing %s.json..." % prname)
-                with open ("../../site/json/projects/%s.json" % prname, "w") as f:
-                    f.write(json.dumps(pjson, sort_keys=True, indent=0))
-                    f.close()
+            print("Writing projects/%s.json..." % id)
+            with open ("../../site/json/projects/%s.json" % id, "w") as f:
+                f.write(json.dumps(pjson, sort_keys=True, indent=0))
+                f.close()
+        else:
+            print("Unable to extract project id from %s" % url)
     except Exception as err:
         print("Error when reading %s's doap file %s:" % (prname, url))
         print("-"*60)
         traceback.print_exc()
         print("-"*60)
 
+print("Writing foundation/projects.json...")
 with open ("../../site/json/foundation/projects.json", "w") as f:
     f.write(json.dumps(projects, sort_keys=True, indent=0))
     f.close()
 print("Done!")
-    
\ No newline at end of file