You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by se...@apache.org on 2015/10/14 00:13:00 UTC
svn commit: r1708525 - /comdev/projects.apache.org/scripts/import/parsecommittees.py

Author: sebb
Date: Tue Oct 13 22:12:59 2015
New Revision: 1708525

URL: http://svn.apache.org/viewvc?rev=1708525&view=rev
Log:
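Need some encodings: open text files with an explicit encoding='utf-8' rather than the platform default, and print the full traceback when processing a committee fails.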

Modified:
    comdev/projects.apache.org/scripts/import/parsecommittees.py

Modified: comdev/projects.apache.org/scripts/import/parsecommittees.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parsecommittees.py?rev=1708525&r1=1708524&r2=1708525&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/import/parsecommittees.py (original)
+++ comdev/projects.apache.org/scripts/import/parsecommittees.py Tue Oct 13 22:12:59 2015
@@ -9,6 +9,7 @@ import urllib.request
 import xml.etree.ElementTree as ET
 import xml.dom.minidom as minidom
 import datetime
+import traceback
 
 """
 Reads:
@@ -60,7 +61,7 @@ shortdescs = {
     'labs': 'A place for innovation where committers of the foundation can experiment with new ideas'
 }
 
-with open("../../site/json/foundation/people.json", "r") as f:
+with open("../../site/json/foundation/people.json", "r", encoding='utf-8') as f:
     people = json.loads(f.read())
     f.close()
 
@@ -100,7 +101,7 @@ for s in itemlist :
             print(url)
             rdf = urllib.request.urlopen(url).read()
         else:
-            rdf = open("../../data/%s" % url, 'r').read()
+            rdf = open("../../data/%s" % url, 'r', encoding='utf-8').read()
             url = "https://svn.apache.org/repos/asf/comdev/projects.apache.org/data/%s" % url
         rdfxml = ET.fromstring(rdf)
         data = rdfxml[0]
@@ -129,22 +130,24 @@ for s in itemlist :
         # copy PMC RDF data to /doap/{committeeId}/pmc-doap.rdf
         if type(rdf) is str:
             mode = "w"
+            encoding = 'utf-8'
         else:
             mode = "wb"
+            encoding = None
         path = "../../site/doap/%s" % committeeId
         try:
             os.stat(path)
         except:
             print("INFO: creating new directory %s for %s" % (path, url))
             os.mkdir(path)
-        with open("%s/pmc-doap.rdf" % path, mode) as f:
+        with open("%s/pmc-doap.rdf" % path, mode, encoding=encoding) as f:
             f.write(rdf)
             f.close()
 
     except Exception as err:
-        print(err)
+        print("ERROR: processing %s:\n%s" % (url,traceback.format_exc()))
 
-print("reading /data/board/committee-info.txt")
+print("Reading /data/board/committee-info.txt")
 with open("../../data/board/committee-info.txt", "rb") as f:
     cidata = f.read().decode('utf-8')
     f.close()
@@ -302,7 +305,7 @@ for pmc in re.findall(r"\* .+?\s+\(est\.
             directory = "../../site/doap/%s" % committeeId
             if not os.path.exists(directory):
                 os.makedirs(directory)
-            with open("%s/pmc.rdf" % directory, "w") as f:
+            with open("%s/pmc.rdf" % directory, "w", encoding='utf-8') as f:
                 f.write(minidom.parseString(ET.tostring(doap, encoding="utf-8")).toprettyxml(indent="\t"))
                 f.close()
 
@@ -353,3 +356,4 @@ print("Writing json/foundation/pmcs.json
 with open ("../../site/json/foundation/pmcs.json", "w") as f:
     json.dump(pmcs, f, sort_keys=True, indent=0)
     f.close()
+print("All done")
\ No newline at end of file