You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by se...@apache.org on 2015/10/14 00:13:00 UTC
svn commit: r1708525 - /comdev/projects.apache.org/scripts/import/parsecommittees.py
Author: sebb
Date: Tue Oct 13 22:12:59 2015
New Revision: 1708525
URL: http://svn.apache.org/viewvc?rev=1708525&view=rev
Log:
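Need some encodings: open text files with an explicit encoding='utf-8', and print the full traceback when processing a committee fails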
Modified:
comdev/projects.apache.org/scripts/import/parsecommittees.py
Modified: comdev/projects.apache.org/scripts/import/parsecommittees.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parsecommittees.py?rev=1708525&r1=1708524&r2=1708525&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/import/parsecommittees.py (original)
+++ comdev/projects.apache.org/scripts/import/parsecommittees.py Tue Oct 13 22:12:59 2015
@@ -9,6 +9,7 @@ import urllib.request
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom
import datetime
+import traceback
"""
Reads:
@@ -60,7 +61,7 @@ shortdescs = {
'labs': 'A place for innovation where committers of the foundation can experiment with new ideas'
}
-with open("../../site/json/foundation/people.json", "r") as f:
+with open("../../site/json/foundation/people.json", "r", encoding='utf-8') as f:
people = json.loads(f.read())
f.close()
@@ -100,7 +101,7 @@ for s in itemlist :
print(url)
rdf = urllib.request.urlopen(url).read()
else:
- rdf = open("../../data/%s" % url, 'r').read()
+ rdf = open("../../data/%s" % url, 'r', encoding='utf-8').read()
url = "https://svn.apache.org/repos/asf/comdev/projects.apache.org/data/%s" % url
rdfxml = ET.fromstring(rdf)
data = rdfxml[0]
@@ -129,22 +130,24 @@ for s in itemlist :
# copy PMC RDF data to /doap/{committeeId}/pmc-doap.rdf
if type(rdf) is str:
mode = "w"
+ encoding = 'utf-8'
else:
mode = "wb"
+ encoding = None
path = "../../site/doap/%s" % committeeId
try:
os.stat(path)
except:
print("INFO: creating new directory %s for %s" % (path, url))
os.mkdir(path)
- with open("%s/pmc-doap.rdf" % path, mode) as f:
+ with open("%s/pmc-doap.rdf" % path, mode, encoding=encoding) as f:
f.write(rdf)
f.close()
except Exception as err:
- print(err)
+ print("ERROR: processing %s:\n%s" % (url,traceback.format_exc()))
-print("reading /data/board/committee-info.txt")
+print("Reading /data/board/committee-info.txt")
with open("../../data/board/committee-info.txt", "rb") as f:
cidata = f.read().decode('utf-8')
f.close()
@@ -302,7 +305,7 @@ for pmc in re.findall(r"\* .+?\s+\(est\.
directory = "../../site/doap/%s" % committeeId
if not os.path.exists(directory):
os.makedirs(directory)
- with open("%s/pmc.rdf" % directory, "w") as f:
+ with open("%s/pmc.rdf" % directory, "w", encoding='utf-8') as f:
f.write(minidom.parseString(ET.tostring(doap, encoding="utf-8")).toprettyxml(indent="\t"))
f.close()
@@ -353,3 +356,4 @@ print("Writing json/foundation/pmcs.json
with open ("../../site/json/foundation/pmcs.json", "w") as f:
json.dump(pmcs, f, sort_keys=True, indent=0)
f.close()
+print("All done")
\ No newline at end of file