You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by se...@apache.org on 2015/08/19 12:55:49 UTC

svn commit: r1696555 - /comdev/reporter.apache.org/trunk/mailglomper.py

Author: sebb
Date: Wed Aug 19 10:55:48 2015
New Revision: 1696555

URL: http://svn.apache.org/r1696555
Log:
Add comment re projects with hyphens (empire-db)
Pretty print output to make debugging easier

Modified:
    comdev/reporter.apache.org/trunk/mailglomper.py

Modified: comdev/reporter.apache.org/trunk/mailglomper.py
URL: http://svn.apache.org/viewvc/comdev/reporter.apache.org/trunk/mailglomper.py?rev=1696555&r1=1696554&r2=1696555&view=diff
==============================================================================
--- comdev/reporter.apache.org/trunk/mailglomper.py (original)
+++ comdev/reporter.apache.org/trunk/mailglomper.py Wed Aug 19 10:55:48 2015
@@ -29,6 +29,18 @@ for i in range(0,7):
 data = urllib.urlopen("http://mail-archives.us.apache.org/mod_mbox/").read()
 print("Fetched %u bytes of main data" % len(data))
 y = 0
+"""
+N.B. The project name empire-db is truncated to empire in the main list.
+
+Rather than fixing this here, it is done in the scripts that read the output file.
+This is because those scripts assume that the first hyphen separates the
+project name from the mailing list name.
+Since list names may contain hyphens (e.g. lucene-net-dev), that is a necessary assumption.
+
+Potentially the generated file could use a separator that is not allowed in project names,
+but this would require converting the input file and, for a short while, potentially
+allowing both separators in the files that process the output.
+"""
 for mlist in re.finditer(r"<a href='([-a-z0-9]+)/'", data):
         ml = mlist.group(1)
         y += 1
@@ -61,7 +73,7 @@ for mlist in re.finditer(r"<a href='([-a
                 with open("data/maildata_extended.json",'w+') as f:
                         f.write(json.dumps(mls))
 with open("data/maildata_extended.json",'w+') as f:
-        f.write(json.dumps(mls))
+        f.write(json.dumps(mls, indent=1))
 print("Dumped JSON")