You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by hb...@apache.org on 2015/05/09 13:38:04 UTC
svn commit: r1678478 - in /comdev/projects.apache.org/scripts: README.txt import/parsecommittees.py

Author: hboutemy
Date: Sat May  9 11:38:04 2015
New Revision: 1678478

URL: http://svn.apache.org/r1678478
Log:
improved parsecommittees.py to only add new committees instead of overwriting everything

Modified:
    comdev/projects.apache.org/scripts/README.txt
    comdev/projects.apache.org/scripts/import/parsecommittees.py

Modified: comdev/projects.apache.org/scripts/README.txt
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/README.txt?rev=1678478&r1=1678477&r2=1678478&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/README.txt (original)
+++ comdev/projects.apache.org/scripts/README.txt Sat May  9 11:38:04 2015
@@ -30,8 +30,8 @@ various sources:
 
 2. importing data (import)
 
-- parsecommittees.py: Parses committee-info.txt and its TLP information
-  in: foundation/chairs.json + committee-info.txt (https://svn.apache.org/repos/private/committers/board/committee-info.txt)
+- parsecommittees.py: Parses committee-info.txt to detect new TLPs and add them to committees.json
+  in: foundation/committees.json + committee-info.txt (https://svn.apache.org/repos/private/committers/board/committee-info.txt)
   out: foundation/committees.json
 
 - parsepmcs.py: imports PMC data from the old project.apache.org site. No need

Modified: comdev/projects.apache.org/scripts/import/parsecommittees.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parsecommittees.py?rev=1678478&r1=1678477&r2=1678478&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/import/parsecommittees.py (original)
+++ comdev/projects.apache.org/scripts/import/parsecommittees.py Sat May  9 11:38:04 2015
@@ -2,15 +2,20 @@ import re
 import json
 import sys
 
-renames = {
+# Committee names from committee-info.txt that do not match committees.json
+renamesCommittee2Json = {
     'Apache APR': 'Apache Portable Runtime',
-    'Apache Logging': 'Apache Logging Services'
+    'Apache Perl': 'Apache mod_perl'
+}
+# Committee names from http://www.apache.org/foundation/ that do not match committees.json
+renamesChairs2Json = {
+    'Apache Logging Services': 'Apache Logging',
+    'Apache Perl': 'Apache mod_perl'
 }
 
-data = None
-committees = {}
-psize = {}
-c = {}
+with open("../../site/json/foundation/committees.json", "r") as f:
+    committees = json.loads(f.read())
+    f.close()
 
 try:
     with open("committee-info.txt", "rb") as f:
@@ -22,28 +27,34 @@ except:
     print("(requires authentication with committer karma)")
     sys.exit(1)
 
-x = 0
-
-for year in range(1995,2016):
-    for month in range(1,13):
-        committees["%04u-%02u" % (year, month)] = []
+tlpCount = 0
+addedTlps = []
+c = {}
 
-for pmc in re.findall(r"\* .+?\s+\(est\. [0-9/]+[^\r\n]+", data, re.UNICODE | re.IGNORECASE):
+for pmc in re.findall(r"\* .+?\s+\(est\. [0-9/]+[^\r\n]+", data):
 
     #print(pmc)
-    m = re.search(r"\* (.+?)\s+\(est. ([0-9]+)/([0-9]+)", pmc, re.IGNORECASE | re.UNICODE)
+    m = re.search(r"\* (.+?)\s+\(est. ([0-9]+)/([0-9]+)", pmc)
     if m:
         project = m.group(1)
         month = m.group(2)
         year = m.group(3)
-        project = "Apache %s" % project
-        if project in renames:
-            project = renames[project]
         if not re.search(r"Committee", pmc):
+            # Classical TLP
+            project = "Apache %s" % project
+            if project in renamesCommittee2Json:
+                project = renamesCommittee2Json[project]
             #print(project)
-            x += 1
-            committees["%s-%s" % (year, month)].append(project)
+            tlpCount += 1
+            key = "%s-%s" % (year, month)
+            if not key in committees:
+                committees[key] = []
+            if not project in committees[key]:
+                committees[key].append(project)
+                committees[key].sort()
+                addedTlps.append("%s: %s" % (key, project))
         else:
+            # Special Committee (Officer's, President's or Board)
             print("INFO: %s ignored %s" % (project, pmc[pmc.rfind('('):]))
         c[project] = True
 
@@ -51,10 +62,15 @@ with open("../../site/json/foundation/co
     f.write(json.dumps(committees, sort_keys=True, indent=0))
     f.close()
 
+print("found %s new TLPs from %s TLPs in committee_info.txt" % (len(addedTlps), tlpCount))
+addedTlps.sort()
+for added in addedTlps:
+    print("- %s" % added)
+
 # compare with chairs, for consistency checking
 chairs = json.load(open("../../site/json/foundation/chairs.json"))
 for chair in chairs:
+    if chair in renamesChairs2Json:
+        chair = renamesChairs2Json[chair]
     if not chair in c:
         print("WARN: %s is in http://www.apache.org/foundation/ but not in committee-info.txt: typo somewhere or retirement in progress?" % chair)
-
-print("found %s TLPs in committee_info.txt" % x)
\ No newline at end of file