You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@community.apache.org by se...@apache.org on 2015/10/01 00:33:37 UTC

svn commit: r1706165 - /comdev/reporter.apache.org/trunk/data/parsepmcs.py

Author: sebb
Date: Wed Sep 30 22:33:37 2015
New Revision: 1706165

URL: http://svn.apache.org/viewvc?rev=1706165&view=rev
Log:
COMDEV-164 parsepmcs.py does not handle new groups correctly
COMDEV-167 parsepmcs.py regex for committer groups also matches -pmc groups

Modified:
    comdev/reporter.apache.org/trunk/data/parsepmcs.py

Modified: comdev/reporter.apache.org/trunk/data/parsepmcs.py
URL: http://svn.apache.org/viewvc/comdev/reporter.apache.org/trunk/data/parsepmcs.py?rev=1706165&r1=1706164&r2=1706165&view=diff
==============================================================================
--- comdev/reporter.apache.org/trunk/data/parsepmcs.py (original)
+++ comdev/reporter.apache.org/trunk/data/parsepmcs.py Wed Sep 30 22:33:37 2015
@@ -3,10 +3,22 @@ import sys
 if sys.hexversion < 0x030000F0:
     raise RuntimeError("This script requires Python3")
 """
-   This script updates:
-   pmcs.json
-   projects.json
+   This script
+   reads: http://people.apache.org/committer-index.html 
+   and updates:
+   pmcs.json - members of pmcs
+   projects.json - committers of projects
    
+   The json files have the format:
+   
+   dict: key=pmc/project,
+         value=dict: key=availid,
+                     value=array:
+         [
+         full name,
+         time.time() when entry was added to an existing group (0 if the group itself was new),
+         time.time() when entry was last seen,
+         ]
 """
 import re
 import urllib.request
@@ -37,58 +49,75 @@ newgroups = []
 
 data = urllib.request.urlopen("http://people.apache.org/committer-index.html").read().decode('utf-8')
 x = 0
+stamp = time.time()
 for committer in re.findall(r"<tr>([\S\s]+?)</tr>", data, re.MULTILINE | re.UNICODE):
     x += 1
 ##    print(committer)
     m = re.search(r"<a id='(.+?)'>[\s\S]+?<td.+?>\s*(.+?)</td>[\s\S]+?>(.+)</td>", committer, re.MULTILINE | re.UNICODE)
     if m:
-        cid = m.group(1)
-        cname = re.sub(r"<.+?>", "", m.group(2), 4)
-        cproj = m.group(3)
+        cid = m.group(1) # committer id / availid
+        cname = re.sub(r"<.+?>", "", m.group(2), 4) # committer name
+        cproj = m.group(3) # list of authgroups to which the person belongs
         isMember = False
         if re.search(r"<b", committer, re.MULTILINE | re.UNICODE):
             isMember = True
-        for project in re.findall(r"#([-a-z0-9._]+)-pmc", cproj):
-            now = time.time()
-            if not project in pmcs:
-                pmcs[project] = {}
-                newgroups.append(project)
-            if project in newgroups:
-                now = 0
-            if not cid in pmcs[project]:
-                pmcs[project][cid] = [cname, now, time.time()]
-            else:
-                pmcs[project][cid] = [pmcs[project][cid][0], pmcs[project][cid][1], time.time()]
-                
-        for project in re.findall(r"#([-a-z0-9._]+)(?!-pmc)", cproj):
-            now = time.time()
-            if not project in projects:
-                projects[project] = {}
-                newgroups.append(project)
-            elif project in newgroups:
-                now = 0
-            if not cid in projects[project]:
-                projects[project][cid] = [cname, now, time.time()]
+        # process the groups
+        for group in re.findall(r"#([-a-z0-9._]+)'", cproj):
+            now = stamp
+            if group.endswith("-pmc"):
+                project = group[0:-4] # drop the "-pmc" suffix
+#                 print("PMC %s %s => %s" % (cid, group, project))
+                if not project in pmcs: # a new project
+                    print("New pmc group %s" % project)
+                    pmcs[project] = {}
+                    newgroups.append(group)
+                if not cid in pmcs[project]: # new to the group
+                    if group in newgroups: # the group is also new
+                        now = 0
+                    print("New pmc entry %s %s %u" % (project, cid, now))
+                    pmcs[project][cid] = [cname, now, stamp]
+                else:
+                    # update the entry last seen time
+                    pmcs[project][cid] = [pmcs[project][cid][0], pmcs[project][cid][1], stamp]
             else:
-                projects[project][cid] = [projects[project][cid][0], projects[project][cid][1], time.time()]
-    
+                project = group
+#                 print("Unx %s %s" % (cid, project))
+                now = stamp
+                if not project in projects:
+                    print("New unx group %s" % project)
+                    projects[project] = {}
+                    newgroups.append(group)
+                if not cid in projects[project]: # new to the group
+                    if group in newgroups: # the group is also new
+                        now = 0
+                    print("New unx entry %s %s %u" % (project,cid,now))
+                    projects[project][cid] = [cname, now, stamp]
+                else:
+                    # update the entry last seen time
+                    projects[project][cid] = [projects[project][cid][0], projects[project][cid][1], stamp]
+
+
 # Delete retired members
 ret = 0
 for project in projects:
     for cid in projects[project]:
         if len(projects[project][cid]) < 3 or projects[project][cid][2] < (time.time() - (86400*3)):
-            projects[project][cid] = "!"
+            if project.endswith("-pmc"): # these were mistaken entries
+                continue
+            print("Dropping project entry %s %s" % (project, cid))
+            projects[project][cid] = "!" # flag for deletion
             ret += 1
     projects[project] =  {i:projects[project][i] for i in projects[project] if projects[project][i]!="!"}
 
 for project in pmcs:
     for cid in pmcs[project]:
         if len(pmcs[project][cid]) < 3 or pmcs[project][cid][2] < (time.time() - (86400*3)):
-            pmcs[project][cid] = "!"
+            print("Dropping pmc entry %s %s" % (project, cid))
+            pmcs[project][cid] = "!" # flag for deletion
             ret += 1
     pmcs[project] =  {i:pmcs[project][i] for i in pmcs[project] if pmcs[project][i]!="!"}
 
-    
+
 print("Writing pmcs.json")
 with open("pmcs.json", "w") as f:
     json.dump(pmcs, f, sort_keys=True, indent=1)