You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by cu...@apache.org on 2020/02/17 02:24:52 UTC

[whimsy] branch master updated (8c37d76 -> a7072e6)

This is an automated email from the ASF dual-hosted git repository.

curcuru pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git.


    from 8c37d76  Fix bug
     new b801f7c  Include id as well
     new a2085ce  Precompute cohorts instead
     new 2e89059  Ensure we output the id too
     new a7072e6  Display cohorts in by month view

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 tools/mboxhdr2csv.rb         | 21 +++++++++++--
 www/members/list-traffic.cgi | 56 ++++++++++++++++++++++++++++------
 www/members/meeting-util.rb  | 71 +++++++++++++++++++++++++++++++++++---------
 3 files changed, 122 insertions(+), 26 deletions(-)


[whimsy] 04/04: Display cohorts in by month view

Posted by cu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

curcuru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git

commit a7072e6f76d878bb1e32b409a8805d89fb8ff98d
Author: Shane Curcuru <as...@shanecurcuru.org>
AuthorDate: Sun Feb 16 21:24:38 2020 -0500

    Display cohorts in by month view
---
 www/members/list-traffic.cgi | 56 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 9 deletions(-)

diff --git a/www/members/list-traffic.cgi b/www/members/list-traffic.cgi
index a2dc486..d1ffc49 100755
--- a/www/members/list-traffic.cgi
+++ b/www/members/list-traffic.cgi
@@ -10,6 +10,7 @@ require 'whimsy/asf/agenda'
 require 'date'
 require 'mail'
 require '../../tools/mboxhdr2csv.rb'
+require_relative 'meeting-util'
 
 user = ASF::Person.new($USER)
 unless user.asf_member?
@@ -26,9 +27,34 @@ SRV_MAIL = "/srv/mail/#{LIST_ROOT}"
 
 WEEK_TOTAL = '@@total' # Use @@ so it can't match who name/emails
 WEEK_START = '@@start'
+COHORT_STYLES = { # Cohort label => CSS class name used to colorize senders; TODO find better ways to colorize
+  'Zero to two years' => 'text-warning',
+  'Two to five years' => 'text-success',
+  'Five to ten years' => 'text-info',
+  'Ten or more years' => 'text-primary',
+  'Non-members' => 'text-muted'
+}
+
+# Define simple styles for various 'ages' of Members
+# 1-2 years, 3-5 years, 5-10 years, 10+ years 
+def style_cohorts(cohorts)
+  today = Date.today.year
+  cohorts['cohorts'].each do |id, date|
+    case date[0,4].to_i
+    when (today-1)..today
+      cohorts['cohorts'][id] = COHORT_STYLES['Zero to two years']
+    when (today-5)...(today-1)
+      cohorts['cohorts'][id] = COHORT_STYLES['Two to five years']
+    when (today-10)...(today-5)
+      cohorts['cohorts'][id] = COHORT_STYLES['Five to ten years']
+    else
+      cohorts['cohorts'][id] = COHORT_STYLES['Ten or more years']
+    end
+  end
+end
 
 # Display monthly statistics for all available data
-def display_monthly(months:, nondiscuss:)
+def display_monthly(months:, nondiscuss:, cohorts:)
   months.sort.reverse.each do |month|
     data = MailUtils.get_mails_month(mailroot: SRV_MAIL, yearmonth: month, nondiscuss: nondiscuss)
     next if data.empty?
@@ -53,8 +79,13 @@ def display_monthly(months:, nondiscuss:)
         _ul.list_group do
           _li.list_group_item.list_group_item_info "Long Tail - All Senders"
           _li.list_group_item do
-            data[MailUtils::MAILCOUNT].each do |id, num|
-              _! "#{id} (#{num}), "
+            data[MailUtils::MAILCOUNT].each do |name, num|
+              id = (name.match(/.+[(](\w+)/) || [])[1]
+              if cohorts['cohorts'].has_key?(id)
+                _span! "#{name} (#{num}), ", class: "#{cohorts['cohorts'][id]}"
+              else
+                _span! "#{name} (#{num}), ", class: "#{cohorts['cohorts'][COHORT_STYLES['Non-member']]}"
+              end
             end
           end
         end
@@ -129,24 +160,31 @@ _html do
       helpblock: -> {
         _p %{
           This script displays simple (and likely slightly lossy) analysis of traffic on the #{LIST_ROOT}@ mailing list.
-          In particular, mapping From: email to a committer may not work (meaning individual senders may have multiple spots),
-          and Subject lines displayed may be truncated (meaning threads may not fully be tracked).  Work in progress.
+          In particular, mapping From: email to a committer may not work (meaning individual senders may have multiple spots
+          or be miscategorized).  Work in progress.  Server only stores last year of mail.
         }
         _p do
           _ 'Senders of more than 10% of all emails in a month are highlighted. '
           _ 'Senders of more than 20%, 10%, or 5% of all emails in a week are highlighted in the '
           _a 'By week view (supply ?week in URL).', href: '?week'
         end
-
+        _p do
+          _ 'For the All Senders column, Members are colorized by approximate years of membership like so: '
+          _br
+          COHORT_STYLES.each do |name, style|
+            _span "#{name}, ", class: "#{style}"
+          end
+          _ ' note that due to email address variations, some entries may be incorrectly marked.'
+        end
       }
     ) do
       months = Dir["#{SRV_MAIL}/*"].map {|path| File.basename(path).untaint}.grep(/^\d+$/)
-      _.error "HACK - server log one"
-
+      attendance = MeetingUtil.get_attendance(ASF::SVN['Meetings'])
+      style_cohorts(attendance)
       if ENV['QUERY_STRING'].include? 'week'
         display_weekly(months: months, nondiscuss: MailUtils::NONDISCUSSION_SUBJECTS["<#{LIST_ROOT}.apache.org>"])
       else
-        display_monthly(months: months, nondiscuss: MailUtils::NONDISCUSSION_SUBJECTS["<#{LIST_ROOT}.apache.org>"])
+        display_monthly(months: months, nondiscuss: MailUtils::NONDISCUSSION_SUBJECTS["<#{LIST_ROOT}.apache.org>"], cohorts: attendance)
       end
     end
   end


[whimsy] 02/04: Precompute cohorts instead

Posted by cu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

curcuru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git

commit a2085ced51d5adcb111aa866a9573bebbf2abd42
Author: Shane Curcuru <as...@shanecurcuru.org>
AuthorDate: Sun Feb 16 20:16:50 2020 -0500

    Precompute cohorts instead
---
 www/members/meeting-util.rb | 71 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 14 deletions(-)

diff --git a/www/members/meeting-util.rb b/www/members/meeting-util.rb
index 998ddad..ae4e1be 100644
--- a/www/members/meeting-util.rb
+++ b/www/members/meeting-util.rb
@@ -1,5 +1,8 @@
+#!/usr/bin/env ruby
 # Utility methods and structs related to Member's Meetings
 # NOTE: Assumes 21st century '2*'
+$LOAD_PATH.unshift '/srv/whimsy/lib'
+require 'whimsy/asf'
 require 'json'
 
 class MeetingUtil
@@ -78,23 +81,63 @@ class MeetingUtil
     return JSON.parse(IO.read(File.join(mtg_root, 'attendance.json')))
   end
 
-  # Get a member's cohort (first meeting eligible to attend; typically year after they were elected)
-  # @param mtg_root local copy of RECORDS
-  # @param att_cache hash from attendance.json (see also attend-matrix.py elsewhere); Side effect is updated
-  # @param name of a Member (see also name mapping for various corrections)
-  def self.get_cohort(mtg_root, att_cache, name)
-    if att_cache.nil? or att_cache.empty?
-      att_cache = JSON.parse(IO.read(File.join(mtg_root, 'attendance.json')))
-      att_cache['cohorts'] = {}
-      # Precompute all cohorts, and leave cached
-      att_cache['members'].each do |date, names|
-        names.each do |nam|
-          att_cache['cohorts'][nam] = date
+  # Parse all memapp-received.txt files to get better set of names
+  # @see whimsy/www/members/attendance-xcheck.cgi
+  def self.read_memapps(dir)
+    memapps = Hash.new('unknown')
+    Dir[File.join(dir, '*', 'memapp-received.txt')].each do |received|
+      meeting = File.basename(File.dirname(received))
+      next if meeting.include? 'template'
+      text = File.read(received)
+      list = text.scan(/(.+)\s<(.*)@.*>.*Yes/i)
+      if list.empty?
+        list = text.scan(/^(?:no\s*)*(?:yes\s+)+(\w\S*)\s+(.*)\s*/)
+      else
+        # reverse order of id name type files
+        list.each {|a| a[0], a[1] = a[1], a[0] }
+      end
+      list.each { |itm| memapps[itm[1].strip] = [itm[0], meeting] }
+    end
+    return memapps
+  end
+
+  # Annotate the attendance.json data with a 'cohorts' map (id => the 'members' date key listing that name) and an 'unmatched' list of names
+  # This allows easy use by other tools; the result is written to attendance-cohorts.json in dir, attendance.json itself is not modified
+  def self.annotate_attendance(dir)
+    attendance = JSON.parse(IO.read(File.join(dir, 'attendance.json')))
+    memapps = read_memapps(dir) # name => [id, meeting]
+    iclas = ASF::ICLA.preload # NOTE(review): presumably the collection of people with ICLAs on file - confirm
+    memapp_map = JSON.parse(IO.read(File.join(dir, 'memapp-map.json'))) # name => value used as the cohort id below
+    attendance['cohorts'] = {}
+    attendance['unmatched'] = []
+    attendance['members'].each do |date, ary|
+      ary.each do |nam|
+        found = iclas.select{|i| i.icla.legal_name == nam} # exact match on legal name first
+        found = iclas.select{|i| i.icla.name == nam} if found.empty? # then on the other recorded name
+        if found.empty?
+          if memapps.has_key?(nam) # fall back to names parsed from memapp-received.txt
+            attendance['cohorts'][memapps[nam][0]] = date
+          elsif memapp_map.has_key?(nam) # then to the memapp-map.json entries
+            attendance['cohorts'][memapp_map[nam]] = date
+          else
+            attendance['unmatched'] << nam # no id could be determined for this name
+          end
+        else
+          attendance['cohorts'][found[0].icla.id] = date # first match wins when several ICLAs share the name
         end
       end
     end
-    # TODO map any well-known mis-formatted names
-    return att_cache['cohorts'][name]
+    File.open(File.join(dir, 'attendance-cohorts.json'), 'w') do |f| # Do not overwrite blindly; manual copy if desired
+      f.puts JSON.pretty_generate(attendance)
+    end
   end
+end
 
+# ## ### #### ##### ######
+# Command line use: annotate the directory given as the first argument
+if __FILE__ == $PROGRAM_NAME
+  # Fall back to the current directory when no argument is supplied
+  meetings_dir = ARGV.first || '.'
+  MeetingUtil.annotate_attendance(meetings_dir)
+  puts "DONE, check attendance-cohorts.json"
 end


[whimsy] 03/04: Ensure we output the id too

Posted by cu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

curcuru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git

commit 2e8905953ef870af1a9c3bef5b25e5036940d373
Author: Shane Curcuru <as...@shanecurcuru.org>
AuthorDate: Sun Feb 16 21:24:17 2020 -0500

    Ensure we output the id too
---
 tools/mboxhdr2csv.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/mboxhdr2csv.rb b/tools/mboxhdr2csv.rb
index e0c7bcd..05c9990 100644
--- a/tools/mboxhdr2csv.rb
+++ b/tools/mboxhdr2csv.rb
@@ -217,7 +217,7 @@ module MailUtils
     emails[MAILS].sort_by! { |email| email[DATE] }
     emails[MAILCOUNT] = Hash.new {|h, k| h[k] = 0 }
     emails[MAILS].each do |mail|
-      emails[MAILCOUNT][mail[WHO]] += 1
+      emails[MAILCOUNT]["#{mail[WHO]} (#{mail[AVAILID]})"] += 1
     end
     emails[MAILCOUNT] = emails[MAILCOUNT].sort_by { |k,v| -v}.to_h
 


[whimsy] 01/04: Include id as well

Posted by cu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

curcuru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git

commit b801f7ce4dd674d49c578844cd9bfb65e3396882
Author: Shane Curcuru <as...@shanecurcuru.org>
AuthorDate: Sun Feb 16 20:16:23 2020 -0500

    Include id as well
---
 tools/mboxhdr2csv.rb | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/tools/mboxhdr2csv.rb b/tools/mboxhdr2csv.rb
index c89ddd0..e0c7bcd 100644
--- a/tools/mboxhdr2csv.rb
+++ b/tools/mboxhdr2csv.rb
@@ -28,6 +28,7 @@ module MailUtils
   DATE = 'date'
   FROM = 'from'
   WHO = 'who'
+  AVAILID = 'id'
   SUBJECT = 'subject'
   TOOLS = 'tools'
   MAILS = 'mails'
@@ -86,7 +87,7 @@ module MailUtils
 
   # Annotate mailhash by adding :who and :committer (where known)
   # @param mdata Hash to evaluate and annotate
-  # Side effect: adds :who and :committer from ASF::Person.find_by_email
+  # Side effect: adds :who, :committer, :id from ASF::Person.find_by_email
   # :committer = 'n' if not found; 'N' if error, 'counsel' for special case
   def find_who_from(mdata)
     # Remove bogus INVALID before doing lookups
@@ -96,36 +97,47 @@ module MailUtils
     when /Mark.Radcliffe/i
       mdata[:who] = 'Mark.Radcliffe'
       mdata[:committer] = COUNSEL
+      mdata[:id] = 'markfradcliffe'
     when /mattmann/i
       mdata[:who] = 'Chris Mattmann'
       mdata[:committer] = MEMBER
+      mdata[:id] = 'mattmann'
     when /jagielski/i
       mdata[:who] = 'Jim Jagielski'
       mdata[:committer] = MEMBER
+      mdata[:id] = 'jim'
     when /delacretaz/i
       mdata[:who] = 'Bertrand Delacretaz'
       mdata[:committer] = MEMBER
+      mdata[:id] = 'bdelacretaz'
     when /curcuru/i
       mdata[:who] = 'Shane Curcuru'
       mdata[:committer] = MEMBER
+      mdata[:id] = 'curcuru'
     when /steitz/i
       mdata[:who] = 'Phil Steitz'
       mdata[:committer] = MEMBER
+      mdata[:id] = 'psteitz'
     when /gardler/i  # Effectively unique (see: Heidi)
       mdata[:who] = 'Ross Gardler'
       mdata[:committer] = MEMBER
+      mdata[:id] = 'rgardler'
     when /Craig (L )?Russell/i # Optimize since Secretary sends a lot of mail
       mdata[:who] = 'Craig L Russell'
       mdata[:committer] = MEMBER
+      mdata[:id] = 'clr'
     when /McGrail/i
       mdata[:who] = 'Kevin A. McGrail'
       mdata[:committer] = MEMBER
+      mdata[:id] = 'kmcgrail'
     when /sallykhudairi@yahoo/i 
       mdata[:who] = 'Sally Khudairi'
       mdata[:committer] = MEMBER
+      mdata[:id] = 'sk'
     when /sk@haloworldwide.com/i
       mdata[:who] = 'Sally Khudairi'
       mdata[:committer] = MEMBER
+      mdata[:id] = 'sk'
     else
       begin
         # TODO use Real Name (JIRA) to attempt to lookup some notifications
@@ -133,6 +145,7 @@ module MailUtils
         person = ASF::Person.find_by_email(tmp.address.dup)
         if person
           mdata[:who] = person.cn
+          mdata[:id] = person.id
           if person.asf_member?
             mdata[:committer] = MEMBER
           else
@@ -141,10 +154,12 @@ module MailUtils
         else
           mdata[:who] = "#{tmp.display_name} <#{tmp.address}>"
           mdata[:committer] = 'n'
+          mdata[:id] = 'unknown'
         end
       rescue
         mdata[:who] = mdata[:from] # Use original value here
         mdata[:committer] = 'N'
+        mdata[:id] = 'unknown'
       end
     end
   end
@@ -178,7 +193,7 @@ module MailUtils
       temp = {from: data[FROM]} # pass a hash
       MailUtils.find_who_from(temp) # update the hash
       # pick out the bits we want
-      data[WHO], data[COMMITTER] = temp[:who], temp[:committer] 
+      data[WHO], data[COMMITTER], data[AVAILID] = temp[:who], temp[:committer], temp[:id]
 
       data[SUBJECT] = message[/^Subject: (.*)/, 1]
       if nondiscuss