You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by cu...@apache.org on 2019/06/20 21:55:13 UTC
[whimsy] branch master updated: Namespaces for MailUtils
This is an automated email from the ASF dual-hosted git repository.
curcuru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new 2265343 Namespaces for MailUtils
2265343 is described below
commit 22653438068b118d07f86803c6902bfc6a416a77
Author: Shane Curcuru <as...@shanecurcuru.org>
AuthorDate: Thu Jun 20 17:53:59 2019 -0400
Namespaces for MailUtils
---
tools/mboxhdr2csv.rb | 544 +++++++++++++++++++++---------------------
www/officers/list-traffic.cgi | 112 +--------
2 files changed, 281 insertions(+), 375 deletions(-)
diff --git a/tools/mboxhdr2csv.rb b/tools/mboxhdr2csv.rb
index 9bc0e59..0987d37 100644
--- a/tools/mboxhdr2csv.rb
+++ b/tools/mboxhdr2csv.rb
@@ -1,5 +1,5 @@
#!/usr/bin/env ruby
-# Analyze mbox files for general statistics into CSV
+# Analyze mbox files (downloaded by PonyAPI) for general statistics into CSV
# - Per list messages per month over time (PMOT)
# - Count messages group by list -> graph months as time
# - Per list contentlines per lists PMOT
@@ -18,299 +18,307 @@ require 'json'
require 'date'
require 'optparse'
-MBOX_EXT = '.mbox'
-MEMBER = 'member'
-COMMITTER = 'committer'
-COUNSEL = 'counsel'
-INVALID = '.INVALID'
-VERSION = 'mboxhdr2json'
-URIRX = URI.regexp(['http', 'https'])
+# Various utility functions/data for mailing list analysis
+module MailUtils
+ extend self
+ MEMBER = 'member'
+ COMMITTER = 'committer'
+ COUNSEL = 'counsel'
+ INVALID = '.INVALID'
-# Subject regexes that are non-discussion oriented
-# Analysis: don't bother with content lines in these messages,
-# because most of the content is tool-generated
-NONDISCUSSION_SUBJECTS = { # Note: none applicable to members@
- '<board.apache.org>' => {
- missing: /\AMissing\s((\S+\s){1,3})Board/, # whimsy/www/board/agenda/views/buttons/email.js.rb
- feedback: /\ABoard\sfeedback\son\s20/, # whimsy/www/board/agenda/views/actions/feedback.json.rb
- notice: /\A\[NOTICE\]/i,
- report: /\A\[REPORT\]/i,
- resolution: /\A\[RESOLUTION\]/i,
- svn_agenda: %r{\Aboard: r\d{4,8} - /foundation/board/},
- svn_iclas: %r{\Aboard: r\d{4,8} - /foundation/officers/iclas.txt}
- },
- '<operations.apache.org>' => {
- notice: /\A\[NOTICE\]/i,
- report: /\A\[REPORT\]/i,
- svn_general: %r{\Asvn commit: r/},
- svn_bills: %r{\Abills: r\d{4,8} -}
- },
- '<trademarks.apache.org>' => {
- report: /\A\[REPORT\]/i,
- svn_general: %r{\Asvn commit: r/}
- },
- '<fundraising.apache.org>' => {
- report: /\A\[REPORT\]/i,
- svn_bills: %r{\Abills: r\d{4,8} -}
+ # Subject regexes that are non-discussion oriented
+ # Analysis: don't bother with content lines in these messages,
+ # because most of the content is tool-generated
+ NONDISCUSSION_SUBJECTS = { # Note: none applicable to members@
+ '<board.apache.org>' => {
+ missing: /\AMissing\s((\S+\s){1,3})Board/, # whimsy/www/board/agenda/views/buttons/email.js.rb
+ feedback: /\ABoard\sfeedback\son\s20/, # whimsy/www/board/agenda/views/actions/feedback.json.rb
+ notice: /\A\[NOTICE\]/i,
+ report: /\A\[REPORT\]/i,
+ resolution: /\A\[RESOLUTION\]/i,
+ svn_agenda: %r{\Aboard: r\d{4,8} - /foundation/board/},
+ svn_iclas: %r{\Aboard: r\d{4,8} - /foundation/officers/iclas.txt}
+ },
+ '<operations.apache.org>' => {
+ notice: /\A\[NOTICE\]/i,
+ report: /\A\[REPORT\]/i,
+ svn_general: %r{\Asvn commit: r/},
+ svn_bills: %r{\Abills: r\d{4,8} -}
+ },
+ '<trademarks.apache.org>' => {
+ report: /\A\[REPORT\]/i,
+ svn_general: %r{\Asvn commit: r/}
+ },
+ '<fundraising.apache.org>' => {
+ report: /\A\[REPORT\]/i,
+ svn_bills: %r{\Abills: r\d{4,8} -}
+ }
}
-}
-# Read a ponyapi.rb mbox file and return mails (text content only)
-# @param f path to .mbox or .mbox.gz
-# @return [mail1, mail2, ...]
-def read_mbox(f)
- if f.end_with? '.gz'
- stream = StringIO.new(mbox)
- reader = Zlib::GzipReader.new(stream)
- mbox = reader.read
- reader.close
- stream.close rescue nil
- else
- mbox = File.read(f)
- end
- mbox.force_encoding Encoding::ASCII_8BIT
- messages = mbox.split(/^From .*/)
- messages.shift # Drop first item (not a message)
- return messages
-end
-
-# Process an mbox file into mailhash of selected headers and lines of text
-# @param f path to .mbox or .mbox.gz
-# @return [mail1hash, mail2hash, ...], [ [parseerr, order], ...]
-# @return nil, [read, errors2...] if mbox file can't be read
-# mailhash contains :from, :subject, :listid, :date, :messageid,
-# :inreplyto, :lines (count), plus :who and :committer
-def mbox2stats(f)
- begin
- mails = read_mbox(f)
- rescue => e
- return nil, e
- end
- errs = []
- messages = []
- order = 0
- mails.each do |message|
- mdata = {}
- mail = nil
+ # @see www/secretary/workbench/models/message.rb
+ # @see https://github.com/mikel/mail/issues/39
+ def liberal_email_parser(addr)
begin
- # Preserve message order in case it's important
- order += 1
- # Enforce linefeeds; makes Mail happy; borks binary attachments (not used in this script)
- mail = Mail.read_from_string(message.gsub(/\r?\n/, "\r\n"))
- mdata[:order] = order
- begin # HACK for cases where some values don't parse, try to get good enough values in rescue
- mdata[:from] = mail[:from].value
- mdata[:subject] = mail[:subject].value
- mdata[:listid] = mail[:List_Id].value
- mdata[:date] = mail.date.to_s
- rescue => ee
- mdata[:from] = mail[:from]
- mdata[:subject] = mail[:subject]
- mdata[:listid] = mail[:List_Id]
- mdata[:date] = mail.date.to_s
- mdata[:parseerr] = mail.errors
- end
- mdata[:messageid] = mail.message_id
- mdata[:inreplyto] = mail.in_reply_to
- if mail.multipart?
- text_part = mail.text_part.decoded.split(/\r?\n/)
+ addr = Mail::Address.new(addr)
+ rescue
+ if addr =~ /^"([^"]*)" <(.*)>$/
+ addr = Mail::Address.new
+ addr.address = $2
+ addr.display_name = $1
+ elsif addr =~ /^([^"]*) <(.*)>$/
+ addr = Mail::Address.new
+ addr.address = $2
+ addr.display_name = $1
else
- text_part = mail.body.decoded.split(/\r?\n/)
- end
- ctr = 0 # Count text lines of nonblank, nonreply content
- links = 0 # Count number of apparent hyperlinks
- text_part.each do |l|
- case l
- when /\A\s*>/
- # Don't count reply lines, even when indented
- when /\A\s*\z/
- # Don't count blank lines
- when /\AOn.*wrote:\z/
- # Don't count most common reply header
- when /\A-----Original Message-----/
- # Stop counting if it seems like a forwarded message
- break
- # TODO: figure out if we're in a .sig block, and stop counting
- else
- links += 1 if l =~ URIRX
- ctr += 1
- end
- end
- mdata[:lines] = ctr
- mdata[:links] = links
- # Annotate various other precomputable data
- find_who_from mdata
- begin
- d = DateTime.parse(mdata[:date])
- mdata[:y] = d.year
- mdata[:m] = d.month
- mdata[:d] = d.day
- mdata[:w] = d.wday
- mdata[:h] = d.hour
- mdata[:z] = d.zone
- rescue => noop
- # no-op - not critical
- puts "DEBUG: #{e.message} parsing: #{mdata[:date]}"
- end
- regex = NONDISCUSSION_SUBJECTS[mdata[:listid]] # Use subject regex for this list (if any)
- if regex
- regex.each do |typ, rx|
- if mdata[:subject] =~ rx
- mdata[:nondiscuss] = typ
- break # regex.each
- end
- end
+ raise
end
- # Push our hash
- messages << mdata
- rescue => e
- errs << [e, mdata[:order]]
end
+ return addr
end
- return messages, errs
-end
-# Annotate mailhash by adding :who and :committer (where known)
-# @param mdata Hash to evaluate and annotate
-# Side effect: adds :who and :committer from ASF::Person.find_by_email
-# :committer = 'n' if not found; 'N' if error, 'counsel' for special case
-def find_who_from(mdata)
- # Remove bogus INVALID before doing lookups
- from = mdata[:from].sub(INVALID, '')
- # Micro-optimize unique names
- case from
- when /Mark.Radcliffe/i
- mdata[:who] = 'Mark.Radcliffe'
- mdata[:committer] = COUNSEL
- when /mattmann/i
- mdata[:who] = 'Chris Mattmann'
- mdata[:committer] = MEMBER
- when /jagielski/i
- mdata[:who] = 'Jim Jagielski'
- mdata[:committer] = MEMBER
- when /delacretaz/i
- mdata[:who] = 'Bertrand Delacretaz'
- mdata[:committer] = MEMBER
- when /curcuru/i
- mdata[:who] = 'Shane Curcuru'
- mdata[:committer] = MEMBER
- when /steitz/i
- mdata[:who] = 'Phil Steitz'
- mdata[:committer] = MEMBER
- when /gardler/i # Effectively unique (see: Heidi)
- mdata[:who] = 'Ross Gardler'
- mdata[:committer] = MEMBER
- when /Craig (L )?Russell/i # Optimize since Secretary sends a lot of mail
- mdata[:who] = 'Craig L Russell'
- mdata[:committer] = MEMBER
- when /McGrail/i
- mdata[:who] = 'Kevin A. McGrail'
- mdata[:committer] = MEMBER
- when /sallykhudairi@yahoo/i
- mdata[:who] = 'Sally Khudairi'
- mdata[:committer] = MEMBER
- when /sk@haloworldwide.com/i
- mdata[:who] = 'Sally Khudairi'
- mdata[:committer] = MEMBER
- else
- begin
- # TODO use Real Name (JIRA) to attempt to lookup some notifications
- tmp = liberal_email_parser(from)
- person = ASF::Person.find_by_email(tmp.address.dup)
- if person
- mdata[:who] = person.cn
- if person.asf_member?
- mdata[:committer] = MEMBER
+ # Annotate mailhash by adding :who and :committer (where known)
+ # @param mdata Hash to evaluate and annotate
+ # Side effect: adds :who and :committer from ASF::Person.find_by_email
+ # :committer = 'n' if not found; 'N' if error, 'counsel' for special case
+ def find_who_from(mdata)
+ # Remove bogus INVALID before doing lookups
+ from = mdata[:from].sub(INVALID, '')
+ # Micro-optimize unique names
+ case from
+ when /Mark.Radcliffe/i
+ mdata[:who] = 'Mark.Radcliffe'
+ mdata[:committer] = COUNSEL
+ when /mattmann/i
+ mdata[:who] = 'Chris Mattmann'
+ mdata[:committer] = MEMBER
+ when /jagielski/i
+ mdata[:who] = 'Jim Jagielski'
+ mdata[:committer] = MEMBER
+ when /delacretaz/i
+ mdata[:who] = 'Bertrand Delacretaz'
+ mdata[:committer] = MEMBER
+ when /curcuru/i
+ mdata[:who] = 'Shane Curcuru'
+ mdata[:committer] = MEMBER
+ when /steitz/i
+ mdata[:who] = 'Phil Steitz'
+ mdata[:committer] = MEMBER
+ when /gardler/i # Effectively unique (see: Heidi)
+ mdata[:who] = 'Ross Gardler'
+ mdata[:committer] = MEMBER
+ when /Craig (L )?Russell/i # Optimize since Secretary sends a lot of mail
+ mdata[:who] = 'Craig L Russell'
+ mdata[:committer] = MEMBER
+ when /McGrail/i
+ mdata[:who] = 'Kevin A. McGrail'
+ mdata[:committer] = MEMBER
+ when /sallykhudairi@yahoo/i
+ mdata[:who] = 'Sally Khudairi'
+ mdata[:committer] = MEMBER
+ when /sk@haloworldwide.com/i
+ mdata[:who] = 'Sally Khudairi'
+ mdata[:committer] = MEMBER
+ else
+ begin
+ # TODO use Real Name (JIRA) to attempt to lookup some notifications
+ tmp = liberal_email_parser(from)
+ person = ASF::Person.find_by_email(tmp.address.dup)
+ if person
+ mdata[:who] = person.cn
+ if person.asf_member?
+ mdata[:committer] = MEMBER
+ else
+ mdata[:committer] = COMMITTER
+ end
else
- mdata[:committer] = COMMITTER
+ mdata[:who] = "#{tmp.display_name} <#{tmp.address}>"
+ mdata[:committer] = 'n'
end
- else
- mdata[:who] = "#{tmp.display_name} <#{tmp.address}>"
- mdata[:committer] = 'n'
+ rescue
+ mdata[:who] = mdata[:from] # Use original value here
+ mdata[:committer] = 'N'
end
- rescue
- mdata[:who] = mdata[:from] # Use original value here
- mdata[:committer] = 'N'
end
end
end
-# @see www/secretary/workbench/models/message.rb
-# @see https://github.com/mikel/mail/issues/39
-def liberal_email_parser(addr)
- begin
- addr = Mail::Address.new(addr)
- rescue
- if addr =~ /^"([^"]*)" <(.*)>$/
- addr = Mail::Address.new
- addr.address = $2
- addr.display_name = $1
- elsif addr =~ /^([^"]*) <(.*)>$/
- addr = Mail::Address.new
- addr.address = $2
- addr.display_name = $1
+module MboxUtils
+ extend self
+ MBOX_EXT = '.mbox'
+ VERSION = 'mboxhdr2json'
+ URIRX = URI.regexp(['http', 'https'])
+
+ # Read a ponyapi.rb mbox file and return mails (text content only)
+ # @param f path to .mbox or .mbox.gz
+ # @return [mail1, mail2, ...]
+ def read_mbox(f)
+ if f.end_with? '.gz'
+ stream = StringIO.new(mbox)
+ reader = Zlib::GzipReader.new(stream)
+ mbox = reader.read
+ reader.close
+ stream.close rescue nil
else
- raise
+ mbox = File.read(f)
end
+ mbox.force_encoding Encoding::ASCII_8BIT
+ messages = mbox.split(/^From .*/)
+ messages.shift # Drop first item (not a message)
+ return messages
end
- return addr
-end
-# Scan dir tree for mboxes and output individual mailhash as JSONs
-# @param dir to scan (whole tree)
-# @param ext file extension to glob for
-# Side effect: writes out f.chomp(ext).json files
-# @note writes string VERSION for differentiating from other *.json
-def scan_dir_mbox2stats(dir, ext = MBOX_EXT)
- Dir["#{dir}/**/*#{ext}".untaint].sort.each do |f|
- mails, errs = mbox2stats(f.untaint)
- File.open("#{f.chomp(ext)}.json", "w") do |fout|
- fout.puts JSON.pretty_generate(["#{VERSION}", mails, errs])
+ # Process an mbox file into mailhash of selected headers and lines of text
+ # @param f path to .mbox or .mbox.gz
+ # @return [mail1hash, mail2hash, ...], [ [parseerr, order], ...]
+ # @return nil, [read, errors2...] if mbox file can't be read
+ # mailhash contains :from, :subject, :listid, :date, :messageid,
+ # :inreplyto, :lines (count), plus :who and :committer
+ def mbox2stats(f)
+ begin
+ mails = read_mbox(f)
+ rescue => e
+ return nil, e
end
+ errs = []
+ messages = []
+ order = 0
+ mails.each do |message|
+ mdata = {}
+ mail = nil
+ begin
+ # Preserve message order in case it's important
+ order += 1
+ # Enforce linefeeds; makes Mail happy; borks binary attachments (not used in this script)
+ mail = Mail.read_from_string(message.gsub(/\r?\n/, "\r\n"))
+ mdata[:order] = order
+ begin # HACK for cases where some values don't parse, try to get good enough values in rescue
+ mdata[:from] = mail[:from].value
+ mdata[:subject] = mail[:subject].value
+ mdata[:listid] = mail[:List_Id].value
+ mdata[:date] = mail.date.to_s
+ rescue => ee
+ mdata[:from] = mail[:from]
+ mdata[:subject] = mail[:subject]
+ mdata[:listid] = mail[:List_Id]
+ mdata[:date] = mail.date.to_s
+ mdata[:parseerr] = mail.errors
+ end
+ mdata[:messageid] = mail.message_id
+ mdata[:inreplyto] = mail.in_reply_to
+ if mail.multipart?
+ text_part = mail.text_part.decoded.split(/\r?\n/)
+ else
+ text_part = mail.body.decoded.split(/\r?\n/)
+ end
+ ctr = 0 # Count text lines of nonblank, nonreply content
+ links = 0 # Count number of apparent hyperlinks
+ text_part.each do |l|
+ case l
+ when /\A\s*>/
+ # Don't count reply lines, even when indented
+ when /\A\s*\z/
+ # Don't count blank lines
+ when /\AOn.*wrote:\z/
+ # Don't count most common reply header
+ when /\A-----Original Message-----/
+ # Stop counting if it seems like a forwarded message
+ break
+ # TODO: figure out if we're in a .sig block, and stop counting
+ else
+ links += 1 if l =~ URIRX
+ ctr += 1
+ end
+ end
+ mdata[:lines] = ctr
+ mdata[:links] = links
+ # Annotate various other precomputable data
+ MailUtils.find_who_from(mdata)
+ begin
+ d = DateTime.parse(mdata[:date])
+ mdata[:y] = d.year
+ mdata[:m] = d.month
+ mdata[:d] = d.day
+ mdata[:w] = d.wday
+ mdata[:h] = d.hour
+ mdata[:z] = d.zone
+ rescue => noop
+ # no-op - not critical
+ puts "DEBUG: #{e.message} parsing: #{mdata[:date]}"
+ end
+ regex = MailUtils::NONDISCUSSION_SUBJECTS[mdata[:listid]] # Use subject regex for this list (if any)
+ if regex
+ regex.each do |typ, rx|
+ if mdata[:subject] =~ rx
+ mdata[:nondiscuss] = typ
+ break # regex.each
+ end
+ end
+ end
+ # Push our hash
+ messages << mdata
+ rescue => e
+ errs << [e, mdata[:order]]
+ end
+ end
+ return messages, errs
end
-end
-# Scan dir tree for mailhash JSONs and output an overview CSV of all
-# @return [ error1, error2, ...] if any errors
-# Side effect: writes out dir/outname CSV file
-# @note reads string VERSION for differentiating from other *.json
-def scan_dir_stats2csv(dir, outname, ext = '.json')
- errors = []
- jzons = []
- Dir["#{dir}/**/*#{ext}".untaint].sort.each do |f|
- begin
- tmp = JSON.parse(File.read(f))
- if tmp[0].kind_of?(String) && tmp[0].start_with?(VERSION)
- jzons << tmp.drop(1)
+ # Scan dir tree for mboxes and output individual mailhash as JSONs
+ # @param dir to scan (whole tree)
+ # @param ext file extension to glob for
+ # Side effect: writes out f.chomp(ext).json files
+ # @note writes string VERSION for differentiating from other *.json
+ def scan_dir_mbox2stats(dir, ext = MBOX_EXT)
+ Dir["#{dir}/**/*#{ext}".untaint].sort.each do |f|
+ mails, errs = mbox2stats(f.untaint)
+ File.open("#{f.chomp(ext)}.json", "w") do |fout|
+ fout.puts JSON.pretty_generate(["#{VERSION}", mails, errs])
end
- rescue => e
- puts "ERROR: parse of #{f} raised #{e.message[0..255]}"
- errors << "#{e.message}\n\t#{e.backtrace.join("\n\t")}"
- next
end
end
- raise ArgumentError, "#{__method__} called with no valid mbox json files in #{dir}" if jzons.length == 0
- puts "#{__method__} processing #{jzons.length} mbox json files"
- # Write out headers and the first array in new csv
- csvfile = File.join("#{dir}", outname)
- csv = CSV.open(csvfile, "w", headers: %w( year month day weekday hour zone listid who subject lines links committer messageid inreplyto ), write_headers: true)
- jzons.shift[0].each do |m|
- csv << [ m['y'], m['m'], m['d'], m['w'], m['h'], m['z'], m['listid'], m['who'], m['subject'], m['lines'], m['links'], m['committer'], m['messageid'], m['inreplyto'] ]
- end
- # Write out all remaining arrays, without headers, appending
- jzons.each do |j|
- begin
- j[0].each do |m|
- csv << [ m['y'], m['m'], m['d'], m['w'], m['h'], m['z'], m['listid'], m['who'], m['subject'], m['lines'], m['links'], m['committer'], m['messageid'], m['inreplyto'] ]
+
+ # Scan dir tree for mailhash JSONs and output an overview CSV of all
+ # @return [ error1, error2, ...] if any errors
+ # Side effect: writes out dir/outname CSV file
+ # @note reads string VERSION for differentiating from other *.json
+ def scan_dir_stats2csv(dir, outname, ext = '.json')
+ errors = []
+ jzons = []
+ Dir["#{dir}/**/*#{ext}".untaint].sort.each do |f|
+ begin
+ tmp = JSON.parse(File.read(f))
+ if tmp[0].kind_of?(String) && tmp[0].start_with?(VERSION)
+ jzons << tmp.drop(1)
+ end
+ rescue => e
+ puts "ERROR: parse of #{f} raised #{e.message[0..255]}"
+ errors << "#{e.message}\n\t#{e.backtrace.join("\n\t")}"
+ next
+ end
+ end
+ raise ArgumentError, "#{__method__} called with no valid mbox json files in #{dir}" if jzons.length == 0
+ puts "#{__method__} processing #{jzons.length} mbox json files"
+ # Write out headers and the first array in new csv
+ csvfile = File.join("#{dir}", outname)
+ csv = CSV.open(csvfile, "w", headers: %w( year month day weekday hour zone listid who subject lines links committer messageid inreplyto ), write_headers: true)
+ jzons.shift[0].each do |m|
+ csv << [ m['y'], m['m'], m['d'], m['w'], m['h'], m['z'], m['listid'], m['who'], m['subject'], m['lines'], m['links'], m['committer'], m['messageid'], m['inreplyto'] ]
+ end
+ # Write out all remaining arrays, without headers, appending
+ jzons.each do |j|
+ begin
+ j[0].each do |m|
+ csv << [ m['y'], m['m'], m['d'], m['w'], m['h'], m['z'], m['listid'], m['who'], m['subject'], m['lines'], m['links'], m['committer'], m['messageid'], m['inreplyto'] ]
+ end
+ rescue => e
+ puts "ERROR: write of #{f} raised #{e.message[0..255]}"
+ errors << "#{e.message}\n\t#{e.backtrace.join("\n\t")}"
+ next
end
- rescue => e
- puts "ERROR: write of #{f} raised #{e.message[0..255]}"
- errors << "#{e.message}\n\t#{e.backtrace.join("\n\t")}"
- next
end
+ csv.close # Just in case
+ return errors
end
- csv.close # Just in case
- return errors
end
# ## ### #### ##### ######
@@ -352,11 +360,11 @@ end
if __FILE__ == $PROGRAM_NAME
options = optparse
if options[:json]
- puts "START: Parsing #{options[:dir]}/*#{MBOX_EXT} into *.json"
- scan_dir_mbox2stats(options[:dir]) # Side effect: writes out f.chomp(ext).json files
+ puts "START: Parsing #{options[:dir]}/*#{MboxUtils::MBOX_EXT} into *.json"
+ MboxUtils.scan_dir_mbox2stats(options[:dir]) # Side effect: writes out f.chomp(ext).json files
end
puts "START: Analyzing #{options[:dir]}/*.json into #{options[:output]}"
- errs = scan_dir_stats2csv(options[:dir], options[:output])
+ errs = MboxUtils.scan_dir_stats2csv(options[:dir], options[:output])
if errs
errs.each do |e|
puts "ERROR: #{e}"
diff --git a/www/officers/list-traffic.cgi b/www/officers/list-traffic.cgi
index b54a93d..0d27e86 100755
--- a/www/officers/list-traffic.cgi
+++ b/www/officers/list-traffic.cgi
@@ -9,6 +9,7 @@ require 'whimsy/asf'
require 'whimsy/asf/agenda'
require 'date'
require 'mail'
+require '../../tools/mboxhdr2csv.rb'
user = ASF::Person.new($USER)
unless user.asf_member? or ASF.pmc_chairs.include? user
@@ -29,109 +30,6 @@ MAILCOUNT = 'mailcount'
WEEK_TOTAL = '@@total' # Use @@ so it can't match who name/emails
WEEK_START = '@@start'
-### ---- Copied from tools/mboxhdr2csv.rb; should be refactored ----
-MEMBER = 'member'
-COMMITTER = 'committer'
-COUNSEL = 'counsel'
-# Subject regexes that are non-discussion oriented for flagging
-NONDISCUSSION_SUBJECTS = { # Note: none applicable to members@
- '<board.apache.org>' => {
- missing: /\AMissing\s((\S+\s){1,3})Board/, # whimsy/www/board/agenda/views/buttons/email.js.rb
- feedback: /\ABoard\sfeedback\son\s20/, # whimsy/www/board/agenda/views/actions/feedback.json.rb
- notice: /\A\[NOTICE\]/i,
- report: /\A\[REPORT\]/i,
- resolution: /\A\[RESOLUTION\]/i,
- svn_agenda: %r{\Aboard: r\d{4,8} - /foundation/board/},
- svn_iclas: %r{\Aboard: r\d{4,8} - /foundation/officers/iclas.txt}
- }
-}
-# Annotate mailhash by adding :who and COMMITTER (where known)
-# @param email address to check
-# @returns ['Full Name', 'committer-flag'
-# COMMITTER = 'n' if not found; 'N' if error, 'counsel' for special case
-def find_who_from(email)
- # Remove bogus INVALID before doing lookups
- from = email.sub('.INVALID', '')
- who = nil
- committer = nil
- # Micro-optimize unique names
- case from
- when /Mark.Radcliffe/i
- who = 'Mark.Radcliffe'
- committer = COUNSEL
- when /mattmann/i
- who = 'Chris Mattmann'
- committer = MEMBER
- when /jagielski/i
- who = 'Jim Jagielski'
- committer = MEMBER
- when /delacretaz/i
- who = 'Bertrand Delacretaz'
- committer = MEMBER
- when /curcuru/i
- who = 'Shane Curcuru'
- committer = MEMBER
- when /steitz/i
- who = 'Phil Steitz'
- committer = MEMBER
- when /gardler/i # Effectively unique (see: Heidi)
- who = 'Ross Gardler'
- committer = MEMBER
- when /Craig (L )?Russell/i # Optimize since Secretary sends a lot of mail
- who = 'Craig L Russell'
- committer = MEMBER
- when /McGrail/i
- who = 'Kevin A. McGrail'
- committer = MEMBER
- when /khudairi/i
- who = 'Sally Khudairi'
- committer = MEMBER
- else
- begin
- # TODO use Real Name (JIRA) to attempt to lookup some notifications
- tmp = liberal_email_parser(from)
- person = ASF::Person.find_by_email(tmp.address.dup)
- if person
- who = person.cn
- if person.asf_member?
- committer = MEMBER
- else
- committer = COMMITTER
- end
- else
- who = "#{tmp.display_name} <#{tmp.address}>"
- committer = 'n'
- end
- rescue
- who = from # Use original value here
- committer = 'N'
- end
- end
- return who, committer
-end
-
-# @see www/secretary/workbench/models/message.rb
-# @see https://github.com/mikel/mail/issues/39
-def liberal_email_parser(addr)
- begin
- addr = Mail::Address.new(addr)
- rescue
- if addr =~ /^"([^"]*)" <(.*)>$/
- addr = Mail::Address.new
- addr.address = $2
- addr.display_name = $1
- elsif addr =~ /^([^"]*) <(.*)>$/
- addr = Mail::Address.new
- addr.address = $2
- addr.display_name = $1
- else
- raise
- end
- end
- return addr
-end
-### ---- Copied from tools/mboxhdr2csv.rb; should be refactored ----
-
# Get {MAILS: [{date, who, subject, flag},...\, TOOLS: [{...},...] } from the specified list for a month
# May cache data in SRV_MAIL/yearmonth.json
# Returns empty hash if error or if can't find month
@@ -152,7 +50,7 @@ def get_mails_month(yearmonth:, nondiscuss:)
data = {}
data[DATE] = DateTime.parse(message[/^Date: (.*)/, 1]).iso8601
data[FROM] = message[/^From: (.*)/, 1]
- data[WHO], data[COMMITTER] = find_who_from(data[FROM])
+ data[WHO], data[MailUtils::COMMITTER] = MailUtils.find_who_from(data[FROM])
data[SUBJECT] = message[/^Subject: (.*)/, 1]
if nondiscuss
nondiscuss.each do |typ, rx|
@@ -196,7 +94,7 @@ end
# Display monthly statistics for all available data
def display_monthly(months:, nondiscuss:)
months.sort.reverse.each do |month|
- data = get_mails_month(yearmonth: month, nondiscuss: NONDISCUSSION_SUBJECTS['<board.apache.org>'])
+ data = get_mails_month(yearmonth: month, nondiscuss: nondiscuss)
next if data.empty?
_h1 "board@ statistics for #{month} (total mails: #{data[MAILS].length + data[TOOLS].length})", id: "#{month}"
_div.row do
@@ -308,9 +206,9 @@ _html do
) do
months = Dir["#{SRV_MAIL}/*"].map {|path| File.basename(path).untaint}.grep(/^\d+$/)
if ENV['QUERY_STRING'].include? 'week'
- display_weekly(months: months, nondiscuss: NONDISCUSSION_SUBJECTS['<board.apache.org>'])
+ display_weekly(months: months, nondiscuss: MailUtils::NONDISCUSSION_SUBJECTS['<board.apache.org>'])
else
- display_monthly(months: months, nondiscuss: NONDISCUSSION_SUBJECTS['<board.apache.org>'])
+ display_monthly(months: months, nondiscuss: MailUtils::NONDISCUSSION_SUBJECTS['<board.apache.org>'])
end
end
end