You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by cu...@apache.org on 2018/05/27 15:31:42 UTC
[whimsy] branch master updated: NameMap between two id systems
This is an automated email from the ASF dual-hosted git repository.
curcuru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new 62e0d36 NameMap between two id systems
62e0d36 is described below
commit 62e0d36351827be3f74ac399afbd49dc1cd29415
Author: Shane Curcuru <as...@shanecurcuru.org>
AuthorDate: Sun May 27 11:31:30 2018 -0400
NameMap between two id systems
---
lib/whimsy/namemap.rb | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 265 insertions(+)
diff --git a/lib/whimsy/namemap.rb b/lib/whimsy/namemap.rb
new file mode 100644
index 0000000..484e5ac
--- /dev/null
+++ b/lib/whimsy/namemap.rb
@@ -0,0 +1,265 @@
+#!/usr/bin/env ruby
+require 'json'
+
# Emit the not-ready banner on stderr so it never mixes with the JSON
# report that this script writes to stdout.
warn 'DANGER, WILL ROBINSON! THIS IS NOT READY FOR PRODUCTION USE!'
+
+# Map usernames between different systems (id.a.o and JIRA|Confluence)
+# Input data is:
+# committers = [{ID=>'curcuru', NAME =>'Shane Curcuru', MAIL=>['mail1', 'mail2',...]}, ...]
+# other = [{ID=>'curcuru', NAME =>'Shane Curcuru', MAIL=>'mailone'}, ...]
+# Analyzes matching IDs and emails, and returns:
+# matches = hash by committer id of all committers, and if they matched
+# SAME:... committer ID and an email address match exactly
+# Most likely committer has same ID on both
+# DIFF:... committer ID matches, but none of the emails matched
+# Most likely the other account is a different person than committer
+# NONE:... there were no ID matches
+# Committer's ID is not found in other system
+# crossmatches = hash by email address of any other matches from
+# any of the other system's emails to any committer's emails
+# REVIEW:... email address(es) matched, but IDs did not
+# Note: emails may not be unique across accounts on either side
+# Need to manually investigate, since may involve multiple people/accounts
+# DIFF: an email address matched, but the IDs did not
+# Manually investigate to see why
+# NOTE: You *must* manually evaluate the results!
# Maps account ids between the committer roster and another system
# (e.g. JIRA or Confluence) by comparing ids and email addresses.
# Every result is a human-readable status string; results must be
# reviewed manually.
module NameMap
  extend self

  COMMITTER_JSON = 'https://whimsy.apache.org/roster/committer/index.json'
  ID = 'id'
  MAIL = 'mail'
  NAME = 'name'

  TEST_COMMITTERS = [ # Drawn from Whimsy's committer data
    {
      'id' => 'curcuru',
      'name' => 'Shane Curcuru',
      'mail' => [
        'asf@shanecurcuru.org',
        'asfl@shanecurcuru.org',
        'curcuru@apache.org'
      ],
      'member' => true
    },
    {
      'id' => 'makemyday',
      'name' => 'Clint Eastwood',
      'mail' => [
        'clint@eastwood.com',
        'gun@smoke.org'
      ],
    },
    {
      'id' => 'robocop',
      'name' => 'Peter Weller',
      'mail' => [
        'peter@buckaroo.com',
        'both@lists.find',
      ],
    },
    {
      'id' => 'laurel',
      'name' => 'Yanni (musician)',
      'mail' => [
        'both@lists.find',
      ],
    },
    {
      'id' => 'emailUnMatch',
      'name' => 'Test Case1c',
      'mail' => [
        'email@example.com',
      ],
    },
  ]

  TEST_OTHER = [ # Any other system must provide id,name,email for each user
    {
      'id' => 'curcuru',
      'name' => 'Shane Curcuru',
      'mail' => 'asf@shanecurcuru.org',
    },
    {
      'id' => 'bogie',
      'name' => 'Rick Blaine',
      'mail' => 'piano@sam.org',
    },
    {
      'id' => 'makemyday',
      'name' => 'Doris Day',
      'mail' => 'doris@day.movies',
    },
    {
      'id' => 'yanni',
      'name' => 'Laurel Hardy',
      'mail' => 'both@lists.find',
    },
    {
      'id' => 'yannidouble',
      'name' => 'Laurel and Hardy',
      'mail' => 'both@lists.find',
    },
    {
      'id' => 'emailNotMatch',
      'name' => 'Test Case1o',
      'mail' => 'email@example.com',
    },
  ]

  # Read committer accounts
  # @param io JSON text, or an IO-like object (anything responding to
  #   #read) to read the JSON text from; nil/false selects the built-in
  #   TEST_COMMITTERS data
  # @return parsed json data (array of committer hashes)
  def get_committers(io)
    return TEST_COMMITTERS unless io

    # JSON.parse needs a String, so drain stream-like inputs first
    JSON.parse(io.respond_to?(:read) ? io.read : io)
  end

  # Read other system accounts
  # TODO Depends on file format of exported other system accounts
  # @param f filename to read from (currently ignored)
  # @return the built-in TEST_OTHER data
  def get_other(f)
    TEST_OTHER
  end

  # Transform committer accounts into lookup hashes
  # @param committers array from COMMITTER_JSON
  # @return byid, bymail - hashes for lookups to committer accounts
  #   byid - hash by id of data
  #   bymail - hash by email of array of datum (in case non-unique emails)
  def hash_committers(committers)
    byid = {}
    bymail = {}
    committers.each do |hsh|
      byid[hsh[ID]] = hsh
      # Committers can have multiple emails; Array() also tolerates a
      # record with no mail entry or a single String address
      Array(hsh[MAIL]).each do |addr|
        (bymail[addr] ||= []) << hsh
      end
    end
    [byid, bymail]
  end

  # Transform other system accounts into lookup hashes
  # @param other array of hashes including 'id', 'name', 'mail' keys
  # @return byid, bymail - hashes for lookups to other system accounts
  #   byid - hash by id of data
  #   bymail - hash by email of array of datum (in case non-unique emails)
  def hash_other(other)
    byid = {}
    bymail = {}
    other.each do |hsh|
      byid[hsh[ID]] = hsh
      (bymail[hsh[MAIL]] ||= []) << hsh
    end
    [byid, bymail]
  end

  # Compare committer ids to other system account ids
  # @param cids - hash by id of committer data
  # @param cmails - hash by email of [committer1, ...]
  # @param oids - hash by id of other system account data
  # @param omails - hash by email of [other1, ...]
  # @return matches, crossmatches
  #   matches - hash by committer id of a SAME:/DIFF:/NONE: status string
  #   crossmatches - hash by email of a DIFF:/REVIEW: status string, for
  #     emails shared between the systems under non-identical ids
  def compare(cids, cmails, oids, omails)
    # For every committer, check for a matching account in other system
    matches = {}
    cids.each do |cid, committer|
      matches[cid] =
        if oids.key?(cid)
          id_match_status(committer, oids[cid])
        else
          'NONE:no id match found'
        end
    end

    # Also cross-check email addresses of other system to all committer emails
    crossmatches = {}
    omails.each do |omail, other_accounts|
      next unless cmails.key?(omail)

      status = crossmatch_status(omail, cmails[omail], other_accounts)
      crossmatches[omail] = status if status
    end
    [matches, crossmatches]
  end

  # Compare a committer list to another system's list
  # @param cio JSON text or io stream to read committer accounts from
  # @param ofile filename to read other system accounts from
  # @return matches, crossmatches - see #compare
  def report(cio = nil, ofile = nil)
    cids, cmails = hash_committers(get_committers(cio))
    oids, omails = hash_other(get_other(ofile))
    compare(cids, cmails, oids, omails)
  end

  # Check for email duplicates in committer roster
  # @param io JSON text or io stream, as accepted by #get_committers
  # @return dups, histogram
  #   dups - hash by email of a comma-separated list of the committer ids
  #     sharing that email (only emails listed by more than one entry)
  #   histogram - hash by number of addresses of how many committers
  #     list that many addresses
  def committer_dups(io)
    cids, cmails = hash_committers(get_committers(io))

    histogram = Hash.new(0)
    cids.each_value { |hsh| histogram[Array(hsh[MAIL]).length] += 1 }

    dups = {}
    cmails.each do |addr, ary|
      dups[addr] = ary.map { |hsh| hsh[ID] }.join(',') if ary.length > 1
    end
    [dups, histogram]
  end

  private

  # Status string for a committer whose id also exists in the other system:
  # SAME when any committer address equals the other account's address,
  # otherwise DIFF.
  def id_match_status(committer, other)
    mails = Array(committer[MAIL])
    hit = mails.index { |caddr| caddr.eql?(other[MAIL]) }
    if hit
      "SAME:email match:(#{committer[NAME]},#{mails[hit]}):(#{other[NAME]},#{other[MAIL]})"
    else
      # Show the address itself only when there is exactly one to show
      shown = mails.length == 1 ? mails[0] : "#{mails.length} addresses"
      "DIFF:email no match:(#{committer[NAME]},#{shown}):(#{other[NAME]},#{other[MAIL]})"
    end
  end

  # Status string for an email present in both systems, or nil when a single
  # account on each side shares the same id (already covered by the id match).
  def crossmatch_status(omail, committers, others)
    if committers.length == 1 && others.length == 1
      committer = committers[0]
      other = others[0]
      return nil if committer[ID].eql?(other[ID])

      # Mismatch of two IDs with same (unique) emails
      "DIFF:id no match:(#{committer[ID]},#{committer[NAME]}):(#{other[ID]},#{other[NAME]})"
    else
      # Several accounts share this email on at least one side
      "REVIEW:#{omail}:#{account_list(committers)}:#{account_list(others)}"
    end
  end

  # Render accounts as concatenated "(id,name)" groups.
  def account_list(accounts)
    accounts.map { |acct| "(#{acct[ID]},#{acct[NAME]})" }.join
  end
end
+
#### MAIN TESTING CODE
# Only run the self-test when this file is executed directly, so that
# requiring it as a library neither runs the comparison nor prints anything.
if __FILE__ == $PROGRAM_NAME
  matches, crossmatches = NameMap.report
  puts JSON.pretty_generate(matches)
  puts JSON.pretty_generate(crossmatches)

  # dups, histogram = NameMap.committer_dups(File.read('committerlist-from-whimsy.json'))
  # puts JSON.pretty_generate(dups)
  # puts JSON.pretty_generate(histogram)
end
+
--
To stop receiving notification emails like this one, please contact
curcuru@apache.org.