You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by cu...@apache.org on 2018/05/27 15:31:42 UTC

[whimsy] branch master updated: NameMap between two id systems

This is an automated email from the ASF dual-hosted git repository.

curcuru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git


The following commit(s) were added to refs/heads/master by this push:
     new 62e0d36  NameMap between two id systems
62e0d36 is described below

commit 62e0d36351827be3f74ac399afbd49dc1cd29415
Author: Shane Curcuru <as...@shanecurcuru.org>
AuthorDate: Sun May 27 11:31:30 2018 -0400

    NameMap between two id systems
---
 lib/whimsy/namemap.rb | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 265 insertions(+)

diff --git a/lib/whimsy/namemap.rb b/lib/whimsy/namemap.rb
new file mode 100644
index 0000000..484e5ac
--- /dev/null
+++ b/lib/whimsy/namemap.rb
@@ -0,0 +1,265 @@
+#!/usr/bin/env ruby
+require 'json'
+
+# Banner written to standard output as soon as this file is loaded
+$stdout.puts 'DANGER, WILL ROBINSON! THIS IS NOT READY FOR PRODUCTION USE!'
+
+# Map usernames between different systems (id.a.o and JIRA|Confluence)
+# Input data is:
+#   committers = [{ID=>'curcuru', NAME =>'Shane Curcuru', MAIL=>['mail1', 'mail2',...]}, ...]
+#   other = [{ID=>'curcuru', NAME =>'Shane Curcuru', MAIL=>'mailone'}, ...]
+# Analyzes matching IDs and emails, and returns:
+#   matches = hash by committer id of all committers, and if they matched
+#     SAME:... committer ID and an email address match exactly
+#       Most likely committer has same ID on both
+#     DIFF:... committer ID matches, but none of the emails matched
+#       Most likely the other account is a different person than committer
+#     NONE:... there were no ID matches
+#       Committer's ID is not found in other system
+#   crossmatches = hash by email address of any other matches from
+#     any of the other system's emails to any committer's emails
+#     REVIEW:... email address(es) matched, but IDs did not
+#       Note: emails may not be unique across accounts on either side
+#       Need to manually investigate, since may involve multiple people/accounts
+#     DIFF: an email address matched, but the IDs did not
+#       Manually investigate to see why
+# NOTE: You *must* manually evaluate the results!
+module NameMap
+  extend self
+  # URL of Whimsy's committer roster in JSON form
+  # (no method in this module fetches it yet; callers supply the JSON themselves)
+  COMMITTER_JSON = 'https://whimsy.apache.org/roster/committer/index.json'.freeze
+  # Hash keys shared by committer records and other-system account records;
+  # frozen so the shared key strings cannot be mutated by accident
+  ID = 'id'.freeze
+  MAIL = 'mail'.freeze
+  NAME = 'name'.freeze
+  
+  # Built-in committer fixture, returned by get_committers when no input is given.
+  # Each record has an 'id', a 'name' and a 'mail' array; together with TEST_OTHER
+  # the records cover every outcome that compare can report.
+  TEST_COMMITTERS = [ # Drawn from Whimsy's committer data
+    # Same id exists in TEST_OTHER and one address matches => SAME
+    {
+      'id' => 'curcuru',
+      'name' => 'Shane Curcuru',
+      'mail' => [
+        'asf@shanecurcuru.org',
+        'asfl@shanecurcuru.org',
+        'curcuru@apache.org'
+      ],
+      'member' => true
+    },
+    # Same id exists in TEST_OTHER but with an address not listed here => DIFF
+    {
+      'id' => 'makemyday',
+      'name' => 'Clint Eastwood',
+      'mail' => [
+        'clint@eastwood.com',
+        'gun@smoke.org'
+      ],
+    },
+    # Id is absent from TEST_OTHER => NONE; 'both@lists.find' is shared with
+    # another committer and with two TEST_OTHER accounts => REVIEW crossmatch
+    {
+      'id' => 'robocop',
+      'name' => 'Peter Weller',
+      'mail' => [
+        'peter@buckaroo.com',
+        'both@lists.find',
+      ],
+    },
+    # Id is absent from TEST_OTHER => NONE; second holder of the shared address
+    {
+      'id' => 'laurel',
+      'name' => 'Yanni (musician)',
+      'mail' => [
+        'both@lists.find',
+      ],
+    },
+    # Id is absent from TEST_OTHER => NONE; its only address belongs to exactly
+    # one TEST_OTHER account with a different id => DIFF crossmatch
+    {
+      'id' => 'emailUnMatch',
+      'name' => 'Test Case1c',
+      'mail' => [
+        'email@example.com',
+      ],
+    },
+  ]
+  
+  # Built-in fixture for the other system, returned by get_other.
+  # Unlike committer records, each account carries a single 'mail' string.
+  TEST_OTHER = [ # Any other system must provide id,name,email for each user
+    # Id and address both match a committer => SAME
+    {
+      'id' => 'curcuru',
+      'name' => 'Shane Curcuru',
+      'mail' => 'asf@shanecurcuru.org',
+    },
+    # Neither id nor address is known to TEST_COMMITTERS => not reported at all
+    {
+      'id' => 'bogie',
+      'name' => 'Rick Blaine',
+      'mail' => 'piano@sam.org',
+    },
+    # Id matches a committer, but the address is not one of theirs => DIFF
+    {
+      'id' => 'makemyday',
+      'name' => 'Doris Day',
+      'mail' => 'doris@day.movies',
+    },
+    # Two accounts sharing one address that two committers also list => REVIEW
+    {
+      'id' => 'yanni',
+      'name' => 'Laurel Hardy',
+      'mail' => 'both@lists.find',
+    },
+    {
+      'id' => 'yannidouble',
+      'name' => 'Laurel and Hardy',
+      'mail' => 'both@lists.find',
+    },
+    # Address is unique on both sides but the ids differ => DIFF crossmatch
+    {
+      'id' => 'emailNotMatch',
+      'name' => 'Test Case1o',
+      'mail' => 'email@example.com',
+    },
+  ]
+  
+  # Read committer accounts
+  # @param io stream to read JSON from
+  # @return json data
+  def get_committers(io)
+    if io
+      return JSON.parse(io)
+    else
+      return TEST_COMMITTERS
+    end
+  end
+  
+  # Read other system accounts
+  # TODO Depends on file format of exported other system accounts
+  # @param f filename to read from
+  # @return json data
+  def get_other(f)
+    return TEST_OTHER
+  end
+  
+  # Transform committer accounts into lookup hashes
+  # @param committers array from COMMITTER_JSON
+  # @return byid, bymail - hashes for lookups to committer accounts
+  #    byid - hash by id of data
+  #    bymail - hash by id of array of datum (in case non-unique emails)
+  def hash_committers(committers)
+    byid = {}
+    bymail = {}
+    committers.each do |hsh|
+      byid[hsh[ID]] = hsh
+      hsh[MAIL].each do |addr| # Committers can have multiple emails
+        (bymail[addr] ||= []) << hsh
+      end
+    end
+    return byid, bymail
+  end
+  
+  # Transform other system accounts into lookup hashes
+  # @param other array of hashes including 'id', 'name', 'mail' keys
+  # @return byid, bymail - hashes for lookups to other system accounts
+  #    byid - hash by id of data
+  #    bymail - hash by id of array of datum (in case non-unique emails)
+  def hash_other(other)
+    byid = {}
+    bymail = {}
+    other.each do |hsh|
+      byid[hsh[ID]] = hsh
+      (bymail[hsh[MAIL]] ||= []) << hsh
+    end
+    return byid, bymail
+  end
+  
+  # Compare committer ids to other system account ids
+  # @param cids - hash by id of committer data
+  # @param cmails - hash by email of [committer1, ...]
+  # @param cids - hash by id of other system account data
+  # @param cmails - hash by email of [other1, ...]
+  # @return matches, crossmatches - list of committer ids matched or not; list of emails cross-matched
+  def compare(cids, cmails, oids, omails)
+    matches = {}
+    crossmatches = {}
+    
+    # For every committer, check for a matching account in other system
+    cids.each do |cid, committer|
+      # If the other system has identical id as committer
+      if oids.has_key?(cid)
+        # Cross-check all our mails with the other account to see if *any* match
+        committer[MAIL].each do |caddr|
+          # If one matches exactly with a single other account, log a likely match
+          if caddr.eql?(oids[cid][MAIL])
+            matches[cid] = "SAME:email match:(#{committer[NAME]},#{caddr}):(#{oids[cid][NAME]},#{oids[cid][MAIL]})"
+            break
+          end
+        end
+        if matches[cid].nil?
+          # None of our emails matched the other2 email, log
+          if committer[MAIL].length == 1
+            matches[cid] = "DIFF:email no match:(#{committer[NAME]},#{committer[MAIL][0]}):(#{oids[cid][NAME]},#{oids[cid][MAIL]})"
+          else
+            matches[cid] = "DIFF:email no match:(#{committer[NAME]},#{committer[MAIL].length} addresses):(#{oids[cid][NAME]},#{oids[cid][MAIL]})"
+          end
+        end
+      else # No id match, log it
+        matches[cid] = "NONE:no id match found"
+      end
+    end
+    
+    # Also cross-check email addresses of other system to all committer emails
+    omails.each do |omail, other_accounts|
+      if cmails.has_key?(omail)
+        # Each bymail entry is an array; usually 1 element, but sometimes more
+        if cmails[omail].length == 1 && other_accounts.length == 1
+          # Simple case: check single id value
+          if cmails[omail][0][ID].eql?(other_accounts[0][ID])
+            # no-op: If both emails have single account that matches, ignore (was logged above)
+          else
+            # Mismatch of two IDs with same (unique) emails
+            crossmatches[omail] = "DIFF:id no match:(#{cmails[omail][0][ID]},#{cmails[omail][0][NAME]}):(#{other_accounts[0][ID]},#{other_accounts[0][NAME]})"
+          end
+        else
+          # Complex case: check through arrays of accounts with same email
+          str = "REVIEW:#{omail}:"
+          cmails[omail].each do |itm|
+            str += "(#{itm[ID]},#{itm[NAME]})"
+          end
+          str += ':'
+          other_accounts.each do |itm|
+            str += "(#{itm[ID]},#{itm[NAME]})"
+          end
+          crossmatches[omail] = str
+        end
+      end
+    end
+    return matches, crossmatches
+  end
+  
+  # Compare a committer list to another system's list
+  # @param cio io stream to read committer accounts from
+  # @param ofile filename to read other system accounts from
+  # @return matches, crossmatches - list of committer ids matched or not; list of emails cross-matched
+  def report(cio = nil, ofile = nil)
+    cids, cmails = hash_committers(get_committers(cio))
+    oids, omails = hash_other(get_other(ofile))
+    matches, crossmatches = compare(cids, cmails, oids, omails)
+    return matches, crossmatches
+  end
+  
+  # Check for email duplicates in committer roster
+  # @return hash of any committers with duplicate emails
+  # @return histogram of how many aliases committers list
+  def committer_dups(io)
+    dups = {}
+    histogram = Hash.new{|k,v| v = 0}
+    cids, cmails = hash_committers(get_committers(io))
+    cids.each do |id, hsh|
+      histogram[hsh[MAIL].length] += 1
+    end
+    cmails.each do |addr, ary|
+      if ary.length > 1
+        dups[addr] = ''
+        ary.each do |hsh|
+          dups[addr] += "#{hsh[ID]},"
+        end
+      end
+    end
+    return dups, histogram
+  end
+end
+
+#### MAIN TESTING CODE
+matches, crossmatches = NameMap.report()
+puts JSON.pretty_generate(matches)
+puts JSON.pretty_generate(crossmatches)
+
+# dups, histogram = NameMap.committer_dups(File.read('committerlist-from-whimsy.json'))
+# puts JSON.pretty_generate(dups)
+# puts JSON.pretty_generate(histogram)
+

-- 
To stop receiving notification emails like this one, please contact
curcuru@apache.org.