You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by an...@apache.org on 2014/12/03 01:36:37 UTC

spark git commit: [Release] Translate unknown author names automatically

Repository: spark
Updated Branches:
  refs/heads/master 2d4f6e70f -> 5da21f07d


[Release] Translate unknown author names automatically


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5da21f07
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5da21f07
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5da21f07

Branch: refs/heads/master
Commit: 5da21f07d862212067719ddaa2fef6e09db21c10
Parents: 2d4f6e7
Author: Andrew Or <an...@databricks.com>
Authored: Tue Dec 2 16:36:12 2014 -0800
Committer: Andrew Or <an...@databricks.com>
Committed: Tue Dec 2 16:36:12 2014 -0800

----------------------------------------------------------------------
 dev/create-release/generate-contributors.py | 36 ++++-----
 dev/create-release/releaseutils.py          | 93 ++++++++++++++++++++++++
 2 files changed, 111 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5da21f07/dev/create-release/generate-contributors.py
----------------------------------------------------------------------
diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py
index f4bf734..99c29ef 100755
--- a/dev/create-release/generate-contributors.py
+++ b/dev/create-release/generate-contributors.py
@@ -26,23 +26,11 @@ from releaseutils import *
 
 # You must set the following before use!
 JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
+JIRA_USERNAME = os.environ.get("JIRA_USERNAME", None)
+JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", None)
 START_COMMIT = os.environ.get("START_COMMIT", "37b100")
 END_COMMIT = os.environ.get("END_COMMIT", "3693ae")
 
-try:
-    from jira.client import JIRA
-except ImportError:
-    print "This tool requires the jira-python library"
-    print "Install using 'sudo pip install jira-python'"
-    sys.exit(-1)
-
-try:
-    import unidecode
-except ImportError:
-    print "This tool requires the unidecode library to decode obscure github usernames"
-    print "Install using 'sudo pip install unidecode'"
-    sys.exit(-1)
-
 # If commit range is not specified, prompt the user to provide it
 if not START_COMMIT or not END_COMMIT:
     print "A commit range is required to proceed."
@@ -52,6 +40,8 @@ if not START_COMMIT or not END_COMMIT:
         END_COMMIT = raw_input("Please specify ending commit hash (non-inclusive): ")
 
 # Verify provided arguments
+if not JIRA_USERNAME: sys.exit("JIRA_USERNAME must be provided")
+if not JIRA_PASSWORD: sys.exit("JIRA_PASSWORD must be provided")
 start_commit_line = get_one_line(START_COMMIT)
 end_commit_line = get_one_line(END_COMMIT)
 num_commits = num_commits_in_range(START_COMMIT, END_COMMIT)
@@ -70,6 +60,14 @@ if response.lower() != "y" and response:
     sys.exit("Ok, exiting")
 print "==================================================================================\n"
 
+# Setup JIRA and github clients. We use two JIRA clients, one with authentication
+# and one without, because authentication is slow and required only when we query
+# JIRA user details but not Spark issues
+jira_options = { "server": JIRA_API_BASE }
+jira_client = JIRA(options = jira_options)
+jira_client_auth = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD))
+github_client = Github()
+
 # Find all commits within this range
 print "Gathering commits within range [%s..%s)" % (START_COMMIT, END_COMMIT)
 commits = get_one_line_commits(START_COMMIT, END_COMMIT)
@@ -129,14 +127,16 @@ warnings = []
 # }
 #
 author_info = {}
-jira_options = { "server": JIRA_API_BASE }
-jira = JIRA(jira_options)
 print "\n=========================== Compiling contributor list ==========================="
 for commit in filtered_commits:
     commit_hash = re.findall("^[a-z0-9]+", commit)[0]
     issues = re.findall("SPARK-[0-9]+", commit.upper())
+    # Translate the author in case the github username is not an actual name
+    # Also guard against any special characters used in the name
+    # Note the JIRA client we use here must have authentication enabled
     author = get_author(commit_hash)
-    author = unidecode.unidecode(unicode(author, "UTF-8")) # guard against special characters
+    author = unidecode.unidecode(unicode(author, "UTF-8"))
+    author = translate_author(author, github_client, jira_client_auth, warnings)
     date = get_date(commit_hash)
     # Parse components from the commit message, if any
     commit_components = find_components(commit, commit_hash)
@@ -151,7 +151,7 @@ for commit in filtered_commits:
             author_info[author][issue_type].add(component)
     # Find issues and components associated with this commit
     for issue in issues:
-        jira_issue = jira.issue(issue)
+        jira_issue = jira_client.issue(issue)
         jira_type = jira_issue.fields.issuetype.name
         jira_type = translate_issue_type(jira_type, issue, warnings)
         jira_components = [translate_component(c.name, commit_hash, warnings)\

http://git-wip-us.apache.org/repos/asf/spark/blob/5da21f07/dev/create-release/releaseutils.py
----------------------------------------------------------------------
diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py
index e56d7fa..0d6830b 100755
--- a/dev/create-release/releaseutils.py
+++ b/dev/create-release/releaseutils.py
@@ -21,6 +21,29 @@
 import re
 from subprocess import Popen, PIPE
 
+try:
+    from jira.client import JIRA
+    from jira.exceptions import JIRAError
+except ImportError:
+    print "This tool requires the jira-python library"
+    print "Install using 'sudo pip install jira-python'"
+    sys.exit(-1)
+
+try:
+    from github import Github
+    from github import GithubException
+except ImportError:
+    print "This tool requires the PyGithub library"
+    print "Install using 'sudo pip install PyGithub'"
+    sys.exit(-1)
+
+try:
+    import unidecode
+except ImportError:
+    print "This tool requires the unidecode library to decode obscure github usernames"
+    print "Install using 'sudo pip install unidecode'"
+    sys.exit(-1)
+
 # Utility functions run git commands (written with Git 1.8.5)
 def run_cmd(cmd): return Popen(cmd, stdout=PIPE).communicate()[0]
 def get_author(commit_hash):
@@ -122,3 +145,73 @@ def nice_join(str_list):
     else:
         return ", ".join(str_list[:-1]) + ", and " + str_list[-1]
 
+# Return the full name of the specified user on Github
+# Return None if the user doesn't exist (404) or if no github client is given
+def get_github_name(author, github_client):
+    if github_client:
+        try:
+            return github_client.get_user(author).name
+        except GithubException as e:
+            # Re-raise anything but "not found"; bare raise keeps the original traceback
+            if e.status != 404:
+                raise
+    return None
+
+# Return the full name (displayName) of the specified user on JIRA
+# Return None if the user doesn't exist (404) or if no JIRA client is given
+def get_jira_name(author, jira_client):
+    if jira_client:
+        try:
+            return jira_client.user(author).displayName
+        except JIRAError as e:
+            # Re-raise anything but "not found"; bare raise keeps the original traceback
+            if e.status_code != 404:
+                raise
+    return None
+
+# Return whether the given name looks like a full name, i.e. has two or three whitespace-separated words
+def is_valid_author(author):
+    if not author: return False
+    author_words = len(author.split())  # split() ignores leading, trailing and repeated whitespace
+    return author_words == 2 or author_words == 3
+
+# Upper-case the initial letter of every word in the author name; empty words (extra spaces) are dropped
+def capitalize_author(author):
+    if not author: return None
+    pieces = (piece for piece in author.split(" ") if piece)
+    pieces = [piece[0].capitalize() + piece[1:] for piece in pieces]
+    return " ".join(pieces)
+
+# Cache of translations already performed, keyed by the original (untranslated) github author
+translated_authors = {}
+
+# Format the given author for the contributors list. A name that already looks like a full name is
+# only capitalized; otherwise github and JIRA are searched for replacements. Only a valid github
+# name is substituted automatically; the outcome is appended to `warnings` and cached per author.
+def translate_author(github_author, github_client, jira_client, warnings):
+    if is_valid_author(github_author):
+        return capitalize_author(github_author)
+    # If the translated author is already cached, just return it
+    if github_author in translated_authors:
+        return translated_authors[github_author]
+    # Otherwise, author name is not found, so we need to search for an alternative name
+    candidates = set()
+    github_name = get_github_name(github_author, github_client)
+    jira_name = get_jira_name(github_author, jira_client)
+    if is_valid_author(github_name): github_name = capitalize_author(github_name)
+    if is_valid_author(jira_name): jira_name = capitalize_author(jira_name)
+    if github_name: candidates.add(github_name)
+    if jira_name: candidates.add(jira_name)
+    # Only use the github name as a replacement automatically
+    # The JIRA name may not make sense because it can belong to someone else
+    if is_valid_author(github_name):
+        candidates_message = " (another candidate is %s)" % jira_name if jira_name and jira_name != github_name else ""
+        warnings.append("Replacing github user %s with %s%s" % (github_author, github_name, candidates_message))
+        translated_authors[github_author] = github_name
+        return translated_authors[github_author]
+    # No direct replacement, so return the original author and list any candidates found
+    candidates_message = " (candidates: %s)" % nice_join(sorted(candidates)) if candidates else ""
+    warnings.append("Unable to find a replacement for github user %s%s" % (github_author, candidates_message))
+    translated_authors[github_author] = github_author
+    return translated_authors[github_author]
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org