You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@lucene.apache.org by to...@apache.org on 2022/06/30 15:46:39 UTC

[lucene-jira-archive] branch main updated: convert emoji, handle conversion errors caused by newline chars

This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene-jira-archive.git


The following commit(s) were added to refs/heads/main by this push:
     new d3a9cf8  convert emoji, handle conversion errors caused by newline chars
d3a9cf8 is described below

commit d3a9cf87c8fa5fadb6f333916cdad6f4793c9f91
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Fri Jul 1 00:46:29 2022 +0900

    convert emoji, handle conversion errors caused by newline chars
---
 migration/src/jira_util.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/migration/src/jira_util.py b/migration/src/jira_util.py
index 16c91ea..4fc5d17 100644
--- a/migration/src/jira_util.py
+++ b/migration/src/jira_util.py
@@ -154,6 +154,27 @@ def extract_pull_requests(o: dict) -> list[str]:
     return res
 
 
+JIRA_EMOJI_TO_UNICODE = {
+    "(y)": "\U0001F44D",
+    "(n)": "\U0001F44E",
+    "(i)": "\U0001F6C8",
+    "(/)": "\u2714",
+    "(x)": "\u274C",
+    "(!)": "\u26A0",
+    "(+)": "\u002B",
+    "(-)": "\u2212",
+    "(?)": "\u003F",
+    "(on)": "\U0001F4A1",
+    "(off)": "\U0001F4A1",
+    "(*)": "\u2B50",
+    "(*r)": "\u2B50",
+    "(*g)": "\u2B50",
+    "(*b)": "\u2B50",
+    "(flag)": "\U0001F3F4",
+    "(flagoff)": "\U0001F3F3"
+}
+
+REGEX_CRLF = re.compile(r"\r\n\s*")
 REGEX_JIRA_KEY = re.compile(r"[^/]LUCENE-\d+")
 REGEX_MENTION = re.compile(r"@\w+")
 REGEX_LINK = re.compile(r"\[([^\]]+)\]\(([^\)]+)\)")
@@ -169,7 +190,9 @@ def convert_text(text: str, att_replace_map: dict[str, str] = {}) -> str:
                 res = f"[{m.group(1)}]({repl})"
         return res
 
-    text = text.replace("\r\n", "\n")
+    text = re.sub(REGEX_CRLF, "\n", text)
+    for emoji, unicode in JIRA_EMOJI_TO_UNICODE.items():
+        text = text.replace(emoji, unicode)
     text = jira2markdown.convert(text)
 
     # markup @ mentions with ``