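You are viewing a plain text version of this content. The canonical version is available in the Apache mailing list archive for commits@lucene.apache.org (the original hyperlink was not preserved in this copy).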
Posted to commits@lucene.apache.org by to...@apache.org on 2022/07/30 03:46:52 UTC
[lucene-jira-archive] 01/01: fix regex capturing mentions

This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a commit to branch correct-mention-regex
in repository https://gitbox.apache.org/repos/asf/lucene-jira-archive.git

commit 5eb469b58b1e2f07499667d219358ba77e025e13
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Sat Jul 30 12:46:42 2022 +0900

    fix regex capturing mentions
---
 migration/src/jira_util.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/migration/src/jira_util.py b/migration/src/jira_util.py
index b16f1eff..06b8acb1 100644
--- a/migration/src/jira_util.py
+++ b/migration/src/jira_util.py
@@ -1,4 +1,5 @@
 import re
+import itertools
 from dataclasses import dataclass
 from collections import defaultdict
 from typing import Optional
@@ -227,8 +228,8 @@ JIRA_EMOJI_TO_UNICODE = {
 
 REGEX_CRLF = re.compile(r"\r\n")
 REGEX_JIRA_KEY = re.compile(r"[^/]LUCENE-\d+")
-REGEX_MENTION_ATMARK = re.compile(r"((?<=^)@[\w\.]\+|(?<=[\s\(\"'])@[\w\.]+)(?=[\s\)\"'\?!,\.$])")  # this regex may capture only "@" + "<username>" mentions
-REGEX_MENION_TILDE = re.compile(r"((?<=^)\[~[\w\.]+\]|(?<=[\s\(\"'])\[~[\w\.]+\])(?=[\s\)\"'\?!,\.$])")  # this regex may capture only "[~" + "<username>" + "]" mentions
+REGEX_MENTION_ATMARK = re.compile(r"(^@[\w\.]+)|((?<=[\s\(\"'])@[\w\.]+)(?=[\s\)\"'\?!,\.$])")  # this regex may capture only "@" + "<username>" mentions
+REGEX_MENION_TILDE = re.compile(r"(^\[~[\w\.]+\])|((?<=[\s\(\"'])\[~[\w\.]+\])(?=[\s\)\"'\?!,\.$])")  # this regex may capture only "[~" + "<username>" + "]" mentions
 REGEX_LINK = re.compile(r"\[([^\]]+)\]\(([^\)]+)\)")
 
 
@@ -262,7 +263,7 @@ def convert_text(text: str, att_replace_map: dict[str, str] = {}, account_map: d
     # convert @ mentions
     mentions = re.findall(REGEX_MENTION_ATMARK, text)
     if mentions:
-        mentions = set(mentions)
+        mentions = set(filter(lambda x: x != '', itertools.chain.from_iterable(mentions)))
         for m in mentions:
             jira_id = m[1:]
             disp_name = jira_users.get(jira_id)
@@ -274,7 +275,7 @@ def convert_text(text: str, att_replace_map: dict[str, str] = {}, account_map: d
     # convert ~ mentions
     mentions = re.findall(REGEX_MENION_TILDE, text)
     if mentions:
-        mentions = set(mentions)
+        mentions = set(filter(lambda x: x != '', itertools.chain.from_iterable(mentions)))
         for m in mentions:
             jira_id = m[2:-1]
             disp_name = jira_users.get(jira_id)