You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2022/07/30 03:46:52 UTC
[lucene-jira-archive] 01/01: fix regex capturing mentions
This is an automated email from the ASF dual-hosted git repository.
tomoko pushed a commit to branch correct-mention-regex
in repository https://gitbox.apache.org/repos/asf/lucene-jira-archive.git
commit 5eb469b58b1e2f07499667d219358ba77e025e13
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Sat Jul 30 12:46:42 2022 +0900
fix regex capturing mentions
---
migration/src/jira_util.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/migration/src/jira_util.py b/migration/src/jira_util.py
index b16f1eff..06b8acb1 100644
--- a/migration/src/jira_util.py
+++ b/migration/src/jira_util.py
@@ -1,4 +1,5 @@
import re
+import itertools
from dataclasses import dataclass
from collections import defaultdict
from typing import Optional
@@ -227,8 +228,8 @@ JIRA_EMOJI_TO_UNICODE = {
REGEX_CRLF = re.compile(r"\r\n")
REGEX_JIRA_KEY = re.compile(r"[^/]LUCENE-\d+")
-REGEX_MENTION_ATMARK = re.compile(r"((?<=^)@[\w\.]\+|(?<=[\s\(\"'])@[\w\.]+)(?=[\s\)\"'\?!,\.$])") # this regex may capture only "@" + "<username>" mentions
-REGEX_MENION_TILDE = re.compile(r"((?<=^)\[~[\w\.]+\]|(?<=[\s\(\"'])\[~[\w\.]+\])(?=[\s\)\"'\?!,\.$])") # this regex may capture only "[~" + "<username>" + "]" mentions
+REGEX_MENTION_ATMARK = re.compile(r"(^@[\w\.]+)|((?<=[\s\(\"'])@[\w\.]+)(?=[\s\)\"'\?!,\.$])") # this regex may capture only "@" + "<username>" mentions
+REGEX_MENION_TILDE = re.compile(r"(^\[~[\w\.]+\])|((?<=[\s\(\"'])\[~[\w\.]+\])(?=[\s\)\"'\?!,\.$])") # this regex may capture only "[~" + "<username>" + "]" mentions
REGEX_LINK = re.compile(r"\[([^\]]+)\]\(([^\)]+)\)")
@@ -262,7 +263,7 @@ def convert_text(text: str, att_replace_map: dict[str, str] = {}, account_map: d
# convert @ mentions
mentions = re.findall(REGEX_MENTION_ATMARK, text)
if mentions:
- mentions = set(mentions)
+ mentions = set(filter(lambda x: x != '', itertools.chain.from_iterable(mentions)))
for m in mentions:
jira_id = m[1:]
disp_name = jira_users.get(jira_id)
@@ -274,7 +275,7 @@ def convert_text(text: str, att_replace_map: dict[str, str] = {}, account_map: d
# convert ~ mentions
mentions = re.findall(REGEX_MENION_TILDE, text)
if mentions:
- mentions = set(mentions)
+ mentions = set(filter(lambda x: x != '', itertools.chain.from_iterable(mentions)))
for m in mentions:
jira_id = m[2:-1]
disp_name = jira_users.get(jira_id)