You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2022/07/08 18:08:16 UTC
[lucene-jira-archive] 01/01: improve match expressions for lists
This is an automated email from the ASF dual-hosted git repository.
tomoko pushed a commit to branch fix-bullet-list
in repository https://gitbox.apache.org/repos/asf/lucene-jira-archive.git
commit 4548495d00a1dff36aaf559d60e50b2d28fe2730
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Sat Jul 9 03:08:03 2022 +0900
improve match expressions for lists
---
migration/src/jira_util.py | 13 +++++-
migration/src/markup/__init__.py | 0
migration/src/markup/lists.py | 98 ++++++++++++++++++++++++++++++++++++++++
migration/src/markup/py.typed | 0
4 files changed, 109 insertions(+), 2 deletions(-)
diff --git a/migration/src/jira_util.py b/migration/src/jira_util.py
index d0c2bff5..7dcecdec 100644
--- a/migration/src/jira_util.py
+++ b/migration/src/jira_util.py
@@ -4,7 +4,10 @@ from collections import defaultdict
from typing import Optional
import jira2markdown
+from jira2markdown.elements import MarkupElements
+from jira2markdown.markup.lists import UnorderedList, OrderedList
+from markup.lists import UnorderedTweakedList, OrderedTweakedList
@dataclass
class Attachment(object):
@@ -190,11 +193,17 @@ def convert_text(text: str, att_replace_map: dict[str, str] = {}) -> str:
res = f"[{m.group(1)}]({repl})"
return res
- text = re.sub(REGEX_CRLF, "\n", text)
+ text = re.sub(REGEX_CRLF, "\n", text) # jira2markdown does not support carriage return (?)
+ # convert Jira special emojis into corresponding or similar Unicode characters
for emoji, unicode in JIRA_EMOJI_TO_UNICODE.items():
text = text.replace(emoji, unicode)
- text = jira2markdown.convert(text)
+
+ # convert Jira markup into Markdown with customization
+ elements = MarkupElements()
+ elements.replace(UnorderedList, UnorderedTweakedList)
+ elements.replace(OrderedList, OrderedTweakedList)
+ text = jira2markdown.convert(text, elements=elements)
# markup @ mentions with ``
mentions = re.findall(REGEX_MENTION, text)
diff --git a/migration/src/markup/__init__.py b/migration/src/markup/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/migration/src/markup/lists.py b/migration/src/markup/lists.py
new file mode 100644
index 00000000..47853a2c
--- /dev/null
+++ b/migration/src/markup/lists.py
@@ -0,0 +1,98 @@
+from pyparsing import (
+ Char,
+ Combine,
+ LineEnd,
+ LineStart,
+ Literal,
+ MatchFirst,
+ OneOrMore,
+ ZeroOrMore,
+ Optional,
+ ParserElement,
+ ParseResults,
+ SkipTo,
+ StringEnd,
+ White,
+)
+
+from jira2markdown.markup.advanced import Panel
+from jira2markdown.markup.base import AbstractMarkup
+from jira2markdown.markup.text_effects import BlockQuote, Color
+from jira2markdown.markup.lists import ListIndentState, ListIndent
+
+
+class TweakedList(AbstractMarkup):
+ is_inline_element = False
+
+ def __init__(self, nested_token: str, nested_indent: int, tokens: str, indent: int, bullet: str, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ self.nested_token = nested_token
+ self.nested_indent = nested_indent
+ self.tokens = tokens
+ self.indent = indent
+ self.bullet = bullet
+
+ self.indent_state = ListIndentState()
+
+ def action(self, tokens: ParseResults) -> str:
+ result = []
+
+ for line in tokens:
+ # print(repr(line))
+ bullets, text = line.split(" ", maxsplit=1)
+
+ nested_indent = 0
+ while bullets[0] == self.nested_token:
+ nested_indent += 1
+ bullets = bullets[1:]
+
+ count = nested_indent * self.nested_indent + len(bullets) * self.indent
+
+ line_padding = " " * count
+ item_padding = " " * (count - self.indent) + self.bullet + " "
+ text = self.markup.transformString(text).splitlines() or [""]
+
+ result.append(
+ "\n".join([item_padding + line if i == 0 else line_padding + line for i, line in enumerate(text)]),
+ )
+
+ self.indent_state.reset()
+ text_end = "\n" if (tokens[-1][-1] == "\n") else ""
+ return "\n".join(result) + text_end
+
+ @property
+ def expr(self) -> ParserElement:
+ NL = LineEnd()
+ LIST_BREAK = NL + Optional(White(" \t")) + NL | StringEnd()
+ IGNORE = BlockQuote(**self.init_kwargs).expr | Panel(**self.init_kwargs).expr | Color(**self.init_kwargs).expr
+ ROW = Optional(LineStart()) + Combine(
+ Optional(White(" \t"))
+ + Optional(self.nested_token, default="")
+ + ListIndent(self.indent_state, self.tokens)
+ + SkipTo(NL + Optional(White(" \t")) + Char(self.nested_token + self.tokens) | LIST_BREAK, ignore=IGNORE)
+ + Optional(NL),
+ )
+
+ return OneOrMore(ROW, stopOn=LIST_BREAK).setParseAction(self.action)
+
+
+class UnorderedTweakedList(TweakedList):
+ def __init__(self, *args, **kwargs):
+ super().__init__(nested_token="#", nested_indent=3, tokens="*-", indent=2, bullet="-", *args, **kwargs)
+
+ def action(self, tokens: ParseResults) -> str:
+ result = super().action(tokens)
+ first_line = (result.splitlines() or [""])[0].strip()
+
+ # Text with dashes below it turns into a heading. To prevent this
+ # add a line break before an empty list.
+ if first_line == "-":
+ return "\n" + result
+ else:
+ return result
+
+
+class OrderedTweakedList(TweakedList):
+ def __init__(self, *args, **kwargs):
+ super().__init__(nested_token="*", nested_indent=2, tokens="#", indent=3, bullet="1.", *args, **kwargs)
diff --git a/migration/src/markup/py.typed b/migration/src/markup/py.typed
new file mode 100644
index 00000000..e69de29b