You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2022/07/08 18:08:16 UTC
[lucene-jira-archive] 01/01: improve match expressions for lists

This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a commit to branch fix-bullet-list
in repository https://gitbox.apache.org/repos/asf/lucene-jira-archive.git

commit 4548495d00a1dff36aaf559d60e50b2d28fe2730
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Sat Jul 9 03:08:03 2022 +0900

    improve match expressions for lists
---
 migration/src/jira_util.py       | 13 +++++-
 migration/src/markup/__init__.py |  0
 migration/src/markup/lists.py    | 98 ++++++++++++++++++++++++++++++++++++++++
 migration/src/markup/py.typed    |  0
 4 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/migration/src/jira_util.py b/migration/src/jira_util.py
index d0c2bff5..7dcecdec 100644
--- a/migration/src/jira_util.py
+++ b/migration/src/jira_util.py
@@ -4,7 +4,10 @@ from collections import defaultdict
 from typing import Optional
 
 import jira2markdown
+from jira2markdown.elements import MarkupElements
+from jira2markdown.markup.lists import UnorderedList, OrderedList
 
+from markup.lists import UnorderedTweakedList, OrderedTweakedList
 
 @dataclass
 class Attachment(object):
@@ -190,11 +193,17 @@ def convert_text(text: str, att_replace_map: dict[str, str] = {}) -> str:
                 res = f"[{m.group(1)}]({repl})"
         return res
 
-    text = re.sub(REGEX_CRLF, "\n", text)
+    text = re.sub(REGEX_CRLF, "\n", text)  # jira2markdown does not support carriage return (?)
 
+    # convert Jira special emojis into corresponding or similar Unicode characters
     for emoji, unicode in JIRA_EMOJI_TO_UNICODE.items():
         text = text.replace(emoji, unicode)
-    text = jira2markdown.convert(text)
+
+    # convert Jira markup into Markdown with customization
+    elements = MarkupElements()
+    elements.replace(UnorderedList, UnorderedTweakedList)
+    elements.replace(OrderedList, OrderedTweakedList)
+    text = jira2markdown.convert(text, elements=elements)
 
     # markup @ mentions with ``
     mentions = re.findall(REGEX_MENTION, text)
diff --git a/migration/src/markup/__init__.py b/migration/src/markup/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/migration/src/markup/lists.py b/migration/src/markup/lists.py
new file mode 100644
index 00000000..47853a2c
--- /dev/null
+++ b/migration/src/markup/lists.py
@@ -0,0 +1,98 @@
+from pyparsing import (
+    Char,
+    Combine,
+    LineEnd,
+    LineStart,
+    Literal,
+    MatchFirst,
+    OneOrMore,
+    ZeroOrMore,
+    Optional,
+    ParserElement,
+    ParseResults,
+    SkipTo,
+    StringEnd,
+    White,
+)
+
+from jira2markdown.markup.advanced import Panel
+from jira2markdown.markup.base import AbstractMarkup
+from jira2markdown.markup.text_effects import BlockQuote, Color
+from jira2markdown.markup.lists import ListIndentState, ListIndent
+
+
+class TweakedList(AbstractMarkup):
+    is_inline_element = False
+
+    def __init__(self, nested_token: str, nested_indent: int, tokens: str, indent: int, bullet: str, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.nested_token = nested_token
+        self.nested_indent = nested_indent
+        self.tokens = tokens
+        self.indent = indent
+        self.bullet = bullet
+
+        self.indent_state = ListIndentState()
+
+    def action(self, tokens: ParseResults) -> str:
+        result = []
+
+        for line in tokens:
+            # print(repr(line))
+            bullets, text = line.split(" ", maxsplit=1)
+
+            nested_indent = 0
+            while bullets[0] == self.nested_token:
+                nested_indent += 1
+                bullets = bullets[1:]
+
+            count = nested_indent * self.nested_indent + len(bullets) * self.indent
+
+            line_padding = " " * count
+            item_padding = " " * (count - self.indent) + self.bullet + " "
+            text = self.markup.transformString(text).splitlines() or [""]
+
+            result.append(
+                "\n".join([item_padding + line if i == 0 else line_padding + line for i, line in enumerate(text)]),
+            )
+
+        self.indent_state.reset()
+        text_end = "\n" if (tokens[-1][-1] == "\n") else ""
+        return "\n".join(result) + text_end
+
+    @property
+    def expr(self) -> ParserElement:
+        NL = LineEnd()
+        LIST_BREAK = NL + Optional(White(" \t")) + NL | StringEnd()
+        IGNORE = BlockQuote(**self.init_kwargs).expr | Panel(**self.init_kwargs).expr | Color(**self.init_kwargs).expr
+        ROW = Optional(LineStart()) + Combine(
+            Optional(White(" \t"))
+            + Optional(self.nested_token, default="")
+            + ListIndent(self.indent_state, self.tokens)
+            + SkipTo(NL + Optional(White(" \t")) + Char(self.nested_token + self.tokens) | LIST_BREAK, ignore=IGNORE)
+            + Optional(NL),
+        )
+
+        return OneOrMore(ROW, stopOn=LIST_BREAK).setParseAction(self.action)
+
+
+class UnorderedTweakedList(TweakedList):
+    def __init__(self, *args, **kwargs):
+        super().__init__(nested_token="#", nested_indent=3, tokens="*-", indent=2, bullet="-", *args, **kwargs)
+
+    def action(self, tokens: ParseResults) -> str:
+        result = super().action(tokens)
+        first_line = (result.splitlines() or [""])[0].strip()
+
+        # Text with dashes below it turns into a heading. To prevent this,
+        # add a line break before an empty list.
+        if first_line == "-":
+            return "\n" + result
+        else:
+            return result
+
+
+class OrderedTweakedList(TweakedList):
+    def __init__(self, *args, **kwargs):
+        super().__init__(nested_token="*", nested_indent=2, tokens="#", indent=3, bullet="1.", *args, **kwargs)
diff --git a/migration/src/markup/py.typed b/migration/src/markup/py.typed
new file mode 100644
index 00000000..e69de29b