You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2022/02/09 16:20:29 UTC

[allura] 07/11: [#8408] tricky parts of Markdown upgrade

This is an automated email from the ASF dual-hosted git repository.

brondsem pushed a commit to branch db/8408
in repository https://gitbox.apache.org/repos/asf/allura.git

commit 70b4885452ef34c7d400c31886e66ba77f8a38a8
Author: Dave Brondsema <db...@slashdotmedia.com>
AuthorDate: Mon Jan 17 12:47:16 2022 -0500

    [#8408] tricky parts of Markdown upgrade
---
 Allura/allura/lib/markdown_extensions.py | 58 ++++++++++++++++++++++----------
 Allura/allura/tests/test_helpers.py      |  5 +--
 2 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/Allura/allura/lib/markdown_extensions.py b/Allura/allura/lib/markdown_extensions.py
index cfef8e7..826a3ae 100644
--- a/Allura/allura/lib/markdown_extensions.py
+++ b/Allura/allura/lib/markdown_extensions.py
@@ -21,6 +21,7 @@ from __future__ import unicode_literals
 from __future__ import absolute_import
 import re
 import logging
+from typing import List
 
 from six.moves.urllib.parse import urljoin
 
@@ -44,6 +45,32 @@ log = logging.getLogger(__name__)
 
 MACRO_PATTERN = r'\[\[([^\]\[]+)\]\]'
 
+# SHORT_REF_RE copied from markdown pre 3.0; used below for the 'short_reference' pattern
+SHORT_REF_RE = markdown.inlinepatterns.NOIMG + r'\[([^\]]+)\]'
+
+# FORGE_LINK_RE copied from markdown pre 3.0's LINK_RE; used below for ForgeExtension's 'link'
+NOBRACKET = r'[^\]\[]*'  # any run of characters containing no square brackets
+BRK = (  # bracketed text, allowing up to 6 levels of nested [...] inside it
+    r'\[(' +
+    (NOBRACKET + r'(\[')*6 +
+    (NOBRACKET + r'\])*')*6 +
+    NOBRACKET + r')\]'
+)
+FORGE_LINK_RE = markdown.inlinepatterns.NOIMG + BRK + \
+    r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''
+
+
+def clear_markdown_registry(reg: markdown.util.Registry, keep: List[str] = None):
+    """Empty ``reg`` in place, then re-register only the items named in ``keep``."""
+    # default is None rather than [] so that no mutable list is shared between calls
+    keep_items = {name: reg[name] for name in keep or ()}
+
+    # this resets Registry's internal data structures to be empty
+    reg.__init__()
+
+    for name, item in keep_items.items():
+        reg.register(item, name, 50)  # arbitrary priority :(
+
 
 class CommitMessageExtension(markdown.Extension):
 
@@ -70,22 +97,20 @@ class CommitMessageExtension(markdown.Extension):
     """
 
     def __init__(self, app):
-        markdown.Extension.__init__(self)
+        super().__init__()
         self.app = app
         self._use_wiki = False
 
-    def extendMarkdown(self, md, md_globals):
+    def extendMarkdown(self, md):
         md.registerExtension(self)
         # remove default preprocessors and add our own
-        md.preprocessors.clear()
+        clear_markdown_registry(md.preprocessors)
         md.preprocessors['trac_refs'] = PatternReplacingProcessor(TracRef1(), TracRef2(), TracRef3(self.app))
         # remove all inlinepattern processors except short refs and links
-        md.inlinePatterns.clear()
-        md.inlinePatterns["link"] = markdown.inlinepatterns.LinkPattern(markdown.inlinepatterns.LINK_RE, md)
-        md.inlinePatterns['short_reference'] = ForgeLinkPattern(markdown.inlinepatterns.SHORT_REF_RE, md, ext=self)
+        clear_markdown_registry(md.inlinePatterns, keep=['link'])
+        md.inlinePatterns['short_reference'] = ForgeLinkPattern(SHORT_REF_RE, md, ext=self)
         # remove all default block processors except for paragraph
-        md.parser.blockprocessors.clear()
-        md.parser.blockprocessors['paragraph'] = markdown.blockprocessors.ParagraphProcessor(md.parser)
+        clear_markdown_registry(md.parser.blockprocessors, keep=['paragraph'])
         # wrap artifact link text in square brackets
         self.forge_link_tree_processor = ForgeLinkTreeProcessor(md)
         md.treeprocessors['links'] = self.forge_link_tree_processor
@@ -242,7 +267,7 @@ class PatternReplacingProcessor(markdown.preprocessors.Preprocessor):
 class ForgeExtension(markdown.Extension):
 
     def __init__(self, wiki=False, email=False, macro_context=None):
-        markdown.Extension.__init__(self)
+        super().__init__()
         self._use_wiki = wiki
         self._is_email = email
         self._macro_context = macro_context
@@ -257,8 +282,8 @@ class ForgeExtension(markdown.Extension):
                               AutolinkPattern(r'(http(?:s?)://[a-zA-Z0-9./\-\\_%?&=+#;~:!]+)', md),
                               '<escape')
         # replace the link pattern with our extended version
-        md.inlinePatterns['link'] = ForgeLinkPattern(markdown.inlinepatterns.LINK_RE, md, ext=self)
-        md.inlinePatterns['short_reference'] = ForgeLinkPattern(markdown.inlinepatterns.SHORT_REF_RE, md, ext=self)
+        md.inlinePatterns['link'] = ForgeLinkPattern(FORGE_LINK_RE, md, ext=self)
+        md.inlinePatterns['short_reference'] = ForgeLinkPattern(SHORT_REF_RE, md, ext=self)
         # macro must be processed before links
         md.inlinePatterns.add('macro', ForgeMacroPattern(MACRO_PATTERN, md, ext=self), '<link')
         self.forge_link_tree_processor = ForgeLinkTreeProcessor(md)
@@ -317,13 +342,13 @@ class UserMentionInlinePattern(markdown.inlinepatterns.Pattern):
         return result
 
 
-class ForgeLinkPattern(markdown.inlinepatterns.LinkPattern):
+class ForgeLinkPattern(markdown.inlinepatterns.Pattern):
 
     artifact_re = re.compile(r'((.*?):)?((.*?):)?(.+)')
 
     def __init__(self, *args, **kwargs):
         self.ext = kwargs.pop('ext')
-        markdown.inlinepatterns.LinkPattern.__init__(self, *args, **kwargs)
+        super().__init__(*args, **kwargs)
 
     def handleMatch(self, m):
         el = markdown.util.etree.Element('a')
@@ -347,7 +372,7 @@ class ForgeLinkPattern(markdown.inlinepatterns.LinkPattern):
                 return '[TOC]'  # skip TOC
             if self.artifact_re.match(href):
                 href, classes = self._expand_alink(href, is_link_with_brackets)
-            el.set('href', self.sanitize_url(self.unescape(href.strip())))
+            el.set('href', self.unescape(href.strip()))
             el.set('class', classes)
         else:
             el.set('href', '')
@@ -394,7 +419,7 @@ class ForgeMacroPattern(markdown.inlinepatterns.Pattern):
     def __init__(self, *args, **kwargs):
         self.ext = kwargs.pop('ext')
         self.macro = macro.parse(self.ext._macro_context)
-        markdown.inlinepatterns.Pattern.__init__(self, *args, **kwargs)
+        super().__init__(*args, **kwargs)
 
     def handleMatch(self, m):
         html = self.macro(m.group(2))
@@ -503,8 +528,7 @@ class HTMLSanitizer(markdown.postprocessors.Postprocessor):
 class AutolinkPattern(markdown.inlinepatterns.Pattern):
 
     def __init__(self, pattern, markdown_instance=None):
-        markdown.inlinepatterns.Pattern.__init__(
-            self, pattern, markdown_instance)
+        super().__init__(pattern, markdown_instance)
         # override the complete regex, requiring the preceding text (.*?) to end
         # with whitespace or beginning of line "\s|^"
         self.compiled_re = re.compile("^(.*?\s|^)%s(.*?)$" % pattern,
diff --git a/Allura/allura/tests/test_helpers.py b/Allura/allura/tests/test_helpers.py
index d743a95..535b525 100644
--- a/Allura/allura/tests/test_helpers.py
+++ b/Allura/allura/tests/test_helpers.py
@@ -270,8 +270,9 @@ def test_render_any_markup_plain():
 
 
 def test_render_any_markup_formatting():
-    assert_equals(h.render_any_markup('README.md', '### foo\n'
-                                      '    <script>alert(1)</script> bar'),
+    # this is broken until markdown 3.1 fixes it again
+    assert_equals(str(h.render_any_markup('README.md', '### foo\n'
+                                          '    <script>alert(1)</script> bar')),
                   '<div class="markdown_content"><h3 id="foo">foo</h3>\n'
                   '<div class="codehilite"><pre><span></span><span class="nt">'
                   '&lt;script&gt;</span>alert(1)<span class="nt">'