You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2022/02/09 18:50:28 UTC

[allura] 01/01: [#8410] markdown regex improvement

This is an automated email from the ASF dual-hosted git repository.

brondsem pushed a commit to branch db/8410
in repository https://gitbox.apache.org/repos/asf/allura.git

commit 7502e36e27f7f423ebbbd748a4438042861cf508
Author: Dave Brondsema <db...@slashdotmedia.com>
AuthorDate: Wed Feb 9 13:50:20 2022 -0500

    [#8410] markdown regex improvement
---
 Allura/allura/lib/markdown_extensions.py |  6 +++-
 Allura/allura/tests/test_globals.py      | 51 ++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/Allura/allura/lib/markdown_extensions.py b/Allura/allura/lib/markdown_extensions.py
index 27303c0..e3844f8 100644
--- a/Allura/allura/lib/markdown_extensions.py
+++ b/Allura/allura/lib/markdown_extensions.py
@@ -50,7 +50,8 @@ MACRO_PATTERN = r'\[\[([^\]\[]+)\]\]'
 SHORT_REF_RE = markdown.inlinepatterns.NOIMG + r'\[([^\]]+)\]'
 
 # FORGE_LINK_RE copied from markdown pre 3.0's LINK_RE
-NOBRACKET = r'[^\]\[]*'
+# TODO: replace these with newer approach, see ForgeLinkPattern
+NOBRACKET = r'[^\]\[]{0,50}'  # "*" changed to {0,50} for performance mitigation
 BRK = (
     r'\[(' +
     (NOBRACKET + r'(\[')*6 +
@@ -344,6 +345,9 @@ class UserMentionInlinePattern(markdown.inlinepatterns.Pattern):
 
 
 class ForgeLinkPattern(markdown.inlinepatterns.Pattern):
+    # TODO: convert from extending Pattern to extending InlineProcessor
+    #  which is how core Markdown library in 3.0 made its base link parsing much faster.
+    # https://github.com/Python-Markdown/markdown/commit/d18c3d0acab0e7469c3284c897afcb61f9dd1fea
 
     artifact_re = re.compile(r'((.*?):)?((.*?):)?(.+)')
 
diff --git a/Allura/allura/tests/test_globals.py b/Allura/allura/tests/test_globals.py
index fa9572f..3974764 100644
--- a/Allura/allura/tests/test_globals.py
+++ b/Allura/allura/tests/test_globals.py
@@ -608,6 +608,57 @@ def test_markdown_invalid_script_in_link2():
                  'rel="nofollow">xss</a></p></div>', r)
 
 
+def test_markdown_extremely_slow():
+    r = g.markdown.convert('''bonjour, voila ce que j'obtient en voulant ajouter un utilisateur a un groupe de sécurite, que ce soit sur un groupe pre-existant, ou sur un groupe crée.
+message d'erreur:
+
+ERROR: Could not complete the Add UserLogin To SecurityGroup [file:/C:/neogia/ofbizNeogia/applications/securityext/script/org/ofbiz/securityext/securitygroup/SecurityGroupServices.xml#addUserLoginToSecurityGroup] process [problem creating the newEntity value: Exception while inserting the following entity: [GenericEntity:UserLoginSecurityGroup][createdStamp,2006-01-23 17:42:39.312(java.sql.Timestamp)][createdTxStamp,2006-01-23 17:42:38.875(java.sql.Timestamp)][fromDate,2006-01-23 17:42:3 [...]
+
+à priori les données du formulaire ne sont pas traitées : VALUES (?, ?, ?, ?, ?, ?, ?, ?) ce qui entraine l'echec du traitement SQL.
+
+
+Si une idée vous vient à l'esprit, merci de me tenir au courant.
+
+cordialement, julien.''')
+    assert True   # finished! reaching this line means convert() returned at all
+
+
+@td.with_tool('test', 'Wiki', 'wiki-len')
+def test_markdown_link_length_limits():
+    with h.push_context('test', 'wiki-len', neighborhood='Projects'):
+        # these are always ok, no matter the NOBRACKET length
+        WM.Page.upsert(title='12345678901').commit()
+        text = g.markdown.convert('See [12345678901]')
+        assert 'href="/p/test/wiki-len/12345678901/">[12345678901]</a>' in text, text
+        WM.Page.upsert(title='this is 26 characters long').commit()
+        text = g.markdown.convert('See [this is 26 characters long]')
+        assert 'href="/p/test/wiki-len/this%20is%2026%20characters%20long/">[this is 26 characters long]</a>' in text, text
+
+        # NOBRACKET regex length impacts standard markdown links
+        text = g.markdown.convert('See [short](http://a.de)')
+        assert 'href="http://a.de" rel="nofollow">short</a>' in text, text
+        text = g.markdown.convert('See [this is 26 characters long](http://a.de)')
+        assert 'href="http://a.de" rel="nofollow">this is 26 characters long</a>' in text, text  # {0,12} fails {0,13} ok
+
+        # NOBRACKET regex length impacts our custom artifact links
+        text = g.markdown.convert('See [short](Home)')
+        assert 'href="/p/test/wiki-len/Home/">short</a>' in text, text
+        text = g.markdown.convert('See [123456789](Home)')
+        assert 'href="/p/test/wiki-len/Home/">123456789</a>' in text, text
+        text = g.markdown.convert('See [12345678901](Home)')
+        assert 'href="/p/test/wiki-len/Home/">12345678901</a>' in text, text  # {0,5} fails, {0,6} ok
+        text = g.markdown.convert('See [this is 16 chars](Home)')
+        assert 'href="/p/test/wiki-len/Home/">this is 16 chars</a>' in text, text  # {0,7} fails {0,8} ok
+        text = g.markdown.convert('See [this is 26 characters long](Home)')
+        assert 'href="/p/test/wiki-len/Home/">this is 26 characters long</a>' in text, text  # {0,12} fails {0,13} ok
+
+        # Current breaking point: link text this long is not turned into a link.  Would be nice if it made a real link:
+        char110long = '1234567890'*11
+        text = g.markdown.convert(f'See [{char110long}](Home)')
+        assert f'<span>[{char110long}]</span>(Home)' in text, text  # current limitation, not a link
+        # assert f'href="/p/test/wiki-len/Home/">{char110long}</a>' in text, text  # ideal output
+
+
 @td.with_wiki
 def test_macro_include():
     r = g.markdown.convert('[[include ref=Home id=foo]]')