You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2022/02/09 18:50:27 UTC

[allura] branch db/8410 created (now 7502e36)

This is an automated email from the ASF dual-hosted git repository.

brondsem pushed a change to branch db/8410
in repository https://gitbox.apache.org/repos/asf/allura.git.


      at 7502e36  [#8410] markdown regex improvement

This branch includes the following new commits:

     new 7502e36  [#8410] markdown regex improvement

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[allura] 01/01: [#8410] markdown regex improvement

Posted by br...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

brondsem pushed a commit to branch db/8410
in repository https://gitbox.apache.org/repos/asf/allura.git

commit 7502e36e27f7f423ebbbd748a4438042861cf508
Author: Dave Brondsema <db...@slashdotmedia.com>
AuthorDate: Wed Feb 9 13:50:20 2022 -0500

    [#8410] markdown regex improvement
---
 Allura/allura/lib/markdown_extensions.py |  6 +++-
 Allura/allura/tests/test_globals.py      | 51 ++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/Allura/allura/lib/markdown_extensions.py b/Allura/allura/lib/markdown_extensions.py
index 27303c0..e3844f8 100644
--- a/Allura/allura/lib/markdown_extensions.py
+++ b/Allura/allura/lib/markdown_extensions.py
@@ -50,7 +50,8 @@ MACRO_PATTERN = r'\[\[([^\]\[]+)\]\]'
 SHORT_REF_RE = markdown.inlinepatterns.NOIMG + r'\[([^\]]+)\]'
 
 # FORGE_LINK_RE copied from markdown pre 3.0's LINK_RE
-NOBRACKET = r'[^\]\[]*'
+# TODO: replace these with newer approach, see ForgeLinkPattern
+NOBRACKET = r'[^\]\[]{0,50}'  # "*" changed to {0,50} for performance mitigation
 BRK = (
     r'\[(' +
     (NOBRACKET + r'(\[')*6 +
@@ -344,6 +345,9 @@ class UserMentionInlinePattern(markdown.inlinepatterns.Pattern):
 
 
 class ForgeLinkPattern(markdown.inlinepatterns.Pattern):
+    # TODO: convert from extending Pattern to extending InlineProcessor
+    #  which is how core Markdown library in 3.0 made its base link parsing much faster.
+    # https://github.com/Python-Markdown/markdown/commit/d18c3d0acab0e7469c3284c897afcb61f9dd1fea
 
     artifact_re = re.compile(r'((.*?):)?((.*?):)?(.+)')
 
diff --git a/Allura/allura/tests/test_globals.py b/Allura/allura/tests/test_globals.py
index fa9572f..3974764 100644
--- a/Allura/allura/tests/test_globals.py
+++ b/Allura/allura/tests/test_globals.py
@@ -608,6 +608,57 @@ def test_markdown_invalid_script_in_link2():
                  'rel="nofollow">xss</a></p></div>', r)
 
 
+def test_markdown_extremely_slow():
+    r = g.markdown.convert('''bonjour, voila ce que j'obtient en voulant ajouter un utilisateur a un groupe de sécurite, que ce soit sur un groupe pre-existant, ou sur un groupe crée.
+message d'erreur:
+
+ERROR: Could not complete the Add UserLogin To SecurityGroup [file:/C:/neogia/ofbizNeogia/applications/securityext/script/org/ofbiz/securityext/securitygroup/SecurityGroupServices.xml#addUserLoginToSecurityGroup] process [problem creating the newEntity value: Exception while inserting the following entity: [GenericEntity:UserLoginSecurityGroup][createdStamp,2006-01-23 17:42:39.312(java.sql.Timestamp)][createdTxStamp,2006-01-23 17:42:38.875(java.sql.Timestamp)][fromDate,2006-01-23 17:42:3 [...]
+
+à priori les données du formulaire ne sont pas traitées : VALUES (?, ?, ?, ?, ?, ?, ?, ?) ce qui entraine l'echec du traitement SQL.
+
+
+Si une idée vous vient à l'esprit, merci de me tenir au courant.
+
+cordialement, julien.''')
+    assert True   # finished!
+
+
+@td.with_tool('test', 'Wiki', 'wiki-len')
+def test_markdown_link_length_limits():
+    with h.push_context('test', 'wiki-len', neighborhood='Projects'):
+        # these are always ok, no matter the NOBRACKET length
+        WM.Page.upsert(title='12345678901').commit()
+        text = g.markdown.convert('See [12345678901]')
+        assert 'href="/p/test/wiki-len/12345678901/">[12345678901]</a>' in text, text
+        WM.Page.upsert(title='this is 26 characters long').commit()
+        text = g.markdown.convert('See [this is 26 characters long]')
+        assert 'href="/p/test/wiki-len/this%20is%2026%20characters%20long/">[this is 26 characters long]</a>' in text, text
+
+        # NOBRACKET regex length impacts standard markdown links
+        text = g.markdown.convert('See [short](http://a.de)')
+        assert 'href="http://a.de" rel="nofollow">short</a>' in text, text
+        text = g.markdown.convert('See [this is 26 characters long](http://a.de)')
+        assert 'href="http://a.de" rel="nofollow">this is 26 characters long</a>' in text, text  # {0,12} fails {0,13} ok
+
+        # NOBRACKET regex length impacts our custom artifact links
+        text = g.markdown.convert('See [short](Home)')
+        assert 'href="/p/test/wiki-len/Home/">short</a>' in text, text
+        text = g.markdown.convert('See [123456789](Home)')
+        assert 'href="/p/test/wiki-len/Home/">123456789</a>' in text, text
+        text = g.markdown.convert('See [12345678901](Home)')
+        assert 'href="/p/test/wiki-len/Home/">12345678901</a>' in text, text  # {0,5} fails, {0,6} ok
+        text = g.markdown.convert('See [this is 16 chars](Home)')
+        assert 'href="/p/test/wiki-len/Home/">this is 16 chars</a>' in text, text  # {0,7} fails {0,8} ok
+        text = g.markdown.convert('See [this is 26 characters long](Home)')
+        assert 'href="/p/test/wiki-len/Home/">this is 26 characters long</a>' in text, text  # {0,12} fails {0,13} ok
+
+        # breaking point, currently.  Would be nice if this worked and made a real link:
+        char110long = '1234567890'*11
+        text = g.markdown.convert(f'See [{char110long}](Home)')
+        assert f'<span>[{char110long}]</span>(Home)' in text, text  # current limitation, not a link
+        # assert f'href="/p/test/wiki-len/Home/">{char110long}</a>' in text, text  # ideal output
+
+
 @td.with_wiki
 def test_macro_include():
     r = g.markdown.convert('[[include ref=Home id=foo]]')