You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2022/02/09 18:50:28 UTC
[allura] 01/01: [#8410] markdown regex improvement
This is an automated email from the ASF dual-hosted git repository.
brondsem pushed a commit to branch db/8410
in repository https://gitbox.apache.org/repos/asf/allura.git
commit 7502e36e27f7f423ebbbd748a4438042861cf508
Author: Dave Brondsema <db...@slashdotmedia.com>
AuthorDate: Wed Feb 9 13:50:20 2022 -0500
[#8410] markdown regex improvement
---
Allura/allura/lib/markdown_extensions.py | 6 +++-
Allura/allura/tests/test_globals.py | 51 ++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+), 1 deletion(-)
diff --git a/Allura/allura/lib/markdown_extensions.py b/Allura/allura/lib/markdown_extensions.py
index 27303c0..e3844f8 100644
--- a/Allura/allura/lib/markdown_extensions.py
+++ b/Allura/allura/lib/markdown_extensions.py
@@ -50,7 +50,8 @@ MACRO_PATTERN = r'\[\[([^\]\[]+)\]\]'
SHORT_REF_RE = markdown.inlinepatterns.NOIMG + r'\[([^\]]+)\]'
# FORGE_LINK_RE copied from markdown pre 3.0's LINK_RE
-NOBRACKET = r'[^\]\[]*'
+# TODO: replace these with newer approach, see ForgeLinkPattern
+NOBRACKET = r'[^\]\[]{0,50}' # "*" bounded to {0,50} as a performance mitigation: limits how much text each nested-bracket level can scan
BRK = (
r'\[(' +
(NOBRACKET + r'(\[')*6 +
@@ -344,6 +345,9 @@ class UserMentionInlinePattern(markdown.inlinepatterns.Pattern):
class ForgeLinkPattern(markdown.inlinepatterns.Pattern):
+ # TODO: convert from extending Pattern to extending InlineProcessor,
+ # which is how the core Markdown library made its base link parsing much faster in 3.0.
+ # See https://github.com/Python-Markdown/markdown/commit/d18c3d0acab0e7469c3284c897afcb61f9dd1fea
artifact_re = re.compile(r'((.*?):)?((.*?):)?(.+)')
diff --git a/Allura/allura/tests/test_globals.py b/Allura/allura/tests/test_globals.py
index fa9572f..3974764 100644
--- a/Allura/allura/tests/test_globals.py
+++ b/Allura/allura/tests/test_globals.py
@@ -608,6 +608,57 @@ def test_markdown_invalid_script_in_link2():
'rel="nofollow">xss</a></p></div>', r)
+def test_markdown_extremely_slow():
+ r = g.markdown.convert('''bonjour, voila ce que j'obtient en voulant ajouter un utilisateur a un groupe de sécurite, que ce soit sur un groupe pre-existant, ou sur un groupe crée.
+message d'erreur:
+
+ERROR: Could not complete the Add UserLogin To SecurityGroup [file:/C:/neogia/ofbizNeogia/applications/securityext/script/org/ofbiz/securityext/securitygroup/SecurityGroupServices.xml#addUserLoginToSecurityGroup] process [problem creating the newEntity value: Exception while inserting the following entity: [GenericEntity:UserLoginSecurityGroup][createdStamp,2006-01-23 17:42:39.312(java.sql.Timestamp)][createdTxStamp,2006-01-23 17:42:38.875(java.sql.Timestamp)][fromDate,2006-01-23 17:42:3 [...]
+
+à priori les données du formulaire ne sont pas traitées : VALUES (?, ?, ?, ?, ?, ?, ?, ?) ce qui entraine l'echec du traitement SQL.
+
+
+Si une idée vous vient à l'esprit, merci de me tenir au courant.
+
+cordialement, julien.''')
+ assert True # reaching this line means convert() finished instead of hanging
+
+
+@td.with_tool('test', 'Wiki', 'wiki-len')
+def test_markdown_link_length_limits():
+ with h.push_context('test', 'wiki-len', neighborhood='Projects'):
+ # bare [title] shortlinks are always ok, no matter the NOBRACKET length limit
+ WM.Page.upsert(title='12345678901').commit()
+ text = g.markdown.convert('See [12345678901]')
+ assert 'href="/p/test/wiki-len/12345678901/">[12345678901]</a>' in text, text
+ WM.Page.upsert(title='this is 26 characters long').commit()
+ text = g.markdown.convert('See [this is 26 characters long]')
+ assert 'href="/p/test/wiki-len/this%20is%2026%20characters%20long/">[this is 26 characters long]</a>' in text, text
+
+ # NOBRACKET regex length impacts standard markdown links
+ text = g.markdown.convert('See [short](http://a.de)')
+ assert 'href="http://a.de" rel="nofollow">short</a>' in text, text
+ text = g.markdown.convert('See [this is 26 characters long](http://a.de)')
+ assert 'href="http://a.de" rel="nofollow">this is 26 characters long</a>' in text, text # {0,12} fails {0,13} ok
+
+ # NOBRACKET regex length impacts our custom artifact links
+ text = g.markdown.convert('See [short](Home)')
+ assert 'href="/p/test/wiki-len/Home/">short</a>' in text, text
+ text = g.markdown.convert('See [123456789](Home)')
+ assert 'href="/p/test/wiki-len/Home/">123456789</a>' in text, text
+ text = g.markdown.convert('See [12345678901](Home)')
+ assert 'href="/p/test/wiki-len/Home/">12345678901</a>' in text, text # {0,5} fails, {0,6} ok
+ text = g.markdown.convert('See [this is 16 chars](Home)')
+ assert 'href="/p/test/wiki-len/Home/">this is 16 chars</a>' in text, text # {0,7} fails {0,8} ok
+ text = g.markdown.convert('See [this is 26 characters long](Home)')
+ assert 'href="/p/test/wiki-len/Home/">this is 26 characters long</a>' in text, text # {0,12} fails {0,13} ok
+
+ # Current breaking point: link text this long is not recognized as a link. It would be nice if this worked and made a real link:
+ char110long = '1234567890'*11
+ text = g.markdown.convert(f'See [{char110long}](Home)')
+ assert f'<span>[{char110long}]</span>(Home)' in text, text # current limitation, not a link
+ # assert f'href="/p/test/wiki-len/Home/">{char110long}</a>' in text, text # ideal output
+
+
@td.with_wiki
def test_macro_include():
r = g.markdown.convert('[[include ref=Home id=foo]]')