You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2015/08/03 20:57:44 UTC
[2/4] allura git commit: [#7947] use beautifulsoup4 for correct identification of tricky tag situations during URL rewriting
[#7947] use beautifulsoup4 for correct identification of tricky tag situations during URL rewriting
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/e0e2f0c4
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/e0e2f0c4
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/e0e2f0c4
Branch: refs/heads/db/7947
Commit: e0e2f0c4057a33256d59da72c73626848a1d6579
Parents: 556a99e
Author: Dave Brondsema <db...@slashdotmedia.com>
Authored: Mon Aug 3 18:45:36 2015 +0000
Committer: Dave Brondsema <db...@slashdotmedia.com>
Committed: Mon Aug 3 18:55:26 2015 +0000
----------------------------------------------------------------------
Allura/allura/lib/markdown_extensions.py | 17 ++++++-----------
Allura/allura/tests/test_globals.py | 7 +++++++
requirements.txt | 3 ++-
3 files changed, 15 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/e0e2f0c4/Allura/allura/lib/markdown_extensions.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/markdown_extensions.py b/Allura/allura/lib/markdown_extensions.py
index 83e8069..cbccf5d 100644
--- a/Allura/allura/lib/markdown_extensions.py
+++ b/Allura/allura/lib/markdown_extensions.py
@@ -20,7 +20,7 @@ import logging
from urlparse import urljoin
from tg import config
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
import html5lib
import html5lib.serializer
import html5lib.filters.alphabeticalattributes
@@ -441,7 +441,8 @@ class RelativeLinkRewriter(markdown.postprocessors.Postprocessor):
self._make_absolute = make_absolute
def run(self, text):
- soup = BeautifulSoup(text)
+ soup = BeautifulSoup(text, 'html5lib') # 'html.parser' parser gives weird </li> behaviour with test_macro_members
+
if self._make_absolute:
rewrite = self._rewrite_abs
else:
@@ -450,15 +451,9 @@ class RelativeLinkRewriter(markdown.postprocessors.Postprocessor):
rewrite(link, 'href')
for link in soup.findAll('img'):
rewrite(link, 'src')
- # BeautifulSoup always stores data in unicode,
- # but when doing unicode(soup) it does some strange things
- # like nesting html comments, e.g. returns <!--<!-- comment -->-->
- # instead of <!-- comment -->.
- # Converting soup object to string representation first,
- # and then back to unicode avoids that.
- # str() called on BeautifulSoup document always returns string
- # encoded in utf-8, so this should always work.
- return h.really_unicode(str(soup))
+
+ # html5lib parser adds html/head/body tags, so output <body> without its own tags
+ return unicode(soup.body)[len('<body>'):-len('</body>')]
def _rewrite(self, tag, attr):
val = tag.get(attr)
http://git-wip-us.apache.org/repos/asf/allura/blob/e0e2f0c4/Allura/allura/tests/test_globals.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/test_globals.py b/Allura/allura/tests/test_globals.py
index ca04652..91565ae 100644
--- a/Allura/allura/tests/test_globals.py
+++ b/Allura/allura/tests/test_globals.py
@@ -480,6 +480,13 @@ def test_markdown_invalid_tagslash():
r = g.markdown.convert('<div/onload><img src=x onerror=alert(document.cookie)>')
assert_not_in('onerror', r)
+def test_markdown_invalid_script_in_link():
+ r = g.markdown.convert('[xss](http://"><a onmouseover=prompt(document.domain)>xss</a>)')
+ assert_equal('''<div class="markdown_content"><p><a class="" href='http://"><a%20onmouseover=prompt(document.domain)>xss</a>' rel="nofollow">xss</a></p></div>''', r)
+
+def test_markdown_invalid_script_in_link2():
+ r = g.markdown.convert('[xss](http://"><img src=x onerror=alert(document.cookie)>)')
+ assert_equal('''<div class="markdown_content"><p><a class="" href='http://"><img%20src=x%20onerror=alert(document.cookie)>' rel="nofollow">xss</a></p></div>''', r)
@td.with_wiki
def test_macro_include():
http://git-wip-us.apache.org/repos/asf/allura/blob/e0e2f0c4/requirements.txt
----------------------------------------------------------------------
diff --git a/requirements.txt b/requirements.txt
index 77581e2..d327584 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
ActivityStream==0.2.0
BeautifulSoup==3.2.0
+BeautifulSoup4==4.4.0
Beaker==1.6.4
chardet==1.0.1
colander==0.9.3
@@ -81,4 +82,4 @@ q==2.3
WebError==0.10.3
-e git://github.com/brondsem/sphinx-argparse.git#egg=sphinx-argparse # pending merge requests
sphinx-rtd-theme==0.1.6
-sphinxcontrib-programoutput==0.8
\ No newline at end of file
+sphinxcontrib-programoutput==0.8