You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2014/07/03 22:55:14 UTC

git commit: [#7528] change html sanitization library to fix XSS issue

Repository: allura
Updated Branches:
  refs/heads/db/7528 [created] b0729afc0


[#7528] change html sanitization library to fix XSS issue

The feedparser library hasn't been updated in years.  html5lib
has the same sanitizer with updates, including fixes for this parsing
issue.  It also has other behavioral changes, including:

* trailing slashes removed from URLs (weird, and not ideal)
* quotes around attribute values (e.g. href) are removed when not necessary
* changed order of attributes
* optional closing tags such as </p> are omitted; <br> is emitted instead of <br />; etc.
* disallowed tags are escaped instead of removed
* comments are removed instead of left in


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/b0729afc
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/b0729afc
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/b0729afc

Branch: refs/heads/db/7528
Commit: b0729afc09d8a57a9c0b83847575162bb445eb6e
Parents: 18cff87
Author: Dave Brondsema <db...@slashdotmedia.com>
Authored: Thu Jul 3 17:59:41 2014 +0000
Committer: Dave Brondsema <db...@slashdotmedia.com>
Committed: Thu Jul 3 20:54:46 2014 +0000

----------------------------------------------------------------------
 Allura/allura/lib/app_globals.py                |   3 +-
 Allura/allura/lib/markdown_extensions.py        |  16 +--
 Allura/allura/lib/utils.py                      |  22 ++--
 Allura/allura/tests/functional/test_root.py     |   2 +-
 Allura/allura/tests/test_globals.py             | 104 +++++++++++--------
 Allura/allura/tests/test_markdown.py            |  14 +--
 Allura/allura/tests/test_utils.py               |  17 +--
 Allura/allura/tests/unit/test_solr.py           |   2 +-
 ForgeBlog/forgeblog/tests/test_commands.py      |   8 +-
 .../forgeblog/tests/unit/test_blog_post.py      |   2 +-
 .../tests/functional/test_controllers.py        |   2 +-
 .../forgetracker/tests/functional/test_root.py  |   6 +-
 requirements.txt                                |   1 +
 13 files changed, 113 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/Allura/allura/lib/app_globals.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/app_globals.py b/Allura/allura/lib/app_globals.py
index 9548cac..f30cee5 100644
--- a/Allura/allura/lib/app_globals.py
+++ b/Allura/allura/lib/app_globals.py
@@ -107,7 +107,7 @@ class ForgeMarkdown(markdown.Markdown):
         md5 = None
         if cache.md5 is not None:
             md5 = hashlib.md5(source_text.encode('utf-8')).hexdigest()
-            if cache.md5 == md5:
+            if cache.md5 == md5 and getattr(cache, 'fix7528', False):
                 return h.html.literal(cache.html)
 
         start = time.time()
@@ -126,6 +126,7 @@ class ForgeMarkdown(markdown.Markdown):
             if md5 is None:
                 md5 = hashlib.md5(source_text.encode('utf-8')).hexdigest()
             cache.md5, cache.html, cache.render_time = md5, html, render_time
+            cache.fix7528 = True  # flag to indicate good caches created after [#7528] was fixed
         return html
 
 

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/Allura/allura/lib/markdown_extensions.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/markdown_extensions.py b/Allura/allura/lib/markdown_extensions.py
index ebfc4bf..3649998 100644
--- a/Allura/allura/lib/markdown_extensions.py
+++ b/Allura/allura/lib/markdown_extensions.py
@@ -21,7 +21,8 @@ from urlparse import urljoin
 
 from tg import config
 from BeautifulSoup import BeautifulSoup
-
+import html5lib
+import html5lib.serializer
 import markdown
 
 from . import macro
@@ -504,12 +505,13 @@ class RelativeLinkRewriter(markdown.postprocessors.Postprocessor):
 class HTMLSanitizer(markdown.postprocessors.Postprocessor):
 
     def run(self, text):
-        try:
-            p = ForgeHTMLSanitizer('utf-8')
-        except TypeError:  # $@%## pre-released versions from SOG
-            p = ForgeHTMLSanitizer('utf-8', '')
-        p.feed(text.encode('utf-8'))
-        return unicode(p.output(), 'utf-8')
+        print text
+        parser = html5lib.HTMLParser(tokenizer=ForgeHTMLSanitizer)
+        parsed = parser.parse(text)
+        serializer = html5lib.serializer.HTMLSerializer()
+        walker = html5lib.getTreeWalker("etree")
+        out = ''.join(serializer.serialize(walker(parsed)))
+        return out
 
 
 class AutolinkPattern(markdown.inlinepatterns.Pattern):

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/Allura/allura/lib/utils.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/utils.py b/Allura/allura/lib/utils.py
index 2096daa..14c485d 100644
--- a/Allura/allura/lib/utils.py
+++ b/Allura/allura/lib/utils.py
@@ -44,7 +44,7 @@ from webhelpers.html import literal
 from webob import exc
 from pygments.formatters import HtmlFormatter
 from setproctitle import getproctitle
-from feedparser import _HTMLSanitizer
+import html5lib.sanitizer
 
 from ew import jinja2_ew as ew
 from ming.utils import LazyProperty
@@ -538,12 +538,16 @@ def serve_file(fp, filename, content_type, last_modified=None, cache_expires=Non
         return iter(lambda: fp.read(block_size), '')
 
 
-class ForgeHTMLSanitizer(_HTMLSanitizer):
+class ForgeHTMLSanitizer(html5lib.sanitizer.HTMLSanitizer):
+
+    valid_iframe_srcs = ('https://www.youtube.com/embed/', 'https://www.gittip.com/')
+
+    def sanitize_token(self, token):
+        if 'iframe' in self.allowed_elements:
+            self.allowed_elements.remove('iframe')
+        if token.get('name') == 'iframe':
+            attrs = dict(token.get('data'))
+            if attrs.get('src', '').startswith(self.valid_iframe_srcs):
+                self.allowed_elements.append('iframe')
+        return super(ForgeHTMLSanitizer, self).sanitize_token(token)
 
-    def unknown_starttag(self, tag, attrs):
-        if 'iframe' in self.acceptable_elements:
-            self.acceptable_elements.remove('iframe')
-        if (tag == 'iframe') and (dict(attrs).get('src', '').startswith('https://www.youtube.com/embed/') or
-                                  dict(attrs).get('src', '').startswith('https://www.gittip.com/')):
-            self.acceptable_elements.add('iframe')
-        _HTMLSanitizer.unknown_starttag(self, tag, attrs)

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/Allura/allura/tests/functional/test_root.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/functional/test_root.py b/Allura/allura/tests/functional/test_root.py
index b498cd9..9681d61 100644
--- a/Allura/allura/tests/functional/test_root.py
+++ b/Allura/allura/tests/functional/test_root.py
@@ -149,7 +149,7 @@ class TestRootController(TestController):
         n = M.Neighborhood.query.get(name='Projects')
         r = self.app.get(
             '/nf/markdown_to_html?markdown=*aaa*bb[wiki:Home]&project=test&app=bugs&neighborhood=%s' % n._id, validate_chunk=True)
-        assert '<p><em>aaa</em>bb<a class="alink" href="/p/test/wiki/Home/">[wiki:Home]</a></p>' in r, r
+        assert '<p><em>aaa</em>bb<a href="/p/test/wiki/Home/" class="alink">[wiki:Home]</a></p>' in r, r
 
     def test_slash_redirect(self):
         self.app.get('/p', status=301)

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/Allura/allura/tests/test_globals.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/test_globals.py b/Allura/allura/tests/test_globals.py
index b546671..702e037 100644
--- a/Allura/allura/tests/test_globals.py
+++ b/Allura/allura/tests/test_globals.py
@@ -120,45 +120,45 @@ def test_macro_projects():
                        project=p_nbhd.neighborhood_project,
                        user=M.User.by_username('test-admin')):
         r = g.markdown_wiki.convert('[[projects]]')
-        assert '<img alt="Test Project Logo"' in r, r
-        assert '<img alt="A Subproject Logo"' in r, r
+        assert 'alt="Test Project Logo"' in r, r
+        assert 'alt="A Subproject Logo"' in r, r
         r = g.markdown_wiki.convert('[[projects labels=root]]')
-        assert '<img alt="Test Project Logo"' in r, r
-        assert '<img alt="A Subproject Logo"' not in r, r
+        assert 'alt="Test Project Logo"' in r, r
+        assert 'alt="A Subproject Logo"' not in r, r
         r = g.markdown_wiki.convert('[[projects labels=sub1]]')
-        assert '<img alt="Test Project Logo"' not in r, r
-        assert '<img alt="A Subproject Logo"' in r, r
+        assert 'alt="Test Project Logo"' not in r, r
+        assert 'alt="A Subproject Logo"' in r, r
         r = g.markdown_wiki.convert('[[projects labels=test]]')
-        assert '<img alt="Test Project Logo"' in r, r
-        assert '<img alt="A Subproject Logo"' in r, r
+        assert 'alt="Test Project Logo"' in r, r
+        assert 'alt="A Subproject Logo"' in r, r
         r = g.markdown_wiki.convert('[[projects labels=test,root]]')
-        assert '<img alt="Test Project Logo"' in r, r
-        assert '<img alt="A Subproject Logo"' not in r, r
+        assert 'alt="Test Project Logo"' in r, r
+        assert 'alt="A Subproject Logo"' not in r, r
         r = g.markdown_wiki.convert('[[projects labels=test,sub1]]')
-        assert '<img alt="Test Project Logo"' not in r, r
-        assert '<img alt="A Subproject Logo"' in r, r
+        assert 'alt="Test Project Logo"' not in r, r
+        assert 'alt="A Subproject Logo"' in r, r
         r = g.markdown_wiki.convert('[[projects labels=root|sub1]]')
-        assert '<img alt="Test Project Logo"' in r, r
-        assert '<img alt="A Subproject Logo"' in r, r
+        assert 'alt="Test Project Logo"' in r, r
+        assert 'alt="A Subproject Logo"' in r, r
         r = g.markdown_wiki.convert('[[projects labels=test,root|root,sub1]]')
-        assert '<img alt="Test Project Logo"' in r, r
-        assert '<img alt="A Subproject Logo"' not in r, r
+        assert 'alt="Test Project Logo"' in r, r
+        assert 'alt="A Subproject Logo"' not in r, r
         r = g.markdown_wiki.convert('[[projects labels=test,root|test,sub1]]')
-        assert '<img alt="Test Project Logo"' in r, r
-        assert '<img alt="A Subproject Logo"' in r, r
+        assert 'alt="Test Project Logo"' in r, r
+        assert 'alt="A Subproject Logo"' in r, r
         r = g.markdown_wiki.convert('[[projects show_total=True sort=random]]')
-        assert '<p class="macro_projects_total">3 Projects</p>' in r, r
+        assert '<p class="macro_projects_total">3 Projects' in r, r
         r = g.markdown_wiki.convert(
             '[[projects show_total=True private=True sort=random]]')
-        assert '<p class="macro_projects_total">1 Projects</p>' in r, r
-        assert '<img alt="Test 2 Logo"' in r, r
-        assert '<img alt="Test Project Logo"' not in r, r
-        assert '<img alt="A Subproject Logo"' not in r, r
+        assert '<p class="macro_projects_total">1 Projects' in r, r
+        assert 'alt="Test 2 Logo"' in r, r
+        assert 'alt="Test Project Logo"' not in r, r
+        assert 'alt="A Subproject Logo"' not in r, r
 
         r = g.markdown_wiki.convert('[[projects show_proj_icon=True]]')
-        assert '<img alt="Test Project Logo"' in r
+        assert 'alt="Test Project Logo"' in r
         r = g.markdown_wiki.convert('[[projects show_proj_icon=False]]')
-        assert '<img alt="Test Project Logo"' not in r
+        assert 'alt="Test Project Logo"' not in r
 
 
 def test_macro_gittip_button():
@@ -167,7 +167,7 @@ def test_macro_gittip_button():
     with h.push_config(c, project=p_test):
         r = g.markdown_wiki.convert('[[gittip_button username=test]]')
     assert_equal(
-        r, u'<div class="markdown_content"><p><iframe height="22pt" src="https://www.gittip.com/test/widget.html" style="border: 0; margin: 0; padding: 0;" width="48pt"></iframe></p>\n</div>')
+        r, u'<div class="markdown_content"><p><iframe width="48pt" height="22pt" style="border: 0; margin: 0; padding: 0;" src="https://www.gittip.com/test/widget.html"></iframe>\n</p></div>')
 
 
 def test_macro_neighborhood_feeds():
@@ -212,7 +212,7 @@ def test_macro_members():
     r = g.markdown_wiki.convert('[[members limit=2]]')
     assert_equal(r, '<div class="markdown_content"><h6>Project Members:</h6>\n'
                  '<ul class="md-users-list">\n'
-                 '<li><a href="/u/test-admin/">Test Admin</a> (admin)</li><li><a href="/u/test-user/">Test User</a></li>\n'
+                 '<li><a href="/u/test-admin">Test Admin</a> (admin)</li><li><a href="/u/test-user">Test User</a></li>\n'
                  '<li class="md-users-list-more"><a href="/p/test/_members">All Members</a></li>\n'
                  '</ul>\n'
                  '</div>')
@@ -225,7 +225,7 @@ def test_macro_members_escaping():
     r = g.markdown_wiki.convert('[[members]]')
     assert_equal(r, u'<div class="markdown_content"><h6>Project Members:</h6>\n'
                  u'<ul class="md-users-list">\n'
-                 u'<li><a href="/u/test-admin/">Test Admin &lt;script&gt;</a> (admin)</li>\n'
+                 u'<li><a href="/u/test-admin">Test Admin &lt;script&gt;</a> (admin)</li>\n'
                  u'</ul>\n</div>')
 
 
@@ -236,7 +236,7 @@ def test_macro_project_admins():
     with h.push_context('test', neighborhood='Projects'):
         r = g.markdown_wiki.convert('[[project_admins]]')
     assert_equal(
-        r, u'<div class="markdown_content"><h6>Project Admins:</h6>\n<ul class="md-users-list">\n<li><a href="/u/test-admin/">Test \xc5dmin &lt;script&gt;</a></li>\n</ul>\n</div>')
+        r, u'<div class="markdown_content"><h6>Project Admins:</h6>\n<ul class="md-users-list">\n<li><a href="/u/test-admin">Test \xc5dmin &lt;script&gt;</a></li>\n</ul>\n</div>')
 
 
 @with_setup(setUp)
@@ -253,7 +253,7 @@ def test_macro_project_admins_one_br():
 
 
 @td.with_wiki
-def test_macro_include_extra_br():
+def test_macro_include_no_extra_br():
     p_nbhd = M.Neighborhood.query.get(name='Projects')
     p_test = M.Project.query.get(shortname='test', neighborhood_id=p_nbhd._id)
     wiki = p_test.app_instance('wiki')
@@ -278,6 +278,7 @@ def test_macro_include_extra_br():
 <div><div class="markdown_content"><p>included page 2</p></div></div>
 <div><div class="markdown_content"><p>included page 3</p></div></div>
 </p>
+<p></p>
 </div>
 '''.strip().replace('\n', '')
     assert html.strip().replace('\n', '') == expected_html, html
@@ -324,7 +325,7 @@ def test_macro_embed(oembed_fetch):
     }
     r = g.markdown_wiki.convert(
         '[[embed url=http://www.youtube.com/watch?v=kOLpSPEA72U]]')
-    assert_in('<div class="grid-20"><iframe height="270" src="https://www.youtube.com/embed/kOLpSPEA72U?feature=oembed" width="480"></iframe></div>',
+    assert_in('<div class="grid-20"><iframe src="https://www.youtube.com/embed/kOLpSPEA72U?feature=oembed" height="270" width="480"></iframe>\n</div>',
               r)
 
 
@@ -355,29 +356,29 @@ def test_wiki_artifact_links():
     assert 'See <span>[18:13:49]</span>' in text, text
     with h.push_context('test', 'wiki', neighborhood='Projects'):
         text = g.markdown.convert('Read [here](Home) about our project')
-        assert '<a class="" href="/p/test/wiki/Home/">here</a>' in text, text
+        assert '<a href="/p/test/wiki/Home/" class="">here</a>' in text, text
         text = g.markdown.convert('[Go home](test:wiki:Home)')
-        assert '<a class="" href="/p/test/wiki/Home/">Go home</a>' in text, text
+        assert '<a href="/p/test/wiki/Home/" class="">Go home</a>' in text, text
         text = g.markdown.convert('See [test:wiki:Home]')
-        assert '<a class="alink" href="/p/test/wiki/Home/">[test:wiki:Home]</a>' in text, text
+        assert '<a href="/p/test/wiki/Home/" class="alink">[test:wiki:Home]</a>' in text, text
 
 
 def test_markdown_links():
     with patch.dict(tg.config, {'nofollow_exempt_domains': 'foobar.net'}):
         text = g.markdown.convert(
             'Read [here](http://foobar.net/) about our project')
-        assert_in('href="http://foobar.net/">here</a> about', text)
+        assert_in('href="http://foobar.net/" class="">here</a> about', text)
 
     text = g.markdown.convert(
         'Read [here](http://foobar.net/) about our project')
-    assert_in('href="http://foobar.net/" rel="nofollow">here</a> about', text)
+    assert_in('href="http://foobar.net/" class="" rel="nofollow">here</a> about', text)
 
     text = g.markdown.convert('Read [here](/p/foobar/blah) about our project')
-    assert_in('href="/p/foobar/blah">here</a> about', text)
+    assert_in('href="/p/foobar/blah" class="">here</a> about', text)
 
     text = g.markdown.convert('Read <http://foobar.net/> about our project')
     assert_in(
-        'href="http://foobar.net/" rel="nofollow">http://foobar.net/</a> about', text)
+        'href="http://foobar.net" rel="nofollow">http://foobar.net/</a> about', text)
 
 
 def test_markdown_and_html():
@@ -397,7 +398,7 @@ def test_markdown_within_html():
 
 def test_markdown_with_html_comments():
     text = g.markdown.convert('test <!-- comment -->')
-    assert '<div class="markdown_content"><p>test <!-- comment --></p></div>' == text, text
+    assert '<div class="markdown_content"><p>test </p></div>' == text, text
 
 
 def test_markdown_big_text():
@@ -411,7 +412,7 @@ def test_markdown_big_text():
 def test_markdown_basics():
     with h.push_context('test', 'wiki', neighborhood='Projects'):
         text = g.markdown.convert('# Foo!\n[Home]')
-        assert '<a class="alink" href=' in text, text
+        assert '<a href="/p/test/wiki/Home/" class="alink">[Home]</a>' in text, text
         text = g.markdown.convert('# Foo!\n[Rooted]')
         assert '<a href=' not in text, text
 
@@ -464,6 +465,21 @@ def test_markdown_autolink_with_escape():
     assert 'href="http://www.phpmyadmin.net/home_page/security/#target"' in r, r
 
 
+def test_markdown_invalid_script():
+    r = g.markdown.convert('<script>alert(document.cookies)</script>')
+    assert_equal('<div class="markdown_content">&lt;script&gt;alert(document.cookies)&lt;/script&gt;\n</div>', r)
+
+
+def test_markdown_invalid_onerror():
+    r = g.markdown.convert('<img src=x onerror=alert(document.cookie)>')
+    assert_not_in('onerror', r)
+
+
+def test_markdown_invalid_tagslash():
+    r = g.markdown.convert('<div/onload><img src=x onerror=alert(document.cookie)>')
+    assert_not_in('onerror', r)
+
+
 @td.with_wiki
 def test_macro_include():
     r = g.markdown.convert('[[include ref=Home id=foo]]')
@@ -564,8 +580,8 @@ def test_myprojects_macro():
     for p in c.user.my_projects():
         if p.deleted or p.is_nbhd_project:
             continue
-        proj_title = '<h2><a href="%s">%s</a></h2>' % (p.url(), p.name)
-        assert proj_title in r
+        proj_title = '<h2><a href="%s">%s</a></h2>' % (p.url().rstrip('/'), p.name)
+        assert_in(proj_title, r)
 
     h.set_context('u/test-user-1', 'wiki', neighborhood='Users')
     user = M.User.query.get(username='test-user-1')
@@ -573,8 +589,8 @@ def test_myprojects_macro():
     for p in user.my_projects():
         if p.deleted or p.is_nbhd_project:
             continue
-        proj_title = '<h2><a href="%s">%s</a></h2>' % (p.url(), p.name)
-        assert proj_title in r
+        proj_title = '<h2><a href="%s">%s</a></h2>' % (p.url().rstrip('/'), p.name)
+        assert_in(proj_title, r)
 
 
 @td.with_wiki

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/Allura/allura/tests/test_markdown.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/test_markdown.py b/Allura/allura/tests/test_markdown.py
index 7c6d164..3ff53b4 100644
--- a/Allura/allura/tests/test_markdown.py
+++ b/Allura/allura/tests/test_markdown.py
@@ -134,13 +134,13 @@ class TestCommitMessageExtension(unittest.TestCase):
 Not *strong* or _underlined_."""
 
         expected_html = """\
-<div class="markdown_content"><p># Not A Heading #<br />
----<br />
-* <a href="/p/project/tool/artifact/">#100</a>, <a href="/p/project/tool/artifact/">r2</a><br />
-* <a href="/p/project/tool/artifact/">ticket:100</a><br />
-* <a href="/p/project/tool/artifact/#abc">comment:13:ticket:2</a><br />
-* <a href="/p/project/tool/2/tree/test.py#l3">source:test.py@2#L3</a></p>
-<p>Not *strong* or _underlined_.</p></div>"""
+<div class="markdown_content"><p># Not A Heading #<br>
+---<br>
+* <a href=/p/project/tool/artifact/>#100</a>, <a href=/p/project/tool/artifact/>r2</a><br>
+* <a href=/p/project/tool/artifact/>ticket:100</a><br>
+* <a href=/p/project/tool/artifact/#abc>comment:13:ticket:2</a><br>
+* <a href=/p/project/tool/2/tree/test.py#l3>source:test.py@2#L3</a></p>
+<p>Not *strong* or _underlined_.</div>"""
 
         md = ForgeMarkdown(
             extensions=[mde.CommitMessageExtension(app), 'nl2br'],

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/Allura/allura/tests/test_utils.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/test_utils.py b/Allura/allura/tests/test_utils.py
index e791503..5e4135b 100644
--- a/Allura/allura/tests/test_utils.py
+++ b/Allura/allura/tests/test_utils.py
@@ -239,14 +239,17 @@ class TestCodeStats(unittest.TestCase):
 
 class TestHTMLSanitizer(unittest.TestCase):
 
+    def simple_tag_list(self, sanitizer):
+        # no attrs, no close tag flag check, just real simple
+        return [
+            t['name'] for t in sanitizer if t.get('name')
+        ]
+
     def test_html_sanitizer_iframe(self):
-        p = utils.ForgeHTMLSanitizer('utf-8', '')
-        p.feed('<div><iframe></iframe></div>')
-        assert_equal(p.output(), '<div></div>')
+        p = utils.ForgeHTMLSanitizer('<div><iframe></iframe></div>')
+        assert_equal(self.simple_tag_list(p), ['div', 'div'])
 
     def test_html_sanitizer_youtube_iframe(self):
-        p = utils.ForgeHTMLSanitizer('utf-8', '')
-        p.feed(
-            '<div><iframe src="https://www.youtube.com/embed/kOLpSPEA72U?feature=oembed"></iframe></div>')
+        p = utils.ForgeHTMLSanitizer('<div><iframe src="https://www.youtube.com/embed/kOLpSPEA72U?feature=oembed"></iframe></div>')
         assert_equal(
-            p.output(), '<div><iframe src="https://www.youtube.com/embed/kOLpSPEA72U?feature=oembed"></iframe></div>')
+            self.simple_tag_list(p), ['div', 'iframe', 'div'])

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/Allura/allura/tests/unit/test_solr.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/unit/test_solr.py b/Allura/allura/tests/unit/test_solr.py
index aca6969..0ee5b13 100644
--- a/Allura/allura/tests/unit/test_solr.py
+++ b/Allura/allura/tests/unit/test_solr.py
@@ -111,7 +111,7 @@ class TestSearchIndexable(unittest.TestCase):
 
     def test_solarize_html_in_text(self):
         self.obj.index = lambda: dict(text='<script>a(1)</script>')
-        assert_equal(self.obj.solarize(), dict(text=''))
+        assert_equal(self.obj.solarize(), dict(text='<script>a(1)</script>'))
         self.obj.index = lambda: dict(text='&lt;script&gt;a(1)&lt;/script&gt;')
         assert_equal(self.obj.solarize(), dict(text='<script>a(1)</script>'))
 

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/ForgeBlog/forgeblog/tests/test_commands.py
----------------------------------------------------------------------
diff --git a/ForgeBlog/forgeblog/tests/test_commands.py b/ForgeBlog/forgeblog/tests/test_commands.py
index 7f2d0f7..b0cd2b4 100644
--- a/ForgeBlog/forgeblog/tests/test_commands.py
+++ b/ForgeBlog/forgeblog/tests/test_commands.py
@@ -149,8 +149,8 @@ def test_plaintext_preprocessor():
     html = g.markdown.convert(text)
     assert_equal(html,
                  '<div class="markdown_content"><p>1. foo '
-                 '#foo bar <a class="" href="../baz">baz</a> foo bar '
-                 '#foo bar <a class="" href="../baz"> baz </a></p></div>'
+                 '#foo bar <a href="../baz" class="">baz</a> foo bar '
+                 '#foo bar <a href="../baz" class=""> baz </a></p></div>'
                  )
 
 
@@ -172,6 +172,6 @@ def test_plaintext_preprocessor_wrapped():
     html = g.markdown.convert(text)
     assert_equal(html,
                  '<div class="markdown_content"><p>1. foo</p>\n'
-                 '<p>#foo bar <a class="" href="../baz">baz</a> foo bar </p>\n'
-                 '<p>#foo bar <a class="" href="../baz"> baz </a></p></div>'
+                 '<p>#foo bar <a href="../baz" class="">baz</a> foo bar </p>\n'
+                 '<p>#foo bar <a href="../baz" class=""> baz </a></p></div>'
                  )

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/ForgeBlog/forgeblog/tests/unit/test_blog_post.py
----------------------------------------------------------------------
diff --git a/ForgeBlog/forgeblog/tests/unit/test_blog_post.py b/ForgeBlog/forgeblog/tests/unit/test_blog_post.py
index fadfcf0..4d0b740 100644
--- a/ForgeBlog/forgeblog/tests/unit/test_blog_post.py
+++ b/ForgeBlog/forgeblog/tests/unit/test_blog_post.py
@@ -132,6 +132,6 @@ class TestHtmlPreview(BlogTestWithModel):
                     'fugiat nulla pariatur. Excepteur sint occaecat cupidatat '
                     'non proident, sunt in culpa qui officia deserunt mollit '
                     'anim id est laborum.... '
-                    '<a class="" href="/p/test/blog/%s/%02i/untitled/">'
+                    '<a href="/p/test/blog/%s/%02i/untitled/" class="">'
                     'read more</a></p></div>') % (now.year, now.month)
         assert_equal(self._make_post(text).html_text_preview, expected)

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/ForgeGit/forgegit/tests/functional/test_controllers.py
----------------------------------------------------------------------
diff --git a/ForgeGit/forgegit/tests/functional/test_controllers.py b/ForgeGit/forgegit/tests/functional/test_controllers.py
index 3af6f1b..632cad5 100644
--- a/ForgeGit/forgegit/tests/functional/test_controllers.py
+++ b/ForgeGit/forgegit/tests/functional/test_controllers.py
@@ -124,7 +124,7 @@ class TestRootController(_TestCase):
         resp = self.app.get(
             '/src-git/ci/1e146e67985dcd71c74de79613719bef7bddca4a/log/')
         assert 'Initial commit' in resp
-        assert '<div class="markdown_content"><p>Change README</p></div>' in resp
+        assert '<div class="markdown_content"><p>Change README</div>' in resp
         assert 'tree/README?format=raw">Download</a>' not in resp
         assert 'Tree' in resp.html.findAll(
             'td')[2].text, resp.html.findAll('td')[2].text

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/ForgeTracker/forgetracker/tests/functional/test_root.py
----------------------------------------------------------------------
diff --git a/ForgeTracker/forgetracker/tests/functional/test_root.py b/ForgeTracker/forgetracker/tests/functional/test_root.py
index e075112..7c9fffe 100644
--- a/ForgeTracker/forgetracker/tests/functional/test_root.py
+++ b/ForgeTracker/forgetracker/tests/functional/test_root.py
@@ -891,8 +891,8 @@ class TestFunctionalController(TrackerTestController):
         assert_not_in('Tickets: <s>#1</s>', r)
         assert_in('Tickets: <s>#2</s>', r)
 
-        assert_in('<a class="alink" href="/p/test/bugs/1/">[#1]</a>', r)
-        assert_in('<a class="alink strikethrough" href="/p/test/bugs/2/">[#2]</a>', r)
+        assert_in('<a href="/p/test/bugs/1/" class="alink">[#1]</a>', r.body)
+        assert_in('<a href="/p/test/bugs/2/" class="alink strikethrough">[#2]</a>', r.body)
 
     def test_ticket_view_editable(self):
         summary = 'test ticket view page can be edited'
@@ -2324,7 +2324,7 @@ class TestFunctionalController(TrackerTestController):
             return_path, rcpts, body = _client.sendmail.call_args[0]
             body = body.split('\n')
             assert 'Subject: [test:bugs] #1 test <h2> ticket' in body
-            assert '<p><strong> <a class="alink" href="http://localhost/p/test/bugs/1/">[bugs:#1]</a> test &lt;h2&gt; ticket</strong></p>' in body
+            assert '<p><strong> <a href="http://localhost/p/test/bugs/1/" class="alink">[bugs:#1]</a> test &lt;h2&gt; ticket</strong></p>' in body
 
     @patch('forgetracker.search.query_filter_choices')
     def test_multiselect(self, query_filter_choices):

http://git-wip-us.apache.org/repos/asf/allura/blob/b0729afc/requirements.txt
----------------------------------------------------------------------
diff --git a/requirements.txt b/requirements.txt
index cf32f03..be82654 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,6 +14,7 @@ feedparser==5.1.3
 FormEncode==1.2.4
 # dep of Creoleparser
 Genshi==0.6
+html5lib==0.999
 # dep of oauth2
 httplib2==0.7.4
 iso8601==0.1.4