You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2012/11/06 21:25:48 UTC
[2/2] git commit: [#4888] use markdown escaping instead of the custom `[plain]` tag
[#4888] use markdown escaping instead of the custom `[plain]` tag
Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/7375db4a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/7375db4a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/7375db4a
Branch: refs/heads/master
Commit: 7375db4aad5807fbfa2bf8cdb70a0ad8b255eda0
Parents: e1b98e2
Author: Dave Brondsema <db...@geek.net>
Authored: Mon Nov 5 17:32:35 2012 +0000
Committer: Cory Johns <jo...@geek.net>
Committed: Tue Nov 6 20:25:17 2012 +0000
----------------------------------------------------------------------
Allura/allura/lib/markdown_extensions.py | 8 +-
ForgeBlog/forgeblog/command/rssfeeds.py | 116 ++---------------------
ForgeBlog/forgeblog/tests/test_commands.py | 73 ++------------
3 files changed, 24 insertions(+), 173 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/7375db4a/Allura/allura/lib/markdown_extensions.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/markdown_extensions.py b/Allura/allura/lib/markdown_extensions.py
index ea27f49..44197c3 100644
--- a/Allura/allura/lib/markdown_extensions.py
+++ b/Allura/allura/lib/markdown_extensions.py
@@ -54,6 +54,11 @@ class ForgeExtension(markdown.Extension):
self.forge_processor.reset()
class PlainTextPreprocessor(markdown.preprocessors.Preprocessor):
+ '''
+ This was used earlier for [plain] tags that the Blog tool's rss importer
+ created, before html2text did good escaping of all special markdown chars.
+ Can be deprecated.
+ '''
def run(self, lines):
text = "\n".join(lines)
@@ -289,7 +294,7 @@ class LineOrientedTreeProcessor(markdown.treeprocessors.Treeprocessor):
def __init__(self, md):
self._markdown = md
-
+
def run(self, root):
for node in root.getiterator('p'):
if not node.text: continue
@@ -324,4 +329,3 @@ class AutolinkPattern(markdown.inlinepatterns.LinkPattern):
result.text = old_link
result.set('href', old_link)
return result
-
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/7375db4a/ForgeBlog/forgeblog/command/rssfeeds.py
----------------------------------------------------------------------
diff --git a/ForgeBlog/forgeblog/command/rssfeeds.py b/ForgeBlog/forgeblog/command/rssfeeds.py
index 4ae7485..c824f96 100644
--- a/ForgeBlog/forgeblog/command/rssfeeds.py
+++ b/ForgeBlog/forgeblog/command/rssfeeds.py
@@ -1,6 +1,5 @@
from time import mktime
from datetime import datetime
-from HTMLParser import HTMLParser
import feedparser
import html2text
@@ -21,104 +20,6 @@ from allura.lib.decorators import exceptionless
html2text.BODY_WIDTH = 0
-class MDHTMLParser(HTMLParser):
- def __init__(self):
- HTMLParser.__init__(self)
- self.NO_END_TAGS = ["area", "base", "basefont", "br", "col", "frame",
- "hr", "img", "input", "link", "meta", "param"]
- self.CUSTTAG_OPEN = u"[plain]"
- self.CUSTTAG_CLOSE = u"[/plain]"
- self.result_doc = u""
- self.custom_tag_opened = False
-
- def handle_starttag(self, tag, attrs):
- if self.custom_tag_opened:
- self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
- self.custom_tag_opened = False
-
- tag_text = u"<%s" % tag
- for attr in attrs:
- if attr[1].find('"'):
- tag_text = u"%s %s='%s'" % (tag_text, attr[0], attr[1])
- else:
- tag_text = u'%s %s="%s"' % (tag_text, attr[0], attr[1])
- if tag not in self.NO_END_TAGS:
- tag_text = tag_text + ">"
- else:
- tag_text = tag_text + "/>"
- self.result_doc = u"%s%s" % (self.result_doc, tag_text)
-
- def handle_endtag(self, tag):
- if tag not in self.NO_END_TAGS:
- if self.custom_tag_opened:
- self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
- self.custom_tag_opened = False
-
- self.result_doc = u"%s</%s>" % (self.result_doc, tag)
-
- def handle_data(self, data):
- res_data = ''
-
- for line in data.splitlines(True):
- # pre-emptive special case
- if not line or line.isspace():
- # don't wrap all whitespace lines
- res_data += line
- continue
-
- # open custom tag
- if not self.custom_tag_opened:
- res_data += self.CUSTTAG_OPEN
- self.custom_tag_opened = True
- # else: cust tag might be open already from previous incomplete data block
-
- # data
- res_data += line.rstrip('\r\n') # strip EOL (add close tag before)
-
- # close custom tag
- if line.endswith(('\r','\n')):
- res_data += self.CUSTTAG_CLOSE + '\n'
- self.custom_tag_opened = False
- # else: no EOL could mean we're dealing with incomplete data block;
- # leave it open for next handle_data, handle_starttag, or handle_endtag to clean up
-
- self.result_doc += res_data
-
- def handle_comment(self, data):
- if self.custom_tag_opened:
- self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
- self.custom_tag_opened = False
-
- self.result_doc = u"%s<!-- %s -->" % (self.result_doc, data)
-
- def handle_entityref(self, name):
- if not self.custom_tag_opened:
- self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_OPEN)
- self.custom_tag_opened = True
-
- self.result_doc = u"%s&%s;" % (self.result_doc, name)
-
- def handle_charref(self, name):
- if not self.custom_tag_opened:
- self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_OPEN)
- self.custom_tag_opened = True
-
- self.result_doc = u"%s&%s;" % (self.result_doc, name)
-
- def handle_decl(self, data):
- if self.custom_tag_opened:
- self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
- self.custom_tag_opened = False
-
- self.result_doc = u"%s<!%s>" % (self.result_doc, data)
-
- def close(self):
- HTMLParser.close(self)
-
- if self.custom_tag_opened:
- self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
- self.custom_tag_opened = False
-
class RssFeedsCommand(base.BlogCommand):
summary = 'Rss feed client'
@@ -189,18 +90,17 @@ class RssFeedsCommand(base.BlogCommand):
content = u''
for ct in e.content:
if ct.type != 'text/html':
- content += '[plain]%s[/plain]' % ct.value
+ content += html2text.escape_md_section(ct.value, snob=True)
else:
- parser = MDHTMLParser()
- parser.feed(ct.value)
- parser.close() # must be before using the result_doc
- markdown_content = html2text.html2text(parser.result_doc, baseurl=e.link)
-
+ html2md = html2text.HTML2Text(baseurl=e.link)
+ html2md.escape_snob = True
+ markdown_content = html2md.handle(ct.value)
content += markdown_content
else:
- content = '[plain]%s[/plain]' % getattr(e, 'summary',
- getattr(e, 'subtitle',
- getattr(e, 'title')))
+ content = html2text.escape_md_section(getattr(e, 'summary',
+ getattr(e, 'subtitle',
+ getattr(e, 'title'))),
+ snob=True)
content += u' [link](%s)' % e.link
updated = datetime.utcfromtimestamp(mktime(e.updated_parsed))
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/7375db4a/ForgeBlog/forgeblog/tests/test_commands.py
----------------------------------------------------------------------
diff --git a/ForgeBlog/forgeblog/tests/test_commands.py b/ForgeBlog/forgeblog/tests/test_commands.py
index 58961bd..0357c82 100644
--- a/ForgeBlog/forgeblog/tests/test_commands.py
+++ b/ForgeBlog/forgeblog/tests/test_commands.py
@@ -89,71 +89,18 @@ def test_pull_rss_feeds(parsefeed):
assert_equal(posts.count(), 3)
posts = posts.all()
assert_equal(posts[0].title, 'Test')
- assert_equal(posts[0].text, '[plain]This is a test[/plain] [link](http://example.com/)')
+ assert_equal(posts[0].text, 'This is a test [link](http://example.com/)')
assert_equal(posts[1].title, 'Default Title 2')
- assert_equal(posts[1].text, '[plain]Test feed[/plain] [link](http://example.com/)')
+ assert_equal(posts[1].text, 'Test feed [link](http://example.com/)')
assert_equal(posts[2].title, 'Default Title 3')
- assert_equal(posts[2].text,
- "[plain]1. foo[/plain]\n"
- "\n"
- "[plain]#foo bar [/plain][[plain]baz[/plain]](baz) "
- "[plain]foo bar[/plain] \n"
- "\n"
- "[plain]#foo bar [/plain][ [plain]baz[/plain] ](baz)\n "
- "[link](http://example.com/)"
- )
-
-def test_plaintext_parser():
- parser = rssfeeds.MDHTMLParser()
- parser.feed(
- '1. foo\n'
- '\n'
- '#foo bar <a href="baz">baz</a>\n'
- 'foo bar\n'
- '\n'
- '#foo bar <a href="baz">\n'
- 'baz\n'
- '</a>\n'
- )
- parser.close()
- assert_equal(parser.result_doc,
- "[plain]1. foo[/plain]\n"
- "\n"
- "[plain]#foo bar [/plain]<a href='baz'>[plain]baz[/plain]</a>\n"
- "[plain]foo bar[/plain]\n"
- "\n"
- "[plain]#foo bar [/plain]<a href='baz'>\n"
- "[plain]baz[/plain]\n"
- "</a>\n"
- )
-
-def test_plaintext_parser_wrapped():
- parser = rssfeeds.MDHTMLParser()
- parser.feed(
- '<p>1. foo</p>\n'
- '\n'
- '<p>\n'
- '#foo bar <a href="baz">baz</a>\n'
- 'foo bar\n'
- '</p>\n'
- '\n'
- '<p>#foo bar <a href="baz">\n'
- 'baz\n'
- '</a></p>\n'
- )
- parser.close()
- assert_equal(parser.result_doc,
- "<p>[plain]1. foo[/plain]</p>\n"
- "\n"
- "<p>\n"
- "[plain]#foo bar [/plain]<a href='baz'>[plain]baz[/plain]</a>\n"
- "[plain]foo bar[/plain]\n"
- "</p>\n"
- "\n"
- "<p>[plain]#foo bar [/plain]<a href='baz'>\n"
- "[plain]baz[/plain]\n"
- "</a></p>\n"
- )
+ assert_equal(posts[2].text, "\n".join([
+ r"1\. foo",
+ "",
+ r"\#foo bar [baz](baz) foo bar ",
+ "",
+ r"\#foo bar [ baz ](baz)",
+ " [link](http://example.com/)",
+ ]))
def test_plaintext_preprocessor():
text = html2text(