You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2012/11/06 21:25:48 UTC
[2/2] git commit: [#4888] use markdown escaping instead of the custom `[plain]` tag

[#4888] use markdown escaping instead of the custom `[plain]` tag


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/7375db4a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/7375db4a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/7375db4a

Branch: refs/heads/master
Commit: 7375db4aad5807fbfa2bf8cdb70a0ad8b255eda0
Parents: e1b98e2
Author: Dave Brondsema <db...@geek.net>
Authored: Mon Nov 5 17:32:35 2012 +0000
Committer: Cory Johns <jo...@geek.net>
Committed: Tue Nov 6 20:25:17 2012 +0000

----------------------------------------------------------------------
 Allura/allura/lib/markdown_extensions.py   |    8 +-
 ForgeBlog/forgeblog/command/rssfeeds.py    |  116 ++---------------------
 ForgeBlog/forgeblog/tests/test_commands.py |   73 ++------------
 3 files changed, 24 insertions(+), 173 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/7375db4a/Allura/allura/lib/markdown_extensions.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/markdown_extensions.py b/Allura/allura/lib/markdown_extensions.py
index ea27f49..44197c3 100644
--- a/Allura/allura/lib/markdown_extensions.py
+++ b/Allura/allura/lib/markdown_extensions.py
@@ -54,6 +54,11 @@ class ForgeExtension(markdown.Extension):
         self.forge_processor.reset()
 
 class PlainTextPreprocessor(markdown.preprocessors.Preprocessor):
+    '''
+    This was used earlier for [plain] tags that the Blog tool's rss importer
+    created, before html2text did good escaping of all special markdown chars.
+    Can be deprecated.
+    '''
 
     def run(self, lines):
         text = "\n".join(lines)
@@ -289,7 +294,7 @@ class LineOrientedTreeProcessor(markdown.treeprocessors.Treeprocessor):
 
     def __init__(self, md):
         self._markdown = md
-    
+
     def run(self, root):
         for node in root.getiterator('p'):
             if not node.text: continue
@@ -324,4 +329,3 @@ class AutolinkPattern(markdown.inlinepatterns.LinkPattern):
         result.text = old_link
         result.set('href', old_link)
         return result
-

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/7375db4a/ForgeBlog/forgeblog/command/rssfeeds.py
----------------------------------------------------------------------
diff --git a/ForgeBlog/forgeblog/command/rssfeeds.py b/ForgeBlog/forgeblog/command/rssfeeds.py
index 4ae7485..c824f96 100644
--- a/ForgeBlog/forgeblog/command/rssfeeds.py
+++ b/ForgeBlog/forgeblog/command/rssfeeds.py
@@ -1,6 +1,5 @@
 from time import mktime
 from datetime import datetime
-from HTMLParser import HTMLParser
 
 import feedparser
 import html2text
@@ -21,104 +20,6 @@ from allura.lib.decorators import exceptionless
 
 html2text.BODY_WIDTH = 0
 
-class MDHTMLParser(HTMLParser):
-    def __init__(self):
-        HTMLParser.__init__(self)
-        self.NO_END_TAGS = ["area", "base", "basefont", "br", "col", "frame",
-                            "hr", "img", "input", "link", "meta", "param"]
-        self.CUSTTAG_OPEN = u"[plain]"
-        self.CUSTTAG_CLOSE = u"[/plain]"
-        self.result_doc = u""
-        self.custom_tag_opened = False
-
-    def handle_starttag(self, tag, attrs):
-        if self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
-            self.custom_tag_opened = False
-
-        tag_text = u"<%s" % tag
-        for attr in attrs:
-            if attr[1].find('"'):
-                tag_text = u"%s %s='%s'" % (tag_text, attr[0], attr[1])
-            else:
-                tag_text = u'%s %s="%s"' % (tag_text, attr[0], attr[1])
-        if tag not in self.NO_END_TAGS:
-            tag_text = tag_text + ">"
-        else:
-            tag_text = tag_text + "/>"
-        self.result_doc = u"%s%s" % (self.result_doc, tag_text)
-
-    def handle_endtag(self, tag):
-        if tag not in self.NO_END_TAGS:
-            if self.custom_tag_opened:
-                self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
-                self.custom_tag_opened = False
-
-            self.result_doc = u"%s</%s>" % (self.result_doc, tag)
-
-    def handle_data(self, data):
-        res_data = ''
-
-        for line in data.splitlines(True):
-            # pre-emptive special case
-            if not line or line.isspace():
-                # don't wrap all whitespace lines
-                res_data += line
-                continue
-
-            # open custom tag
-            if not self.custom_tag_opened:
-                res_data += self.CUSTTAG_OPEN
-                self.custom_tag_opened = True
-            # else: cust tag might be open already from previous incomplete data block
-
-            # data
-            res_data += line.rstrip('\r\n')  # strip EOL (add close tag before)
-
-            # close custom tag
-            if line.endswith(('\r','\n')):
-                res_data += self.CUSTTAG_CLOSE + '\n'
-                self.custom_tag_opened = False
-            # else: no EOL could mean we're dealing with incomplete data block;
-                # leave it open for next handle_data, handle_starttag, or handle_endtag to clean up
-
-        self.result_doc += res_data
-
-    def handle_comment(self, data):
-        if self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
-            self.custom_tag_opened = False
-
-        self.result_doc = u"%s<!-- %s -->" % (self.result_doc, data)
-
-    def handle_entityref(self, name):
-        if not self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_OPEN)
-            self.custom_tag_opened = True
-
-        self.result_doc = u"%s&%s;" % (self.result_doc, name)
-
-    def handle_charref(self, name):
-        if not self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_OPEN)
-            self.custom_tag_opened = True
-
-        self.result_doc = u"%s&%s;" % (self.result_doc, name)
-
-    def handle_decl(self, data):
-        if self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
-            self.custom_tag_opened = False
-
-        self.result_doc = u"%s<!%s>" % (self.result_doc, data)
-
-    def close(self):
-        HTMLParser.close(self)
-
-        if self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
-            self.custom_tag_opened = False
-
 
 class RssFeedsCommand(base.BlogCommand):
     summary = 'Rss feed client'
@@ -189,18 +90,17 @@ class RssFeedsCommand(base.BlogCommand):
             content = u''
             for ct in e.content:
                 if ct.type != 'text/html':
-                    content += '[plain]%s[/plain]' % ct.value
+                    content += html2text.escape_md_section(ct.value, snob=True)
                 else:
-                    parser = MDHTMLParser()
-                    parser.feed(ct.value)
-                    parser.close() # must be before using the result_doc
-                    markdown_content = html2text.html2text(parser.result_doc, baseurl=e.link)
-
+                    html2md = html2text.HTML2Text(baseurl=e.link)
+                    html2md.escape_snob = True
+                    markdown_content = html2md.handle(ct.value)
                     content += markdown_content
         else:
-            content = '[plain]%s[/plain]' % getattr(e, 'summary',
-                                                getattr(e, 'subtitle',
-                                                    getattr(e, 'title')))
+            content = html2text.escape_md_section(getattr(e, 'summary',
+                                                    getattr(e, 'subtitle',
+                                                      getattr(e, 'title'))),
+                                                  snob=True)
 
         content += u' [link](%s)' % e.link
         updated = datetime.utcfromtimestamp(mktime(e.updated_parsed))

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/7375db4a/ForgeBlog/forgeblog/tests/test_commands.py
----------------------------------------------------------------------
diff --git a/ForgeBlog/forgeblog/tests/test_commands.py b/ForgeBlog/forgeblog/tests/test_commands.py
index 58961bd..0357c82 100644
--- a/ForgeBlog/forgeblog/tests/test_commands.py
+++ b/ForgeBlog/forgeblog/tests/test_commands.py
@@ -89,71 +89,18 @@ def test_pull_rss_feeds(parsefeed):
     assert_equal(posts.count(), 3)
     posts = posts.all()
     assert_equal(posts[0].title, 'Test')
-    assert_equal(posts[0].text, '[plain]This is a test[/plain] [link](http://example.com/)')
+    assert_equal(posts[0].text, 'This is a test [link](http://example.com/)')
     assert_equal(posts[1].title, 'Default Title 2')
-    assert_equal(posts[1].text, '[plain]Test feed[/plain] [link](http://example.com/)')
+    assert_equal(posts[1].text, 'Test feed [link](http://example.com/)')
     assert_equal(posts[2].title, 'Default Title 3')
-    assert_equal(posts[2].text,
-        "[plain]1. foo[/plain]\n"
-        "\n"
-        "[plain]#foo bar [/plain][[plain]baz[/plain]](baz) "
-        "[plain]foo bar[/plain] \n"
-        "\n"
-        "[plain]#foo bar [/plain][ [plain]baz[/plain] ](baz)\n "
-        "[link](http://example.com/)"
-    )
-
-def test_plaintext_parser():
-    parser = rssfeeds.MDHTMLParser()
-    parser.feed(
-        '1. foo\n'
-        '\n'
-        '#foo bar <a href="baz">baz</a>\n'
-        'foo bar\n'
-        '\n'
-        '#foo bar <a href="baz">\n'
-        'baz\n'
-        '</a>\n'
-    )
-    parser.close()
-    assert_equal(parser.result_doc,
-        "[plain]1. foo[/plain]\n"
-        "\n"
-        "[plain]#foo bar [/plain]<a href='baz'>[plain]baz[/plain]</a>\n"
-        "[plain]foo bar[/plain]\n"
-        "\n"
-        "[plain]#foo bar [/plain]<a href='baz'>\n"
-        "[plain]baz[/plain]\n"
-        "</a>\n"
-    )
-
-def test_plaintext_parser_wrapped():
-    parser = rssfeeds.MDHTMLParser()
-    parser.feed(
-        '<p>1. foo</p>\n'
-        '\n'
-        '<p>\n'
-        '#foo bar <a href="baz">baz</a>\n'
-        'foo bar\n'
-        '</p>\n'
-        '\n'
-        '<p>#foo bar <a href="baz">\n'
-        'baz\n'
-        '</a></p>\n'
-    )
-    parser.close()
-    assert_equal(parser.result_doc,
-        "<p>[plain]1. foo[/plain]</p>\n"
-        "\n"
-        "<p>\n"
-        "[plain]#foo bar [/plain]<a href='baz'>[plain]baz[/plain]</a>\n"
-        "[plain]foo bar[/plain]\n"
-        "</p>\n"
-        "\n"
-        "<p>[plain]#foo bar [/plain]<a href='baz'>\n"
-        "[plain]baz[/plain]\n"
-        "</a></p>\n"
-    )
+    assert_equal(posts[2].text, "\n".join([
+       r"1\. foo",
+        "",
+       r"\#foo bar [baz](baz) foo bar ",
+        "",
+       r"\#foo bar [ baz ](baz)",
+        " [link](http://example.com/)",
+    ]))
 
 def test_plaintext_preprocessor():
     text = html2text(