You are viewing a plain text version of this content. The canonical link for it was not preserved in this copy.
Posted to commits@allura.apache.org by br...@apache.org on 2020/06/17 21:48:05 UTC

[allura] 01/04: [#8367] upgrade html2text

This is an automated email from the ASF dual-hosted git repository.

brondsem pushed a commit to branch db/8367
in repository https://gitbox.apache.org/repos/asf/allura.git

commit 5b512ca4580ad18392f4cd3f3d2eca5949455e86
Author: Dave Brondsema <da...@brondsema.net>
AuthorDate: Tue Jun 16 17:15:31 2020 -0400

    [#8367] upgrade html2text
---
 Allura/allura/scripts/trac_export.py                     | 4 +++-
 ForgeBlog/forgeblog/tests/test_commands.py               | 7 ++++---
 ForgeImporters/forgeimporters/github/tests/test_wiki.py  | 5 ++++-
 ForgeImporters/forgeimporters/github/wiki.py             | 2 +-
 ForgeImporters/forgeimporters/trac/tests/test_tickets.py | 4 ++--
 requirements-optional.txt                                | 6 +-----
 6 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/Allura/allura/scripts/trac_export.py b/Allura/allura/scripts/trac_export.py
index be8004c..b5934d8 100644
--- a/Allura/allura/scripts/trac_export.py
+++ b/Allura/allura/scripts/trac_export.py
@@ -74,7 +74,7 @@ Export ticket data from a Trac instance''')
 class TracExport(object):
 
     PAGE_SIZE = 100
-    TICKET_URL = 'ticket/%d'
+    TICKET_URL = 'ticket/%s'
     QUERY_MAX_ID_URL = 'query?col=id&order=id&desc=1&max=2'
     QUERY_BY_PAGE_URL = 'query?col=id&col=time&col=changetime&order=id&max=' + \
         str(PAGE_SIZE) + '&page=%d'
@@ -171,6 +171,7 @@ class TracExport(object):
         ticket['description'] = html2text.html2text(
             desc.renderContents('utf8').decode('utf8')) if desc else ''
         comments = []
+        relative_base_url = six.moves.urllib.parse.urlparse(self.full_url(self.TICKET_URL % '')).path
         for comment in d.findAll('form', action='#comment'):
             c = {}
             c['submitter'] = re.sub(
@@ -180,6 +181,7 @@ class TracExport(object):
             changes = six.text_type(comment.find('ul', 'changes') or '')
             body = comment.find('div', 'comment')
             body = body.renderContents('utf8').decode('utf8') if body else ''
+            body = body.replace('href="{}'.format(relative_base_url), 'href="')  # crude way to rewrite ticket links
             c['comment'] = html2text.html2text(changes + body)
             c['class'] = 'COMMENT'
             comments.append(c)
diff --git a/ForgeBlog/forgeblog/tests/test_commands.py b/ForgeBlog/forgeblog/tests/test_commands.py
index f728ca4..addc8ee 100644
--- a/ForgeBlog/forgeblog/tests/test_commands.py
+++ b/ForgeBlog/forgeblog/tests/test_commands.py
@@ -82,7 +82,7 @@ def test_pull_rss_feeds(parsefeed):
         "foo bar\n"
         "</p>\n"
         "\n"
-        "<p>#foo bar <a href='baz'>\n"
+        "<p>#foo bar <a href='http://other.com/baz'>\n"
         "baz\n"
         "</a></p>\n"
     )
@@ -90,9 +90,10 @@ def test_pull_rss_feeds(parsefeed):
     rendered_html_content = "\n".join([
         r"1\. foo",
         "",
-        r"\#foo bar [baz](baz) foo bar ",
+        r"\#foo bar [baz](http://example.com/baz) foo bar",
+        "",
+        r"\#foo bar [ baz ](http://other.com/baz)",
         "",
-        r"\#foo bar [ baz ](baz)",
         " [link](http://example.com/)",
     ])
 
diff --git a/ForgeImporters/forgeimporters/github/tests/test_wiki.py b/ForgeImporters/forgeimporters/github/tests/test_wiki.py
index 910ac91..d300907 100644
--- a/ForgeImporters/forgeimporters/github/tests/test_wiki.py
+++ b/ForgeImporters/forgeimporters/github/tests/test_wiki.py
@@ -426,6 +426,7 @@ Our website is [[http://domain.net]].
   * [Test2](/p/test/wiki/AgentSpring conventions)
   * [Test3](/p/test/wiki/AgentSpring Q&A)
   * [Test4](/p/test/wiki/Extensions)
+
 '''
 
         assert_equal(f(source, 'test.mediawiki'), result)
@@ -486,13 +487,14 @@ See [Page]'''
   1. Duplicate libraries regularly break builds
   2. Subtle bugs emerge with duplicate libraries, and to a lesser extent, duplicate tools
   3. We want you to try harder to make your formula work with what OS X comes with
+
 '''
 
         assert_equal(f(source, 'test.textile'), result)
 
         # textile-style links converts normal
         source = '*"Textile":Troubleshooting*'
-        result = '**[Textile](Troubleshooting)**\n'
+        result = '**[Textile](Troubleshooting)**\n\n'
         assert_equal(f(source, 'test2.textile'), result)
 
         # links with formatting converts normal in textile now
@@ -507,6 +509,7 @@ some text and *[[Tips n' Tricks]]*
 some text and **[Tips n\u2019 Tricks]**
 
 **[link](http://otherlink.com)**
+
 '''
         assert_equal(f(source, 'test3.textile'), result)
 
diff --git a/ForgeImporters/forgeimporters/github/wiki.py b/ForgeImporters/forgeimporters/github/wiki.py
index 3cc1ba4..9fec9ec 100644
--- a/ForgeImporters/forgeimporters/github/wiki.py
+++ b/ForgeImporters/forgeimporters/github/wiki.py
@@ -334,7 +334,7 @@ class GitHubWikiImporter(ToolImporter):
             if html2text:
                 text = html2text.html2text(text)
                 text = self.convert_gollum_tags(text)
-            text = text.replace('<notextile>', '').replace('</notextile>', '')
+            text = text.replace('<notextile>', '').replace('< notextile>', '').replace('</notextile>', '')
             text = text.replace('&#60;notextile&#62;', '').replace(
                 '&#60;/notextile&#62;', '')
             text = text.replace('&lt;notextile&gt;', '').replace(
diff --git a/ForgeImporters/forgeimporters/trac/tests/test_tickets.py b/ForgeImporters/forgeimporters/trac/tests/test_tickets.py
index b58e122..ee77cd5 100644
--- a/ForgeImporters/forgeimporters/trac/tests/test_tickets.py
+++ b/ForgeImporters/forgeimporters/trac/tests/test_tickets.py
@@ -269,7 +269,7 @@ class TestTracImportSupportFunctional(TestRestApiBase, TestCase):
         csv_fp = open(os.path.dirname(__file__) + '/data/test-list.csv')
         html_fp = open(os.path.dirname(__file__) + '/data/test-list.html')
         with patch.object(TracExport, 'next_ticket_ids', return_value=[(390, {})]):
-            te = TracExport('url', do_attachments=False)
+            te = TracExport('http://somesite.com/apps/trac/open-ms/', do_attachments=False)
             te.exhausted = True
             te.csvopen = lambda s: csv_fp
         with patch('allura.scripts.trac_export.urlopen', return_value=html_fp):
@@ -284,6 +284,6 @@ class TestTracImportSupportFunctional(TestRestApiBase, TestCase):
                                      ticket_num=390)
         self.assertIn('To reproduce:  \n\\- open an mzML file',
                       ticket.description)
-        self.assertIn('duplicate of:  \n\\- [#316](316)',
+        self.assertIn('duplicate of:  \n\\- [#316](316 "defect: SpectraViewWidget is',
                       ticket.discussion_thread.find_posts()[0].text)
         self.assertIn('will crash TOPPView.', ticket.description)
diff --git a/requirements-optional.txt b/requirements-optional.txt
index 2da797a..36ba703 100644
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -2,11 +2,7 @@
 # License v2
 
 # for ForgeWiki mediawiki importer, as well as ForgeBlog external feed
-# importer
-# Use the bleeding edge since stable release 3.200.3 doesn't have
-# https://github.com/aaronsw/html2text/commit/a5c0f3317edd1c9b61f89539b6a6e8363cf99225
-# or many other escaping/formatting improvements
--e git://github.com/brondsem/html2text.git#egg=html2text # GPL
+html2text==2019.8.11  # last to support py2
 
 # for ForgeWiki's mediawiki importer:
 -e git://github.com/zikzakmedia/python-mediawiki.git#egg=python-mediawiki   # GPL