You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2013/09/12 22:38:46 UTC

git commit: [#6650] continue past invalid google code attachments

Updated Branches:
  refs/heads/db/6650 [created] dd1ab5e13


[#6650] continue past invalid google code attachments

Convert one of the test attachment links to an invalid one, and refactor
the attachment handling code shared between comments and issues.


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/dd1ab5e1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/dd1ab5e1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/dd1ab5e1

Branch: refs/heads/db/6650
Commit: dd1ab5e130d587aa77d8b0c8f460bcad94330510
Parents: cd1be45
Author: Dave Brondsema <db...@slashdotmedia.com>
Authored: Thu Sep 12 20:37:18 2013 +0000
Committer: Dave Brondsema <db...@slashdotmedia.com>
Committed: Thu Sep 12 20:37:18 2013 +0000

----------------------------------------------------------------------
 .../forgeimporters/google/__init__.py           | 35 +++++++++++---------
 .../tests/data/google/test-issue.html           | 12 +++----
 .../tests/google/functional/test_tracker.py     |  1 -
 .../tests/google/test_extractor.py              |  5 +--
 4 files changed, 26 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/dd1ab5e1/ForgeImporters/forgeimporters/google/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/google/__init__.py b/ForgeImporters/forgeimporters/google/__init__.py
index 849f924..10ddc86 100644
--- a/ForgeImporters/forgeimporters/google/__init__.py
+++ b/ForgeImporters/forgeimporters/google/__init__.py
@@ -190,11 +190,7 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
         return [_as_text(l) for l in label_nodes]
 
     def get_issue_attachments(self):
-        attachments = self.page.find(id='hc0').find('div', 'attachments')
-        if attachments:
-            return [Attachment(a.parent) for a in attachments.findAll('a', text='Download')]
-        else:
-            return []
+        return _get_attachments(self.page.find(id='hc0'))
 
     def get_issue_stars(self):
         stars_re = re.compile(r'(\d+) (person|people) starred this issue')
@@ -221,13 +217,29 @@ class UserLink(object):
         else:
             return self.name
 
+def _get_attachments(tag):
+    attachment_links = tag.find('div', 'attachments')
+    if attachment_links:
+        attachments = []
+        for a in attachment_links.findAll('a', text='Download'):
+            url = a.parent.get('href')
+            try:
+                attachment = Attachment(url)
+            except Exception:
+                log.exception('Could not get attachment: %s', url)
+            else:
+                attachments.append(attachment)
+        return attachments
+    else:
+        return []
+
 class Comment(object):
     def __init__(self, tag):
         self.author = UserLink(tag.find('span', 'author').find(True, 'userlink'))
         self.created_date = tag.find('span', 'date').get('title')
         self.body = _as_text(tag.find('pre')).strip()
         self._get_updates(tag)
-        self._get_attachments(tag)
+        self.attachments = _get_attachments(tag)
 
     def _get_updates(self, tag):
         _updates = tag.find('div', 'updates')
@@ -238,13 +250,6 @@ class Comment(object):
         else:
             self.updates = {}
 
-    def _get_attachments(self, tag):
-        attachments = tag.find('div', 'attachments')
-        if attachments:
-            self.attachments = [Attachment(a.parent) for a in attachments.findAll('a', text='Download')]
-        else:
-            self.attachments = []
-
     @property
     def annotated_text(self):
         text = (
@@ -272,7 +277,7 @@ class File(object):
         self.file = extractor.page['data']
 
 class Attachment(File):
-    def __init__(self, tag):
-        url = urljoin(GoogleCodeProjectExtractor.BASE_URL, tag.get('href'))
+    def __init__(self, url):
+        url = urljoin(GoogleCodeProjectExtractor.BASE_URL, url)
         filename = parse_qs(urlparse(url).query)['name'][0]
         super(Attachment, self).__init__(url, filename)

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/dd1ab5e1/ForgeImporters/forgeimporters/tests/data/google/test-issue.html
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/data/google/test-issue.html b/ForgeImporters/forgeimporters/tests/data/google/test-issue.html
index 63670fd..693ca1d 100644
--- a/ForgeImporters/forgeimporters/tests/data/google/test-issue.html
+++ b/ForgeImporters/forgeimporters/tests/data/google/test-issue.html
@@ -305,19 +305,17 @@ That's all
 </table>
 <table cellspacing="3" cellpadding="2" border="0">
 <tr><td width="20">
-<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000001&amp;name=at2.txt&amp;token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255">
+<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000001&amp;name=&amp;token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255">
 <img width="15" height="15" src="http://www.gstatic.com/codesite/ph/images/paperclip.gif" border="0" />
 </a>
 </td>
 <td style="min-width:16em" valign="top">
-<b>at2.txt</b>
+<b></b>
 <br />
- 13 bytes
-
+ 0 bytes
 
- &nbsp; <a href="../../allura-google-importer/issues/attachmentText?id=7&amp;aid=70000001&amp;name=at2.txt&amp;token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255" target="_blank">View</a>
 
- &nbsp; <a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000001&amp;name=at2.txt&amp;token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255">Download</a>
+ &nbsp; <a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000001&amp;name=&amp;token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255">Download</a>
 </td>
 </tr>
 </table>
@@ -339,7 +337,7 @@ Test *comment* is a comment
 <div class="attachments">
 <table cellspacing="3" cellpadding="2" border="0">
 <tr><td width="20">
-<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=60001000&amp;name=at2.txt&amp;token=JOSo4duwaN2FCKZrwYOQ-nx9r7U%3A1376001446667">
+<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=60001000&amp;name=&amp;token=JOSo4duwaN2FCKZrwYOQ-nx9r7U%3A1376001446667">
 <img width="15" height="15" src="http://www.gstatic.com/codesite/ph/images/paperclip.gif" border="0" />
 </a>
 </td>

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/dd1ab5e1/ForgeImporters/forgeimporters/tests/google/functional/test_tracker.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/functional/test_tracker.py b/ForgeImporters/forgeimporters/tests/google/functional/test_tracker.py
index 9f48257..ca7c8a5 100644
--- a/ForgeImporters/forgeimporters/tests/google/functional/test_tracker.py
+++ b/ForgeImporters/forgeimporters/tests/google/functional/test_tracker.py
@@ -190,7 +190,6 @@ class TestGCTrackerImporter(TestCase):
         ticket = self._make_ticket(self.test_issue)
         self._assert_attachments(ticket.attachments,
                 ('at1.txt', 'text/plain', 'http://allura-google-importer.googlecode.com/issues/attachment?aid=70000000&name=at1.txt&token=3REU1M3JUUMt0rJUg7ldcELt6LA%3A1376059941255'),
-                ('at2.txt', 'text/plain', 'http://allura-google-importer.googlecode.com/issues/attachment?aid=70000001&name=at2.txt&token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255'),
             )
 
     @without_module('html2text')

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/dd1ab5e1/ForgeImporters/forgeimporters/tests/google/test_extractor.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/test_extractor.py b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
index 668662e..a6c09be 100644
--- a/ForgeImporters/forgeimporters/tests/google/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
@@ -217,13 +217,10 @@ class TestGoogleCodeProjectExtractor(TestCase):
         test_issue = open(pkg_resources.resource_filename('forgeimporters', 'tests/data/google/test-issue.html')).read()
         gpe = self._make_extractor(test_issue)
         attachments = gpe.get_issue_attachments()
-        self.assertEqual(len(attachments), 2)
+        self.assertEqual(len(attachments), 1)
         self.assertEqual(attachments[0].filename, 'at1.txt')
         self.assertEqual(attachments[0].url, 'http://allura-google-importer.googlecode.com/issues/attachment?aid=70000000&name=at1.txt&token=3REU1M3JUUMt0rJUg7ldcELt6LA%3A1376059941255')
         self.assertEqual(attachments[0].type, 'text/plain')
-        self.assertEqual(attachments[1].filename, 'at2.txt')
-        self.assertEqual(attachments[1].url, 'http://allura-google-importer.googlecode.com/issues/attachment?aid=70000001&name=at2.txt&token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255')
-        self.assertEqual(attachments[1].type, 'text/plain')
 
     @mock.patch.object(google, 'StringIO')
     def test_iter_comments(self, StringIO):