You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by je...@apache.org on 2015/04/21 16:25:32 UTC

[02/25] allura git commit: [#7864] handle multiple pages of Google Code comments

[#7864] handle multiple pages of Google Code comments

The new test .html files are based off of the existing test-issue.html and are very similar.

Some tests were moved around to accomodate different test setup for those using iter_comments and
affected by the changes.


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/225dc73c
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/225dc73c
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/225dc73c

Branch: refs/heads/ib/7856
Commit: 225dc73c51dce1de27de0514cf58f222d3f1a351
Parents: 747cad0
Author: Dave Brondsema <da...@brondsema.net>
Authored: Tue Apr 7 10:22:46 2015 -0400
Committer: Igor Bondarenko <je...@gmail.com>
Committed: Wed Apr 15 11:04:02 2015 +0000

----------------------------------------------------------------------
 .../forgeimporters/google/__init__.py           |  30 +-
 .../data/google/test-issue-first-page.html      | 548 +++++++++++++++++++
 .../tests/data/google/test-issue-prev-page.html | 431 +++++++++++++++
 .../tests/data/google/test-issue.html           |   7 +
 .../tests/google/functional/test_tracker.py     |   7 +-
 .../tests/google/test_extractor.py              | 207 ++++---
 6 files changed, 1148 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/225dc73c/ForgeImporters/forgeimporters/google/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/google/__init__.py b/ForgeImporters/forgeimporters/google/__init__.py
index b848151..302544e 100644
--- a/ForgeImporters/forgeimporters/google/__init__.py
+++ b/ForgeImporters/forgeimporters/google/__init__.py
@@ -304,9 +304,9 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
         return self.page.find(id='hc0').find('span', 'date').get('title')
 
     def get_issue_mod_date(self):
-        comments = self.page.findAll('div', 'issuecomment')
+        comments = list(self.iter_comments())
         if comments:
-            last_update = Comment(comments[-1], self.project_name)
+            last_update = comments[-1]
             return last_update.created_date
         else:
             return self.get_issue_created_date()
@@ -346,8 +346,30 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
         return 0
 
     def iter_comments(self):
-        for comment in self.page.findAll('div', 'issuecomment'):
-            yield Comment(comment, self.project_name)
+        # first, get all pages if there are multiple pages of comments
+        looking_for_comment_pages = True
+        comment_page_urls = [self.url]
+        while looking_for_comment_pages:
+            first_comment = self.page.find('div', 'vt issuecomment')
+            looking_for_comment_pages = False
+            if first_comment and 'cursor_off' not in first_comment['class']:
+                # this is not a real comment, just forward/back links
+                for link in first_comment.findAll('a'):
+                    if link.text.startswith('Older'):
+                        prev_comments_page = urljoin(self.url, link['href'])
+                        comment_page_urls.insert(0, prev_comments_page)
+                        looking_for_comment_pages = True
+                        self.get_page(prev_comments_page)  # prep for next iteration of loop
+
+        # then go through those to get the actual comments
+        for comment_page_url in comment_page_urls:
+            self.get_page(comment_page_url)
+            # regular comments have cursor_off class
+            for comment in self.page.findAll('div', 'cursor_off vt issuecomment'):
+                yield Comment(comment, self.project_name)
+
+
+
 
 
 class UserLink(object):

http://git-wip-us.apache.org/repos/asf/allura/blob/225dc73c/ForgeImporters/forgeimporters/tests/data/google/test-issue-first-page.html
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/data/google/test-issue-first-page.html b/ForgeImporters/forgeimporters/tests/data/google/test-issue-first-page.html
new file mode 100644
index 0000000..4fc25eb
--- /dev/null
+++ b/ForgeImporters/forgeimporters/tests/data/google/test-issue-first-page.html
@@ -0,0 +1,548 @@
+<!DOCTYPE html>
+<!--
+
+An issue with a link to another page of comments (google paginates after 500 comments, we simulate with less)
+test-issue-prev-page.html is the test file for that other page of comments
+
+-->
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
+<meta name="ROBOTS" content="NOARCHIVE" />
+<link rel="icon" type="image/vnd.microsoft.icon" href="http://www.gstatic.com/codesite/ph/images/phosting.ico" />
+<script type="text/javascript">
+
+
+
+
+ var codesite_token = null;
+
+
+ var CS_env = {"loggedInUserEmail":null,"relativeBaseUrl":"","projectHomeUrl":"/p/allura-google-importer","assetVersionPath":"http://www.gstatic.com/codesite/ph/3783617020303179221","assetHostPath":"http://www.gstatic.com/codesite/ph","domainName":null,"projectName":"allura-google-importer","token":null,"profileUrl":null};
+ var _gaq = _gaq || [];
+ _gaq.push(
+ ['siteTracker._setAccount', 'UA-18071-1'],
+ ['siteTracker._trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(ga);
+ })();
+
+ </script>
+<title>Issue 6 -
+ allura-google-importer -
+
+ Test Issue -
+ Import Google Code projects to an Allura forge - Google Project Hosting
+ </title>
+<link type="text/css" rel="stylesheet" href="http://www.gstatic.com/codesite/ph/3783617020303179221/css/core.css" />
+<link type="text/css" rel="stylesheet" href="http://www.gstatic.com/codesite/ph/3783617020303179221/css/ph_detail.css" />
+<!--[if IE]>
+ <link type="text/css" rel="stylesheet" href="http://www.gstatic.com/codesite/ph/3783617020303179221/css/d_ie.css" >
+<![endif]-->
+<style type="text/css">
+ .menuIcon.off { background: no-repeat url(http://www.gstatic.com/codesite/ph/images/dropdown_sprite.gif) 0 -42px }
+ .menuIcon.on { background: no-repeat url(http://www.gstatic.com/codesite/ph/images/dropdown_sprite.gif) 0 -28px }
+ .menuIcon.down { background: no-repeat url(http://www.gstatic.com/codesite/ph/images/dropdown_sprite.gif) 0 0; }
+
+
+ .attachments { width:33%; border-top:2px solid #999; padding-top: 3px; margin-left: .7em;}
+ .attachments table { margin-bottom: 0.75em; }
+ .attachments table tr td { padding: 0; margin: 0; font-size: 95%; }
+ .preview { border: 2px solid #c3d9ff; padding: 1px; }
+ .preview:hover { border: 2px solid blue; }
+ .label { white-space: nowrap; }
+ .derived { font-style:italic }
+ .cursor_on .author {
+ background: url(http://www.gstatic.com/codesite/ph/images/show-arrow.gif) no-repeat 2px;
+ }
+ .hiddenform {
+ display: none;
+ }
+
+
+ </style>
+</head>
+<body class="t3">
+<script type="text/javascript">
+ window.___gcfg = {lang: 'en'};
+ (function()
+ {var po = document.createElement("script");
+ po.type = "text/javascript"; po.async = true;po.src = "https://apis.google.com/js/plusone.js";
+ var s = document.getElementsByTagName("script")[0];
+ s.parentNode.insertBefore(po, s);
+ })();
+</script>
+<div class="headbg">
+<div id="gaia">
+<span>
+<a href="#" id="projects-dropdown" onclick="return false;"><u>My favorites</u> <small>&#9660;</small></a>
+ | <a href="https://www.google.com/accounts/ServiceLogin?service=code&amp;ltmpl=phosting&amp;continue=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6&amp;followup=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6" onclick="_CS_click('/gb/ph/signin');"><u>Sign in</u></a>
+</span>
+</div>
+<div class="gbh" style="left: 0pt;"></div>
+<div class="gbh" style="right: 0pt;"></div>
+<div style="height: 1px"></div>
+<!--[if lte IE 7]>
+<div style="text-align:center;">
+Your version of Internet Explorer is not supported. Try a browser that
+contributes to open source, such as <a href="http://www.firefox.com">Firefox</a>,
+<a href="http://www.google.com/chrome">Google Chrome</a>, or
+<a href="http://code.google.com/chrome/chromeframe/">Google Chrome Frame</a>.
+</div>
+<![endif]-->
+<table style="padding:0px; margin: 0px 0px 10px 0px; width:100%" cellpadding="0" cellspacing="0" itemscope="itemscope" itemtype="http://schema.org/CreativeWork">
+<tr style="height: 58px;">
+<td id="plogo">
+<link itemprop="url" href="/p/allura-google-importer" />
+<a href="/p/allura-google-importer/">
+<img src="/p/allura-google-importer/logo?cct=1374769571" alt="Logo" itemprop="image" />
+</a>
+</td>
+<td style="padding-left: 0.5em">
+<div id="pname">
+<a href="/p/allura-google-importer/"><span itemprop="name">allura-google-importer</span></a>
+</div>
+<div id="psum">
+<a id="project_summary_link" href="/p/allura-google-importer/"><span itemprop="description">Import Google Code projects to an Allura forge</span></a>
+</div>
+</td>
+<td style="white-space:nowrap;text-align:right; vertical-align:bottom;">
+<form action="/hosting/search">
+<input size="30" name="q" value="" type="text" />
+<input type="submit" name="projectsearch" value="Search projects" />
+</form>
+</td></tr>
+</table>
+</div>
+<div id="mt" class="gtb">
+<a href="/p/allura-google-importer/" class="tab ">Project&nbsp;Home</a>
+<a href="/p/allura-google-importer/wiki/TestPage?tm=6" class="tab ">Wiki</a>
+<a href="/p/allura-google-importer/issues/list" class="tab active">Issues</a>
+<a href="/p/allura-google-importer/source/checkout" class="tab ">Source</a>
+<div class="gtbc"></div>
+</div>
+<table cellspacing="0" cellpadding="0" width="100%" align="center" border="0" class="st">
+<tr>
+<td class="subt">
+<div class="issueDetail">
+<div class="isf">
+<span class="inIssueEntry">
+<a class="buttonify" href="entry" onclick="return _newIssuePrompt();">New issue</a>
+</span> &nbsp;
+
+ <span class="inIssueList">
+<span>Search</span>
+</span><form action="list" method="GET" style="display:inline">
+<select id="can" name="can">
+<option disabled="disabled">Search within:</option>
+<option value="1">&nbsp;All issues</option>
+<option value="2" selected="selected">&nbsp;Open issues</option>
+<option value="6">&nbsp;New issues</option>
+<option value="7">&nbsp;Issues to verify</option>
+</select>
+<span>for</span>
+<span id="qq"><input type="text" size="38" id="searchq" name="q" value="" autocomplete="off" onkeydown="_blurOnEsc(event)" /></span>
+<span id="search_colspec"><input type="hidden" name="colspec" value="ID Type Status Priority Milestone Owner Summary" /></span>
+<input type="hidden" name="cells" value="tiles" />
+<input type="submit" value="Search" />
+</form>
+ &nbsp;
+ <span class="inIssueAdvSearch">
+<a href="advsearch">Advanced search</a>
+</span> &nbsp;
+ <span class="inIssueSearchTips">
+<a href="searchtips">Search tips</a>
+</span> &nbsp;
+ <span class="inIssueSubscriptions">
+<a href="/p/allura-google-importer/issues/subscriptions">Subscriptions</a>
+</span>
+</div>
+</div>
+</td>
+<td align="right" valign="top" class="bevel-right"></td>
+</tr>
+</table>
+<script type="text/javascript">
+ var cancelBubble = false;
+ function _go(url) { document.location = url; }
+</script>
+<div id="maincol">
+<div id="color_control" class="">
+<div id="issueheader">
+<table cellpadding="0" cellspacing="0" width="100%"><tbody>
+<tr>
+<td class="vt h3" nowrap="nowrap" style="padding:0 5px">
+
+
+ Issue <a href="detail?id=6">6</a>:
+ </td>
+<td width="90%" class="vt">
+<span class="h3">Test &quot;Issue&quot;</span>
+</td>
+<td>
+<div class="pagination">
+<a href="../../allura-google-importer/issues/detail?id=5" title="Prev">&lsaquo; Prev</a>
+ 6 of 6
+
+ </div>
+</td>
+</tr>
+<tr>
+<td></td>
+<td nowrap="nowrap">
+
+
+ 1 person starred this issue and may be notified of changes.
+
+
+
+ </td>
+<td align="center" nowrap="nowrap">
+<a href="http://code.google.com/p/allura-google-importer/issues/list?cursor=allura-google-importer%3A6">Back to list</a>
+</td>
+</tr>
+</tbody></table>
+</div>
+<table width="100%" cellpadding="0" cellspacing="0" border="0" class="issuepage" id="meta-container">
+<tbody class="collapse">
+<tr>
+<td id="issuemeta">
+<div id="meta-float">
+<table cellspacing="0" cellpadding="0">
+<tr><th align="left">Status:&nbsp;</th>
+<td width="100%">
+<span title="Work on this issue has begun">Started</span>
+</td>
+</tr>
+<tr><th align="left">Owner:&nbsp;</th><td>
+<a class="userlink" href="/u/101557263855536553789/">john...@gmail.com</a>
+</td>
+</tr>
+<tr><td colspan="2">
+<div style="padding-top:2px">
+<a href="list?q=label:Type-Defect" title="Report of a software defect" class="label"><b>Type-</b>Defect</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:Priority-Medium" title="Normal priority" class="label"><b>Priority-</b>Medium</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:Milestone-Release1.0" title="All essential functionality working" class="label"><b>Milestone-</b>Release1.0</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:OpSys-All" title="Affects all operating systems" class="label"><b>OpSys-</b>All</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:Component-Logic" title="Issue relates to application logic" class="label"><b>Component-</b>Logic</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:Performance" title="Performance issue" class="label">Performance</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:Security" title="Security risk to users" class="label">Security</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:OpSys-Windows" title="Affects Windows users" class="label"><b>OpSys-</b>Windows</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:OpSys-OSX" title="Affects Mac OS X users" class="label"><b>OpSys-</b>OSX</a>
+</div>
+</td></tr>
+</table>
+<div class="rel_issues">
+</div>
+<br /><br />
+<div style="white-space:nowrap"><a href="https://www.google.com/accounts/ServiceLogin?service=code&amp;ltmpl=phosting&amp;continue=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6&amp;followup=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6">Sign in</a> to add a comment</div>
+</div>&nbsp;
+ </td>
+<td class="vt issuedescription" width="100%" id="cursorarea">
+<div class="cursor_off vt issuedescription" id="hc0">
+<div class="author">
+<span class="role_label">Project Member</span>
+ Reported by
+
+
+ <a class="userlink" href="/u/101557263855536553789/">john...@gmail.com</a>,
+ <span class="date" title="Thu Aug  8 15:33:52 2013">Today (3 minutes ago)</span>
+</div>
+<pre>
+Test *Issue* for testing
+
+  1. Test List
+  2. Item
+
+**Testing**
+
+ * Test list 2
+ * Item
+
+# Test Section
+
+    p = source.test_issue.post()
+    p.count = p.count *5 #* 6
+    if p.count &gt; 5:
+        print "Not &lt; 5 &amp; != 5"
+
+References: <a href="/p/allura-google-importer/issues/detail?id=1">issue 1</a>, <a href="/p/allura-google-importer/source/detail?r=2">r2</a>
+
+That's all
+
+
+</pre>
+<div class="attachments">
+<table cellspacing="3" cellpadding="2" border="0">
+<tr><td width="20">
+<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000000&amp;name=at1.txt&amp;token=3REU1M3JUUMt0rJUg7ldcELt6LA%3A1376059941255">
+<img width="15" height="15" src="http://www.gstatic.com/codesite/ph/images/paperclip.gif" border="0" />
+</a>
+</td>
+<td style="min-width:16em" valign="top">
+<b>at1.txt</b>
+<br />
+ 13 bytes
+
+
+ &nbsp; <a href="../../allura-google-importer/issues/attachmentText?id=7&amp;aid=70000000&amp;name=at1.txt&amp;token=3REU1M3JUUMt0rJUg7ldcELt6LA%3A1376059941255" target="_blank">View</a>
+
+ &nbsp; <a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000000&amp;name=at1.txt&amp;token=3REU1M3JUUMt0rJUg7ldcELt6LA%3A1376059941255">Download</a>
+</td>
+</tr>
+</table>
+<table cellspacing="3" cellpadding="2" border="0">
+<tr><td width="20">
+<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000001&amp;name=&amp;token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255">
+<img width="15" height="15" src="http://www.gstatic.com/codesite/ph/images/paperclip.gif" border="0" />
+</a>
+</td>
+<td style="min-width:16em" valign="top">
+<b></b>
+<br />
+ 0 bytes
+
+
+ &nbsp; <a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000001&amp;name=&amp;token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255">Download</a>
+</td>
+</tr>
+</table>
+</div>
+</div>
+
+ <div class="vt issuecomment" width="100%" style="background:#e5ecf9; padding:2px .7em; margin:0; border:0">
+Showing comments 3 - 6
+of 6
+ &nbsp; <a href="detail?id=1769&amp;cnum=500&amp;cstart=2">Older <b>&rsaquo;</b></a>
+ </div>
+
+<div class="cursor_off vt issuecomment" id="hc1">
+<div style="float:right; margin-right:.3em; text-align:right">
+<span class="date" title="Thu Aug  8 15:35:15 2013">
+ Today (2 minutes ago)
+ </span>
+</div>
+<span class="author">
+<span class="role_label">Project Member</span>
+<a name="c1" href="/p/allura-google-importer/issues/detail?id=6#c1">#1</a>
+<a class="userlink" href="/u/101557263855536553789/">john...@gmail.com</a></span>
+<pre>
+Test *comment* is a comment
+</pre>
+<div class="attachments">
+<table cellspacing="3" cellpadding="2" border="0">
+<tr><td width="20">
+<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=60001000&amp;name=&amp;token=JOSo4duwaN2FCKZrwYOQ-nx9r7U%3A1376001446667">
+<img width="15" height="15" src="http://www.gstatic.com/codesite/ph/images/paperclip.gif" border="0" />
+</a>
+</td>
+<td style="min-width:16em" valign="top">
+<b>at2.txt</b>
+<br />
+ 13 bytes
+
+
+ &nbsp; <a href="../../allura-google-importer/issues/attachmentText?id=6&amp;aid=60001000&amp;name=at2.txt&amp;token=JOSo4duwaN2FCKZrwYOQ-nx9r7U%3A1376001446667" target="_blank">View</a>
+
+ &nbsp; <a href="//allura-google-importer.googlecode.com/issues/attachment?aid=60001000&amp;name=at2.txt&amp;token=JOSo4duwaN2FCKZrwYOQ-nx9r7U%3A1376001446667">Download</a>
+</td>
+</tr>
+</table>
+</div>
+<div class="updates">
+<div class="round4"></div>
+<div class="round2"></div>
+<div class="round1"></div>
+<div class="box-inner">
+<b>Status:</b>
+ Started
+
+ <br />
+<b>Labels:</b>
+ -OpSys-Linux OpSys-Windows
+
+ <br />
+</div>
+<div class="round1"></div>
+<div class="round2"></div>
+<div class="round4"></div>
+</div>
+</div>
+<div class="cursor_off vt issuecomment" id="hc2">
+<div style="float:right; margin-right:.3em; text-align:right">
+<span class="date" title="Thu Aug  8 15:35:34 2013">
+ Today (1 minute ago)
+ </span>
+</div>
+<span class="author">
+<span class="role_label">Project Member</span>
+<a name="c2" href="/p/allura-google-importer/issues/detail?id=6#c2">#2</a>
+<a class="userlink" href="/u/101557263855536553789/">john...@gmail.com</a></span>
+<pre>
+Another comment with references: <a href="/p/allura-google-importer/issues/detail?id=2">issue 2</a>, <a href="/p/allura-google-importer/source/detail?r=1">r1</a>
+</pre>
+</div>
+<div class="cursor_off vt issuecomment" id="hc3">
+<div style="float:right; margin-right:.3em; text-align:right">
+<span class="date" title="Thu Aug  8 15:36:39 2013">
+ Today (moments ago)
+ </span>
+</div>
+<span class="author">
+<span class="role_label">Project Member</span>
+<a name="c3" href="/p/allura-google-importer/issues/detail?id=6#c3">#3</a>
+<a class="userlink" href="/u/101557263855536553789/">john...@gmail.com</a></span>
+<pre>
+Last comment
+</pre>
+<div class="attachments">
+<table cellspacing="3" cellpadding="2" border="0">
+<tr><td width="20">
+<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=60003000&amp;name=at4.txt&amp;token=6Ny2zYHmV6b82dqxyoiH6HUYoC4%3A1376001446667">
+<img width="15" height="15" src="http://www.gstatic.com/codesite/ph/images/paperclip.gif" border="0" />
+</a>
+</td>
+<td style="min-width:16em" valign="top">
+<b>at4.txt</b>
+<br />
+ 13 bytes
+
+
+ &nbsp; <a href="../../allura-google-importer/issues/attachmentText?id=6&amp;aid=60003000&amp;name=at4.txt&amp;token=6Ny2zYHmV6b82dqxyoiH6HUYoC4%3A1376001446667" target="_blank">View</a>
+
+ &nbsp; <a href="//allura-google-importer.googlecode.com/issues/attachment?aid=60003000&amp;name=at4.txt&amp;token=6Ny2zYHmV6b82dqxyoiH6HUYoC4%3A1376001446667">Download</a>
+</td>
+</tr>
+</table>
+<table cellspacing="3" cellpadding="2" border="0">
+<tr><td width="20">
+<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=60003001&amp;name=at1.txt&amp;token=NS8aMvWsKzTAPuY2kniJG5aLzPg%3A1376001446667">
+<img width="15" height="15" src="http://www.gstatic.com/codesite/ph/images/paperclip.gif" border="0" />
+</a>
+</td>
+<td style="min-width:16em" valign="top">
+<b>at1.txt</b>
+<br />
+ 13 bytes
+
+
+ &nbsp; <a href="../../allura-google-importer/issues/attachmentText?id=6&amp;aid=60003001&amp;name=at1.txt&amp;token=NS8aMvWsKzTAPuY2kniJG5aLzPg%3A1376001446667" target="_blank">View</a>
+
+ &nbsp; <a href="//allura-google-importer.googlecode.com/issues/attachment?aid=60003001&amp;name=at1.txt&amp;token=NS8aMvWsKzTAPuY2kniJG5aLzPg%3A1376001446667">Download</a>
+</td>
+</tr>
+</table>
+</div>
+</div>
+<div class="cursor_off vt issuecomment" id="hc4">
+<div style="float:right; margin-right:.3em; text-align:right">
+<span class="date" title="Thu Aug  8 15:36:57 2013">
+ Today (moments ago)
+ </span>
+</div>
+<span class="author">
+<span class="role_label">Project Member</span>
+<a name="c4" href="/p/allura-google-importer/issues/detail?id=6#c4">#4</a>
+<a class="userlink" href="/u/101557263855536553789/">john...@gmail.com</a></span>
+<pre>
+Oh, I forgot one (with an inter-project reference to <a href="/p/other-project/issues/detail?id=1">issue other-project:1</a>)
+</pre>
+<div class="updates">
+<div class="round4"></div>
+<div class="round2"></div>
+<div class="round1"></div>
+<div class="box-inner">
+<b>Labels:</b>
+ OpSys-OSX
+
+ <br />
+</div>
+<div class="round1"></div>
+<div class="round2"></div>
+<div class="round4"></div>
+</div>
+</div>
+
+<div class="vt issuecomment" width="100%" style="background:#e5ecf9; padding:2px .7em; margin:0">
+Showing comments 3 - 6
+of 6
+ &nbsp; <a href="detail?id=1769&amp;cnum=500&amp;cstart=2">Older <b>&rsaquo;</b></a>
+</div>
+
+</td>
+</tr>
+<tr>
+<td></td>
+<td class="vt issuecomment">
+<span class="indicator">&#9658;</span> <a href="https://www.google.com/accounts/ServiceLogin?service=code&amp;ltmpl=phosting&amp;continue=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6&amp;followup=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6">Sign in</a> to add a comment
+ </td>
+</tr>
+</tbody>
+</table>
+<br />
+<script type="text/javascript" src="http://www.gstatic.com/codesite/ph/3783617020303179221/js/dit_scripts.js"></script>
+</div>
+<form name="delcom" action="delComment.do?q=&amp;can=2&amp;groupby=&amp;sort=&amp;colspec=ID+Type+Status+Priority+Milestone+Owner+Summary" method="POST">
+<input type="hidden" name="sequence_num" value="" />
+<input type="hidden" name="mode" value="" />
+<input type="hidden" name="id" value="6" />
+<input type="hidden" name="token" value="" />
+</form>
+<div id="helparea"></div>
+<script type="text/javascript">
+ _onload();
+ function delComment(sequence_num, delete_mode) {
+ var f = document.forms["delcom"];
+ f.sequence_num.value = sequence_num;
+ f.mode.value = delete_mode;
+
+ f.submit();
+ return false;
+ }
+
+ _floatMetadata();
+</script>
+<script type="text/javascript" src="http://www.gstatic.com/codesite/ph/3783617020303179221/js/kibbles.js"></script>
+<script type="text/javascript">
+ _setupKibblesOnDetailPage(
+ 'http://code.google.com/p/allura-google-importer/issues/list?cursor\x3dallura-google-importer%3A6',
+ '/p/allura-google-importer/issues/entry',
+ '../../allura-google-importer/issues/detail?id\x3d5',
+ '',
+ '', 'allura-google-importer', 6,
+ false, false, codesite_token);
+</script>
+<script type="text/javascript" src="http://www.gstatic.com/codesite/ph/3783617020303179221/js/ph_core.js"></script>
+</div>
+<div id="footer" dir="ltr">
+<div class="text">
+<a href="/projecthosting/terms.html">Terms</a> -
+ <a href="http://www.google.com/privacy.html">Privacy</a> -
+ <a href="/p/support/">Project Hosting Help</a>
+</div>
+</div>
+<div class="hostedBy" style="margin-top: -20px;">
+<span style="vertical-align: top;">Powered by <a href="http://code.google.com/projecthosting/">Google Project Hosting</a></span>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/allura/blob/225dc73c/ForgeImporters/forgeimporters/tests/data/google/test-issue-prev-page.html
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/data/google/test-issue-prev-page.html b/ForgeImporters/forgeimporters/tests/data/google/test-issue-prev-page.html
new file mode 100644
index 0000000..62a3b23
--- /dev/null
+++ b/ForgeImporters/forgeimporters/tests/data/google/test-issue-prev-page.html
@@ -0,0 +1,431 @@
+<!DOCTYPE html>
+<!--
+
+This is the second page of previous comments, that goes with test-issue-first-page.html
+
+-->
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
+<meta name="ROBOTS" content="NOARCHIVE" />
+<link rel="icon" type="image/vnd.microsoft.icon" href="http://www.gstatic.com/codesite/ph/images/phosting.ico" />
+<script type="text/javascript">
+
+
+
+
+ var codesite_token = null;
+
+
+ var CS_env = {"loggedInUserEmail":null,"relativeBaseUrl":"","projectHomeUrl":"/p/allura-google-importer","assetVersionPath":"http://www.gstatic.com/codesite/ph/3783617020303179221","assetHostPath":"http://www.gstatic.com/codesite/ph","domainName":null,"projectName":"allura-google-importer","token":null,"profileUrl":null};
+ var _gaq = _gaq || [];
+ _gaq.push(
+ ['siteTracker._setAccount', 'UA-18071-1'],
+ ['siteTracker._trackPageview']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(ga);
+ })();
+
+ </script>
+<title>Issue 6 -
+ allura-google-importer -
+
+ Test Issue -
+ Import Google Code projects to an Allura forge - Google Project Hosting
+ </title>
+<link type="text/css" rel="stylesheet" href="http://www.gstatic.com/codesite/ph/3783617020303179221/css/core.css" />
+<link type="text/css" rel="stylesheet" href="http://www.gstatic.com/codesite/ph/3783617020303179221/css/ph_detail.css" />
+<!--[if IE]>
+ <link type="text/css" rel="stylesheet" href="http://www.gstatic.com/codesite/ph/3783617020303179221/css/d_ie.css" >
+<![endif]-->
+<style type="text/css">
+ .menuIcon.off { background: no-repeat url(http://www.gstatic.com/codesite/ph/images/dropdown_sprite.gif) 0 -42px }
+ .menuIcon.on { background: no-repeat url(http://www.gstatic.com/codesite/ph/images/dropdown_sprite.gif) 0 -28px }
+ .menuIcon.down { background: no-repeat url(http://www.gstatic.com/codesite/ph/images/dropdown_sprite.gif) 0 0; }
+
+
+ .attachments { width:33%; border-top:2px solid #999; padding-top: 3px; margin-left: .7em;}
+ .attachments table { margin-bottom: 0.75em; }
+ .attachments table tr td { padding: 0; margin: 0; font-size: 95%; }
+ .preview { border: 2px solid #c3d9ff; padding: 1px; }
+ .preview:hover { border: 2px solid blue; }
+ .label { white-space: nowrap; }
+ .derived { font-style:italic }
+ .cursor_on .author {
+ background: url(http://www.gstatic.com/codesite/ph/images/show-arrow.gif) no-repeat 2px;
+ }
+ .hiddenform {
+ display: none;
+ }
+
+
+ </style>
+</head>
+<body class="t3">
+<script type="text/javascript">
+ window.___gcfg = {lang: 'en'};
+ (function()
+ {var po = document.createElement("script");
+ po.type = "text/javascript"; po.async = true;po.src = "https://apis.google.com/js/plusone.js";
+ var s = document.getElementsByTagName("script")[0];
+ s.parentNode.insertBefore(po, s);
+ })();
+</script>
+<div class="headbg">
+<div id="gaia">
+<span>
+<a href="#" id="projects-dropdown" onclick="return false;"><u>My favorites</u> <small>&#9660;</small></a>
+ | <a href="https://www.google.com/accounts/ServiceLogin?service=code&amp;ltmpl=phosting&amp;continue=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6&amp;followup=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6" onclick="_CS_click('/gb/ph/signin');"><u>Sign in</u></a>
+</span>
+</div>
+<div class="gbh" style="left: 0pt;"></div>
+<div class="gbh" style="right: 0pt;"></div>
+<div style="height: 1px"></div>
+<!--[if lte IE 7]>
+<div style="text-align:center;">
+Your version of Internet Explorer is not supported. Try a browser that
+contributes to open source, such as <a href="http://www.firefox.com">Firefox</a>,
+<a href="http://www.google.com/chrome">Google Chrome</a>, or
+<a href="http://code.google.com/chrome/chromeframe/">Google Chrome Frame</a>.
+</div>
+<![endif]-->
+<table style="padding:0px; margin: 0px 0px 10px 0px; width:100%" cellpadding="0" cellspacing="0" itemscope="itemscope" itemtype="http://schema.org/CreativeWork">
+<tr style="height: 58px;">
+<td id="plogo">
+<link itemprop="url" href="/p/allura-google-importer" />
+<a href="/p/allura-google-importer/">
+<img src="/p/allura-google-importer/logo?cct=1374769571" alt="Logo" itemprop="image" />
+</a>
+</td>
+<td style="padding-left: 0.5em">
+<div id="pname">
+<a href="/p/allura-google-importer/"><span itemprop="name">allura-google-importer</span></a>
+</div>
+<div id="psum">
+<a id="project_summary_link" href="/p/allura-google-importer/"><span itemprop="description">Import Google Code projects to an Allura forge</span></a>
+</div>
+</td>
+<td style="white-space:nowrap;text-align:right; vertical-align:bottom;">
+<form action="/hosting/search">
+<input size="30" name="q" value="" type="text" />
+<input type="submit" name="projectsearch" value="Search projects" />
+</form>
+</td></tr>
+</table>
+</div>
+<div id="mt" class="gtb">
+<a href="/p/allura-google-importer/" class="tab ">Project&nbsp;Home</a>
+<a href="/p/allura-google-importer/wiki/TestPage?tm=6" class="tab ">Wiki</a>
+<a href="/p/allura-google-importer/issues/list" class="tab active">Issues</a>
+<a href="/p/allura-google-importer/source/checkout" class="tab ">Source</a>
+<div class="gtbc"></div>
+</div>
+<table cellspacing="0" cellpadding="0" width="100%" align="center" border="0" class="st">
+<tr>
+<td class="subt">
+<div class="issueDetail">
+<div class="isf">
+<span class="inIssueEntry">
+<a class="buttonify" href="entry" onclick="return _newIssuePrompt();">New issue</a>
+</span> &nbsp;
+
+ <span class="inIssueList">
+<span>Search</span>
+</span><form action="list" method="GET" style="display:inline">
+<select id="can" name="can">
+<option disabled="disabled">Search within:</option>
+<option value="1">&nbsp;All issues</option>
+<option value="2" selected="selected">&nbsp;Open issues</option>
+<option value="6">&nbsp;New issues</option>
+<option value="7">&nbsp;Issues to verify</option>
+</select>
+<span>for</span>
+<span id="qq"><input type="text" size="38" id="searchq" name="q" value="" autocomplete="off" onkeydown="_blurOnEsc(event)" /></span>
+<span id="search_colspec"><input type="hidden" name="colspec" value="ID Type Status Priority Milestone Owner Summary" /></span>
+<input type="hidden" name="cells" value="tiles" />
+<input type="submit" value="Search" />
+</form>
+ &nbsp;
+ <span class="inIssueAdvSearch">
+<a href="advsearch">Advanced search</a>
+</span> &nbsp;
+ <span class="inIssueSearchTips">
+<a href="searchtips">Search tips</a>
+</span> &nbsp;
+ <span class="inIssueSubscriptions">
+<a href="/p/allura-google-importer/issues/subscriptions">Subscriptions</a>
+</span>
+</div>
+</div>
+</td>
+<td align="right" valign="top" class="bevel-right"></td>
+</tr>
+</table>
+<script type="text/javascript">
+ var cancelBubble = false;
+ function _go(url) { document.location = url; }
+</script>
+<div id="maincol">
+<div id="color_control" class="">
+<div id="issueheader">
+<table cellpadding="0" cellspacing="0" width="100%"><tbody>
+<tr>
+<td class="vt h3" nowrap="nowrap" style="padding:0 5px">
+
+
+ Issue <a href="detail?id=6">6</a>:
+ </td>
+<td width="90%" class="vt">
+<span class="h3">Test &quot;Issue&quot;</span>
+</td>
+<td>
+<div class="pagination">
+<a href="../../allura-google-importer/issues/detail?id=5" title="Prev">&lsaquo; Prev</a>
+ 6 of 6
+
+ </div>
+</td>
+</tr>
+<tr>
+<td></td>
+<td nowrap="nowrap">
+
+
+ 1 person starred this issue and may be notified of changes.
+
+
+
+ </td>
+<td align="center" nowrap="nowrap">
+<a href="http://code.google.com/p/allura-google-importer/issues/list?cursor=allura-google-importer%3A6">Back to list</a>
+</td>
+</tr>
+</tbody></table>
+</div>
+<table width="100%" cellpadding="0" cellspacing="0" border="0" class="issuepage" id="meta-container">
+<tbody class="collapse">
+<tr>
+<td id="issuemeta">
+<div id="meta-float">
+<table cellspacing="0" cellpadding="0">
+<tr><th align="left">Status:&nbsp;</th>
+<td width="100%">
+<span title="Work on this issue has begun">Started</span>
+</td>
+</tr>
+<tr><th align="left">Owner:&nbsp;</th><td>
+<a class="userlink" href="/u/101557263855536553789/">john...@gmail.com</a>
+</td>
+</tr>
+<tr><td colspan="2">
+<div style="padding-top:2px">
+<a href="list?q=label:Type-Defect" title="Report of a software defect" class="label"><b>Type-</b>Defect</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:Priority-Medium" title="Normal priority" class="label"><b>Priority-</b>Medium</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:Milestone-Release1.0" title="All essential functionality working" class="label"><b>Milestone-</b>Release1.0</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:OpSys-All" title="Affects all operating systems" class="label"><b>OpSys-</b>All</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:Component-Logic" title="Issue relates to application logic" class="label"><b>Component-</b>Logic</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:Performance" title="Performance issue" class="label">Performance</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:Security" title="Security risk to users" class="label">Security</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:OpSys-Windows" title="Affects Windows users" class="label"><b>OpSys-</b>Windows</a>
+</div>
+<div style="padding-top:2px">
+<a href="list?q=label:OpSys-OSX" title="Affects Mac OS X users" class="label"><b>OpSys-</b>OSX</a>
+</div>
+</td></tr>
+</table>
+<div class="rel_issues">
+</div>
+<br /><br />
+<div style="white-space:nowrap"><a href="https://www.google.com/accounts/ServiceLogin?service=code&amp;ltmpl=phosting&amp;continue=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6&amp;followup=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6">Sign in</a> to add a comment</div>
+</div>&nbsp;
+ </td>
+<td class="vt issuedescription" width="100%" id="cursorarea">
+<div class="cursor_off vt issuedescription" id="hc0">
+<div class="author">
+<span class="role_label">Project Member</span>
+ Reported by
+
+
+ <a class="userlink" href="/u/101557263855536553789/">john...@gmail.com</a>,
+ <span class="date" title="Thu Aug  8 15:33:52 2013">Today (3 minutes ago)</span>
+</div>
+<pre>
+Test *Issue* for testing
+
+  1. Test List
+  2. Item
+
+**Testing**
+
+ * Test list 2
+ * Item
+
+# Test Section
+
+    p = source.test_issue.post()
+    p.count = p.count *5 #* 6
+    if p.count &gt; 5:
+        print "Not &lt; 5 &amp; != 5"
+
+References: <a href="/p/allura-google-importer/issues/detail?id=1">issue 1</a>, <a href="/p/allura-google-importer/source/detail?r=2">r2</a>
+
+That's all
+
+
+</pre>
+<div class="attachments">
+<table cellspacing="3" cellpadding="2" border="0">
+<tr><td width="20">
+<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000000&amp;name=at1.txt&amp;token=3REU1M3JUUMt0rJUg7ldcELt6LA%3A1376059941255">
+<img width="15" height="15" src="http://www.gstatic.com/codesite/ph/images/paperclip.gif" border="0" />
+</a>
+</td>
+<td style="min-width:16em" valign="top">
+<b>at1.txt</b>
+<br />
+ 13 bytes
+
+
+ &nbsp; <a href="../../allura-google-importer/issues/attachmentText?id=7&amp;aid=70000000&amp;name=at1.txt&amp;token=3REU1M3JUUMt0rJUg7ldcELt6LA%3A1376059941255" target="_blank">View</a>
+
+ &nbsp; <a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000000&amp;name=at1.txt&amp;token=3REU1M3JUUMt0rJUg7ldcELt6LA%3A1376059941255">Download</a>
+</td>
+</tr>
+</table>
+<table cellspacing="3" cellpadding="2" border="0">
+<tr><td width="20">
+<a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000001&amp;name=&amp;token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255">
+<img width="15" height="15" src="http://www.gstatic.com/codesite/ph/images/paperclip.gif" border="0" />
+</a>
+</td>
+<td style="min-width:16em" valign="top">
+<b></b>
+<br />
+ 0 bytes
+
+
+ &nbsp; <a href="//allura-google-importer.googlecode.com/issues/attachment?aid=70000001&amp;name=&amp;token=C9Hn4s1-g38hlSggRGo65VZM1ys%3A1376059941255">Download</a>
+</td>
+</tr>
+</table>
+</div>
+</div>
+
+ <div class="vt issuecomment" width="100%" style="background:#e5ecf9; padding:2px .7em; margin:0; border:0">
+ <a href="detail?id=1769&amp;cnum=500&amp;cstart=502">Newer <b>&rsaquo;</b></a>
+ &nbsp;
+Showing comments 1 - 2
+of 6
+ </div>
+
+<div class="cursor_off vt issuecomment" id="hc1">
+<div style="float:right; margin-right:.3em; text-align:right">
+<span class="date" title="Thu Aug  8 15:34:01 2013">
+ Today (3 minutes ago)
+ </span>
+</div>
+<span class="author">
+<span class="role_label">Project Member</span>
+<a name="c1" href="/p/allura-google-importer/issues/detail?id=6#c1">#1</a>
+<a class="userlink" href="/u/101557263855536553789/">john...@gmail.com</a></span>
+<pre>
+Simple comment
+</pre>
+</div>
+<div class="cursor_off vt issuecomment" id="hc2">
+<div style="float:right; margin-right:.3em; text-align:right">
+<span class="date" title="Thu Aug  8 15:34:09 2013">
+ Today (3 minutes ago)
+ </span>
+</div>
+<span class="author">
+<span class="role_label">Project Member</span>
+<a name="c2" href="/p/allura-google-importer/issues/detail?id=6#c2">#2</a>
+<a class="userlink" href="/u/101557263855536553789/">john...@gmail.com</a></span>
+<pre>
+Boring comment
+</pre>
+</div>
+
+<div class="vt issuecomment" width="100%" style="background:#e5ecf9; padding:2px .7em; margin:0">
+ <a href="detail?id=1769&amp;cnum=500&amp;cstart=502">Newer <b>&rsaquo;</b></a>
+ &nbsp;
+Showing comments 1 - 2
+of 6
+</div>
+
+</td>
+</tr>
+<tr>
+<td></td>
+<td class="vt issuecomment">
+<span class="indicator">&#9658;</span> <a href="https://www.google.com/accounts/ServiceLogin?service=code&amp;ltmpl=phosting&amp;continue=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6&amp;followup=http%3A%2F%2Fcode.google.com%2Fp%2Fallura-google-importer%2Fissues%2Fdetail%3Fid%3D6">Sign in</a> to add a comment
+ </td>
+</tr>
+</tbody>
+</table>
+<br />
+<script type="text/javascript" src="http://www.gstatic.com/codesite/ph/3783617020303179221/js/dit_scripts.js"></script>
+</div>
+<form name="delcom" action="delComment.do?q=&amp;can=2&amp;groupby=&amp;sort=&amp;colspec=ID+Type+Status+Priority+Milestone+Owner+Summary" method="POST">
+<input type="hidden" name="sequence_num" value="" />
+<input type="hidden" name="mode" value="" />
+<input type="hidden" name="id" value="6" />
+<input type="hidden" name="token" value="" />
+</form>
+<div id="helparea"></div>
+<script type="text/javascript">
+ _onload();
+ function delComment(sequence_num, delete_mode) {
+ var f = document.forms["delcom"];
+ f.sequence_num.value = sequence_num;
+ f.mode.value = delete_mode;
+
+ f.submit();
+ return false;
+ }
+
+ _floatMetadata();
+</script>
+<script type="text/javascript" src="http://www.gstatic.com/codesite/ph/3783617020303179221/js/kibbles.js"></script>
+<script type="text/javascript">
+ _setupKibblesOnDetailPage(
+ 'http://code.google.com/p/allura-google-importer/issues/list?cursor\x3dallura-google-importer%3A6',
+ '/p/allura-google-importer/issues/entry',
+ '../../allura-google-importer/issues/detail?id\x3d5',
+ '',
+ '', 'allura-google-importer', 6,
+ false, false, codesite_token);
+</script>
+<script type="text/javascript" src="http://www.gstatic.com/codesite/ph/3783617020303179221/js/ph_core.js"></script>
+</div>
+<div id="footer" dir="ltr">
+<div class="text">
+<a href="/projecthosting/terms.html">Terms</a> -
+ <a href="http://www.google.com/privacy.html">Privacy</a> -
+ <a href="/p/support/">Project Hosting Help</a>
+</div>
+</div>
+<div class="hostedBy" style="margin-top: -20px;">
+<span style="vertical-align: top;">Powered by <a href="http://code.google.com/projecthosting/">Google Project Hosting</a></span>
+</div>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/allura/blob/225dc73c/ForgeImporters/forgeimporters/tests/data/google/test-issue.html
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/data/google/test-issue.html b/ForgeImporters/forgeimporters/tests/data/google/test-issue.html
index c6bce0a..59507a9 100644
--- a/ForgeImporters/forgeimporters/tests/data/google/test-issue.html
+++ b/ForgeImporters/forgeimporters/tests/data/google/test-issue.html
@@ -1,4 +1,9 @@
 <!DOCTYPE html>
+<!--
+
+Just a regular single-page issue
+
+-->
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
@@ -323,6 +328,7 @@ That's all
 </table>
 </div>
 </div>
+
 <div class="cursor_off vt issuecomment" id="hc1">
 <div style="float:right; margin-right:.3em; text-align:right">
 <span class="date" title="Thu Aug  8 15:35:15 2013">
@@ -469,6 +475,7 @@ Oh, I forgot one (with an inter-project reference to <a href="/p/other-project/i
 <div class="round4"></div>
 </div>
 </div>
+
 </td>
 </tr>
 <tr>

http://git-wip-us.apache.org/repos/asf/allura/blob/225dc73c/ForgeImporters/forgeimporters/tests/google/functional/test_tracker.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/functional/test_tracker.py b/ForgeImporters/forgeimporters/tests/google/functional/test_tracker.py
index 96d789d..938d1c7 100644
--- a/ForgeImporters/forgeimporters/tests/google/functional/test_tracker.py
+++ b/ForgeImporters/forgeimporters/tests/google/functional/test_tracker.py
@@ -31,6 +31,7 @@ from allura import model as M
 from forgetracker import model as TM
 from forgeimporters import base
 from forgeimporters import google
+import forgeimporters.google.tracker
 
 
 class TestGCTrackerImporter(TestCase):
@@ -42,6 +43,10 @@ class TestGCTrackerImporter(TestCase):
                 'allura-google-importer', 'project_info')
         extractor.page = BeautifulSoup(html)
         extractor.url = "http://test/issue/?id=1"
+        # iter_comments will make more get_page() calls but we don't want the real thing to run an mess up the .page
+        # and .url attributes, make it a no-op which works with these tests (since its just the same page being
+        # fetched really)
+        extractor.get_page = lambda *a, **kw: ''
         return extractor
 
     def _make_ticket(self, issue, issue_id=1):
@@ -197,7 +202,7 @@ class TestGCTrackerImporter(TestCase):
                    for a in actual)
         self.assertEqual(atts, set(expected))
 
-    def test_attachements(self):
+    def test_attachments(self):
         ticket = self._make_ticket(self.test_issue)
         self._assert_attachments(ticket.attachments,
                                  ('at1.txt', 'text/plain',

http://git-wip-us.apache.org/repos/asf/allura/blob/225dc73c/ForgeImporters/forgeimporters/tests/google/test_extractor.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/test_extractor.py b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
index 60ca0e2..fe74f9c 100644
--- a/ForgeImporters/forgeimporters/tests/google/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
@@ -33,7 +33,6 @@ class TestGoogleCodeProjectExtractor(TestCase):
 
     def setUp(self):
         self._p_urlopen = mock.patch.object(base.ProjectExtractor, 'urlopen')
-        # self._p_soup = mock.patch.object(google, 'BeautifulSoup')
         self._p_soup = mock.patch.object(base, 'BeautifulSoup')
         self.urlopen = self._p_urlopen.start()
         self.soup = self._p_soup.start()
@@ -145,8 +144,7 @@ class TestGoogleCodeProjectExtractor(TestCase):
 
     def _make_extractor(self, html):
         with mock.patch.object(base.ProjectExtractor, 'urlopen'):
-            extractor = google.GoogleCodeProjectExtractor(
-                'allura-google-importer')
+            extractor = google.GoogleCodeProjectExtractor('allura-google-importer')
         extractor.page = BeautifulSoup(html)
         extractor.get_page = lambda pagename: extractor.page
         extractor.url = "http://test/source/browse"
@@ -184,9 +182,9 @@ class TestGoogleCodeProjectExtractor(TestCase):
 
     @without_module('html2text')
     def test_get_issue_basic_fields(self):
-        test_issue = open(pkg_resources.resource_filename(
-            'forgeimporters', 'tests/data/google/test-issue.html')).read()
+        test_issue = open(pkg_resources.resource_filename('forgeimporters', 'tests/data/google/test-issue.html')).read()
         gpe = self._make_extractor(test_issue)
+
         self.assertEqual(gpe.get_issue_creator().name, 'john...@gmail.com')
         self.assertEqual(gpe.get_issue_creator().url,
                          'http://code.google.com/u/101557263855536553789/')
@@ -224,8 +222,7 @@ class TestGoogleCodeProjectExtractor(TestCase):
 
     @skipif(module_not_available('html2text'))
     def test_get_issue_basic_fields_html2text(self):
-        test_issue = open(pkg_resources.resource_filename(
-            'forgeimporters', 'tests/data/google/test-issue.html')).read()
+        test_issue = open(pkg_resources.resource_filename('forgeimporters', 'tests/data/google/test-issue.html')).read()
         gpe = self._make_extractor(test_issue)
         self.assertEqual(gpe.get_issue_creator().name, 'john...@gmail.com')
         self.assertEqual(gpe.get_issue_creator().url,
@@ -280,12 +277,6 @@ class TestGoogleCodeProjectExtractor(TestCase):
         gpe = self._make_extractor(html % u'My Summary')
         self.assertEqual(gpe.get_issue_summary(), u'My Summary')
 
-    def test_get_issue_mod_date(self):
-        test_issue = open(pkg_resources.resource_filename(
-            'forgeimporters', 'tests/data/google/test-issue.html')).read()
-        gpe = self._make_extractor(test_issue)
-        self.assertEqual(gpe.get_issue_mod_date(), 'Thu Aug  8 15:36:57 2013')
-
     def test_get_issue_labels(self):
         test_issue = open(pkg_resources.resource_filename(
             'forgeimporters', 'tests/data/google/test-issue.html')).read()
@@ -316,18 +307,116 @@ class TestGoogleCodeProjectExtractor(TestCase):
             attachments[0].url, 'http://allura-google-importer.googlecode.com/issues/attachment?aid=70000000&name=at1.txt&token=3REU1M3JUUMt0rJUg7ldcELt6LA%3A1376059941255')
         self.assertEqual(attachments[0].type, 'text/plain')
 
+    def test_get_issue_ids(self):
+        extractor = google.GoogleCodeProjectExtractor(None)
+        extractor.get_page = mock.Mock(side_effect=((1, 2, 3), (2, 3, 4), ()))
+        self.assertItemsEqual(extractor.get_issue_ids(start=10), (1, 2, 3, 4))
+        self.assertEqual(extractor.get_page.call_count, 3)
+        extractor.get_page.assert_has_calls([
+            mock.call('issues_csv', parser=google.csv_parser, start=10),
+            mock.call('issues_csv', parser=google.csv_parser, start=110),
+            mock.call('issues_csv', parser=google.csv_parser, start=210),
+        ])
+
+    @mock.patch.object(google.GoogleCodeProjectExtractor, 'get_page')
+    @mock.patch.object(google.GoogleCodeProjectExtractor, 'get_issue_ids')
+    def test_iter_issue_ids(self, get_issue_ids, get_page):
+        get_issue_ids.side_effect = [set([1, 2]), set([2, 3, 4])]
+        issue_ids = [i for i,
+                     e in list(google.GoogleCodeProjectExtractor.iter_issues('foo'))]
+        self.assertEqual(issue_ids, [1, 2, 3, 4])
+        get_issue_ids.assert_has_calls([
+            mock.call(start=0),
+            mock.call(start=-8),
+        ])
+
+    @mock.patch.object(google.GoogleCodeProjectExtractor, '__init__')
+    @mock.patch.object(google.GoogleCodeProjectExtractor, 'get_issue_ids')
+    def test_iter_issue_ids_raises(self, get_issue_ids, __init__):
+        get_issue_ids.side_effect = [set([1, 2, 3, 4, 5])]
+        __init__.side_effect = [
+            None,
+            None,
+            # should skip but keep going
+            HTTPError('fourohfour', 404, 'fourohfour', {}, mock.Mock()),
+            None,
+            # should be re-raised
+            HTTPError('fubar', 500, 'fubar', {}, mock.Mock()),
+            None,
+        ]
+        issue_ids = []
+        try:
+            for issue_id, extractor in google.GoogleCodeProjectExtractor.iter_issues('foo'):
+                issue_ids.append(issue_id)
+        except HTTPError as e:
+            self.assertEqual(e.code, 500)
+        else:
+            assert False, 'Missing expected raised exception'
+        self.assertEqual(issue_ids, [1, 3])
+
+    @mock.patch.object(google.requests, 'head')
+    def test_check_readable(self, head):
+        head.return_value.status_code = 200
+        assert google.GoogleCodeProjectExtractor('my-project').check_readable()
+        head.return_value.status_code = 404
+        assert not google.GoogleCodeProjectExtractor('my-project').check_readable()
+
+
+class TestWithSetupForComments(TestCase):
+    # The main test suite did too much patching for how we want these tests to work
+    # These tests use iter_comments and 2 HTML pages of comments.
+
+    def _create_extractor(self):
+        test_issue = open(pkg_resources.resource_filename('forgeimporters', 'tests/data/google/test-issue-first-page.html')).read()
+        test_issue_older = open(pkg_resources.resource_filename('forgeimporters', 'tests/data/google/test-issue-prev-page.html')).read()
+
+        class LocalTestExtractor(google.GoogleCodeProjectExtractor):
+            def urlopen(self, url, **kw):
+                return self.urlopen_results.pop(0)
+
+            def setup_urlopen_results(self, results):
+                self.urlopen_results = results
+
+        gpe = LocalTestExtractor('allura-google-importer')
+        gpe.setup_urlopen_results([test_issue, test_issue_older])
+
+        return gpe
+
+    def test_get_issue_mod_date(self):
+        gpe = self._create_extractor()
+        gpe.get_page('detail?id=6')
+        self.assertEqual(gpe.get_issue_mod_date(), 'Thu Aug  8 15:36:57 2013')
+
     @without_module('html2text')
     @mock.patch.object(base, 'StringIO')
     def test_iter_comments(self, StringIO):
-        test_issue = open(pkg_resources.resource_filename(
-            'forgeimporters', 'tests/data/google/test-issue.html')).read()
-        gpe = self._make_extractor(test_issue)
-        comments = list(gpe.iter_comments())
-        self.assertEqual(len(comments), 4)
+        gpe = self._create_extractor()
+        gpe.get_page('detail?id=6')
+
+        with mock.patch.object(base.ProjectExtractor, 'urlopen'):  # for attachments, which end up using a different Extractor urlopen
+            comments = list(gpe.iter_comments())
+
+        self.assertEqual(len(comments), 6)
         expected = [
             {
                 'author.name': 'john...@gmail.com',
                 'author.url': 'http://code.google.com/u/101557263855536553789/',
+                'created_date': 'Thu Aug  8 15:34:01 2013',
+                'body': 'Simple comment',
+                'updates': {},
+                'attachments': [],
+            },
+            {
+                'author.name': 'john...@gmail.com',
+                'author.url': 'http://code.google.com/u/101557263855536553789/',
+                'created_date': 'Thu Aug  8 15:34:09 2013',
+                'body': 'Boring comment',
+                'updates': {},
+                'attachments': [],
+            },
+            {
+                'author.name': 'john...@gmail.com',
+                'author.url': 'http://code.google.com/u/101557263855536553789/',
                 'created_date': 'Thu Aug  8 15:35:15 2013',
                 'body': 'Test \\*comment\\* is a comment',
                 'updates': {'Status:': 'Started', 'Labels:': '-OpSys-Linux OpSys-Windows'},
@@ -370,15 +459,33 @@ class TestGoogleCodeProjectExtractor(TestCase):
     @skipif(module_not_available('html2text'))
     @mock.patch.object(base, 'StringIO')
     def test_iter_comments_html2text(self, StringIO):
-        test_issue = open(pkg_resources.resource_filename(
-            'forgeimporters', 'tests/data/google/test-issue.html')).read()
-        gpe = self._make_extractor(test_issue)
-        comments = list(gpe.iter_comments())
-        self.assertEqual(len(comments), 4)
+        gpe = self._create_extractor()
+        gpe.get_page('detail?id=6')
+
+        with mock.patch.object(base.ProjectExtractor, 'urlopen'):  # for attachments, which end up using a different Extractor urlopen
+            comments = list(gpe.iter_comments())
+
+        self.assertEqual(len(comments), 6)
         expected = [
             {
                 'author.name': 'john...@gmail.com',
                 'author.url': 'http://code.google.com/u/101557263855536553789/',
+                'created_date': 'Thu Aug  8 15:34:01 2013',
+                'body': 'Simple comment',
+                'updates': {},
+                'attachments': [],
+            },
+            {
+                'author.name': 'john...@gmail.com',
+                'author.url': 'http://code.google.com/u/101557263855536553789/',
+                'created_date': 'Thu Aug  8 15:34:09 2013',
+                'body': 'Boring comment',
+                'updates': {},
+                'attachments': [],
+            },
+            {
+                'author.name': 'john...@gmail.com',
+                'author.url': 'http://code.google.com/u/101557263855536553789/',
                 'created_date': 'Thu Aug  8 15:35:15 2013',
                 'body': 'Test \\*comment\\* is a comment',
                 'updates': {'Status:': 'Started', 'Labels:': '-OpSys-Linux OpSys-Windows'},
@@ -418,60 +525,6 @@ class TestGoogleCodeProjectExtractor(TestCase):
             self.assertEqual(
                 [a.filename for a in actual.attachments], expected['attachments'])
 
-    def test_get_issue_ids(self):
-        extractor = google.GoogleCodeProjectExtractor(None)
-        extractor.get_page = mock.Mock(side_effect=((1, 2, 3), (2, 3, 4), ()))
-        self.assertItemsEqual(extractor.get_issue_ids(start=10), (1, 2, 3, 4))
-        self.assertEqual(extractor.get_page.call_count, 3)
-        extractor.get_page.assert_has_calls([
-            mock.call('issues_csv', parser=google.csv_parser, start=10),
-            mock.call('issues_csv', parser=google.csv_parser, start=110),
-            mock.call('issues_csv', parser=google.csv_parser, start=210),
-        ])
-
-    @mock.patch.object(google.GoogleCodeProjectExtractor, 'get_page')
-    @mock.patch.object(google.GoogleCodeProjectExtractor, 'get_issue_ids')
-    def test_iter_issue_ids(self, get_issue_ids, get_page):
-        get_issue_ids.side_effect = [set([1, 2]), set([2, 3, 4])]
-        issue_ids = [i for i,
-                     e in list(google.GoogleCodeProjectExtractor.iter_issues('foo'))]
-        self.assertEqual(issue_ids, [1, 2, 3, 4])
-        get_issue_ids.assert_has_calls([
-            mock.call(start=0),
-            mock.call(start=-8),
-        ])
-
-    @mock.patch.object(google.GoogleCodeProjectExtractor, '__init__')
-    @mock.patch.object(google.GoogleCodeProjectExtractor, 'get_issue_ids')
-    def test_iter_issue_ids_raises(self, get_issue_ids, __init__):
-        get_issue_ids.side_effect = [set([1, 2, 3, 4, 5])]
-        __init__.side_effect = [
-            None,
-            None,
-            # should skip but keep going
-            HTTPError('fourohfour', 404, 'fourohfour', {}, mock.Mock()),
-            None,
-            # should be re-raised
-            HTTPError('fubar', 500, 'fubar', {}, mock.Mock()),
-            None,
-        ]
-        issue_ids = []
-        try:
-            for issue_id, extractor in google.GoogleCodeProjectExtractor.iter_issues('foo'):
-                issue_ids.append(issue_id)
-        except HTTPError as e:
-            self.assertEqual(e.code, 500)
-        else:
-            assert False, 'Missing expected raised exception'
-        self.assertEqual(issue_ids, [1, 3])
-
-    @mock.patch.object(google.requests, 'head')
-    def test_check_readable(self, head):
-        head.return_value.status_code = 200
-        assert google.GoogleCodeProjectExtractor('my-project').check_readable()
-        head.return_value.status_code = 404
-        assert not google.GoogleCodeProjectExtractor('my-project').check_readable()
-
 
 class TestUserLink(TestCase):