You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2013/08/19 20:00:21 UTC

[03/17] git commit: [#6464] Google Code Tracker Importer via web scraping

[#6464] Google Code Tracker Importer via web scraping

Signed-off-by: Cory Johns <cj...@slashdotmedia.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/45b6ac90
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/45b6ac90
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/45b6ac90

Branch: refs/heads/cj/6464
Commit: 45b6ac9017f2f93e3202b71d403abc9ed9057047
Parents: afb7bb4
Author: Cory Johns <cj...@slashdotmedia.com>
Authored: Tue Aug 6 23:47:14 2013 +0000
Committer: Cory Johns <cj...@slashdotmedia.com>
Committed: Mon Aug 19 18:00:08 2013 +0000

----------------------------------------------------------------------
 Allura/allura/controllers/site_admin.py         |  15 +-
 .../allura/templates/site_admin_task_view.html  |   8 +
 .../forgeimporters/google/__init__.py           | 176 ++++++++++++++---
 ForgeImporters/forgeimporters/google/tasks.py   |   8 +-
 ForgeImporters/forgeimporters/google/tracker.py | 191 ++++---------------
 .../tests/google/test_extractor.py              |  19 +-
 .../forgeimporters/tests/google/test_tasks.py   |   8 +-
 .../forgeimporters/tests/google/test_tracker.py |  98 +++++-----
 8 files changed, 277 insertions(+), 246 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/45b6ac90/Allura/allura/controllers/site_admin.py
----------------------------------------------------------------------
diff --git a/Allura/allura/controllers/site_admin.py b/Allura/allura/controllers/site_admin.py
index 4f8f835..50b0e38 100644
--- a/Allura/allura/controllers/site_admin.py
+++ b/Allura/allura/controllers/site_admin.py
@@ -30,6 +30,7 @@ import tg
 from pylons import tmpl_context as c, app_globals as g
 from pylons import request
 from formencode import validators, Invalid
+from webob.exc import HTTPNotFound
 
 from allura.lib import helpers as h
 from allura.lib import validators as v
@@ -321,7 +322,19 @@ class TaskManagerController(object):
             config_dict['user'] = user
         with h.push_config(c, **config_dict):
             task = task.post(*args, **kw)
-        redirect('view/%s' % task._id)
+        redirect('../view/%s' % task._id)
+
+    @expose()
+    @require_post()
+    def resubmit(self, task_id):
+        try:
+            task = M.monq_model.MonQTask.query.get(_id=bson.ObjectId(task_id))
+        except bson.errors.InvalidId as e:
+            task = None
+        if task is None:
+            raise HTTPNotFound()
+        task.state = 'ready'
+        redirect('../view/%s' % task._id)
 
     @expose('json:')
     def task_doc(self, task_name):

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/45b6ac90/Allura/allura/templates/site_admin_task_view.html
----------------------------------------------------------------------
diff --git a/Allura/allura/templates/site_admin_task_view.html b/Allura/allura/templates/site_admin_task_view.html
index c107382..e363b8d 100644
--- a/Allura/allura/templates/site_admin_task_view.html
+++ b/Allura/allura/templates/site_admin_task_view.html
@@ -66,6 +66,9 @@
     #task_details td.second-column {
         border: 0;
     }
+    #resubmit-task-form {
+        float: right;
+    }
 </style>
 {% endblock %}
 
@@ -73,6 +76,11 @@
 {% if not task %}
     Task not found
 {% else %}
+    {% if task.state in ['error', 'complete'] %}
+    <form id="resubmit-task-form" action="../resubmit/{{task._id}}" method="POST">
+        <input type="submit" value="Re-Submit Task" />
+    </form>
+    {% endif %}
     <h2>Task Details</h2>
     <table id="task_details">
         <tr>

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/45b6ac90/ForgeImporters/forgeimporters/google/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/google/__init__.py b/ForgeImporters/forgeimporters/google/__init__.py
index a12389b..eaa0765 100644
--- a/ForgeImporters/forgeimporters/google/__init__.py
+++ b/ForgeImporters/forgeimporters/google/__init__.py
@@ -19,6 +19,7 @@ import re
 import urllib
 from urlparse import urlparse, urljoin
 from collections import defaultdict
+from contextlib import closing
 try:
     from cStringIO import StringIO
 except ImportError:
@@ -33,13 +34,32 @@ from forgeimporters.base import ProjectExtractor
 
 log = logging.getLogger(__name__)
 
+def _as_text(node, chunks=None):
+    """
+    Similar to node.text, but preserves whitespace around tags,
+    and converts <br/>s to \n.
+    """
+    if chunks is None:
+        chunks = []
+    for n in node:
+        if isinstance(n, basestring):
+            chunks.append(n)
+        elif n.name == 'br':
+            chunks.append('\n')
+        else:
+            _as_text(n, chunks)
+    return ''.join(chunks)
+
+
 class GoogleCodeProjectExtractor(ProjectExtractor):
     BASE_URL = 'http://code.google.com'
     RE_REPO_TYPE = re.compile(r'(svn|hg|git)')
 
     PAGE_MAP = {
-            'project_info': BASE_URL + '/p/%s/',
-            'source_browse': BASE_URL + '/p/%s/source/browse/',
+            'project_info': BASE_URL + '/p/{project_name}/',
+            'source_browse': BASE_URL + '/p/{project_name}/source/browse/',
+            'issues_csv': BASE_URL + '/p/{project_name}/issues/csv?can=1&colspec=ID&start={start}',
+            'issue': BASE_URL + '/p/{project_name}/issues/detail?id={issue_id}',
         }
 
     LICENSE_MAP = defaultdict(lambda:'Other/Proprietary License', {
@@ -57,16 +77,16 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
 
     DEFAULT_ICON = 'http://www.gstatic.com/codesite/ph/images/defaultlogo.png'
 
-    def __init__(self, allura_project, gc_project_name, page=None):
-        self.project = allura_project
-        self.gc_project_name = gc_project_name
+    def __init__(self, project_name, page_name=None, **kw):
+        # Stored unquoted; get_page_url() URL-quotes it when building URLs.
+        self.project_name = project_name
         self._page_cache = {}
         self.url = None
         self.page = None
-        if page:
-            self.get_page(page)
+        if page_name:
+            self.get_page(page_name, **kw)
 
-    def get_page(self, page_name_or_url):
+    def get_page(self, page_name_or_url, **kw):
         """Return a Beautiful soup object for the given page name or url.
 
         If a page name is provided, the associated url is looked up in
@@ -77,27 +97,33 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
         request.
 
         """
-        if page_name_or_url in self._page_cache:
-            return self._page_cache[page_name_or_url]
-        self.url = (self.get_page_url(page_name_or_url) if page_name_or_url in
-                self.PAGE_MAP else page_name_or_url)
-        self.page = self._page_cache[page_name_or_url] = \
-                BeautifulSoup(self.urlopen(self.url))
+        if page_name_or_url in self.PAGE_MAP:
+            self.url = self.get_page_url(page_name_or_url, **kw)
+        else:
+            self.url = page_name_or_url
+        if self.url in self._page_cache:
+            self.page = self._page_cache[self.url]
+        else:
+            self.page = self._page_cache[self.url] = \
+                    BeautifulSoup(self.urlopen(self.url))
         return self.page
 
-    def get_page_url(self, page_name):
+    def get_page_url(self, page_name, **kw):
         """Return the url associated with ``page_name``.
 
         Raises KeyError if ``page_name`` is not in :attr:`PAGE_MAP`.
 
         """
-        return self.PAGE_MAP[page_name] % urllib.quote(self.gc_project_name)
+        return self.PAGE_MAP[page_name].format(
+            project_name=urllib.quote(self.project_name),
+            **kw
+        )
 
-    def get_short_description(self):
+    def get_short_description(self, project):
         page = self.get_page('project_info')
-        self.project.short_description = page.find(itemprop='description').string.strip()
+        project.short_description = page.find(itemprop='description').string.strip()
 
-    def get_icon(self):
+    def get_icon(self, project):
         page = self.get_page('project_info')
         icon_url = urljoin(self.url, page.find(itemprop='image').attrMap['src'])
         if icon_url == self.DEFAULT_ICON:
@@ -109,13 +135,13 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
             icon_name, fp,
             fp_ish.info()['content-type'].split(';')[0],  # strip off charset=x extra param,
             square=True, thumbnail_size=(48,48),
-            thumbnail_meta={'project_id': self.project._id, 'category': 'icon'})
+            thumbnail_meta={'project_id': project._id, 'category': 'icon'})
 
-    def get_license(self):
+    def get_license(self, project):
         page = self.get_page('project_info')
         license = page.find(text='Code license').findNext().find('a').string.strip()
         trove = M.TroveCategory.query.get(fullname=self.LICENSE_MAP[license])
-        self.project.trove_license.append(trove._id)
+        project.trove_license.append(trove._id)
 
     def get_repo_type(self):
         page = self.get_page('source_browse')
@@ -128,3 +154,109 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
             return re_match.group(0)
         else:
             raise Exception("Unknown repo type: {0}".format(repo_type.text))
+
+    @classmethod
+    def _get_issue_ids_page(cls, project_name, start):
+        url = cls.PAGE_MAP['issues_csv'].format(project_name=urllib.quote(project_name), start=start)
+        with closing(urllib2.urlopen(url)) as fp:
+            lines = fp.readlines()[1:]  # skip CSV header
+            if lines and not lines[-1].startswith('"'):
+                lines.pop()  # skip "next page here" info footer
+        issue_ids = [line.strip('",\n') for line in lines]
+        return issue_ids
+
+    @classmethod
+    def iter_issues(cls, project_name):
+        """
+        Iterate over all issues for a project,
+        using paging to keep the responses reasonable.
+        """
+        start = 0
+        limit = 100
+
+        while True:
+            issue_ids = cls._get_issue_ids_page(project_name, start)
+            if len(issue_ids) <= 0:
+                return
+            for issue_id in issue_ids:
+                yield cls(project_name, 'issue', issue_id=issue_id)
+            start += limit
+
+    def get_issue_summary(self):
+        return self.page.find(id='issueheader').findAll('td', limit=2)[1].span.string.strip()
+
+    def get_issue_description(self):
+        return _as_text(self.page.find(id='hc0').pre)
+
+    def get_issue_created_date(self):
+        return self.page.find(id='hc0').find('span', 'date').get('title')
+
+    def get_issue_mod_date(self):
+        last_update = Comment(self.page.findAll('div', 'issuecomment')[-1])
+        return last_update.created_date
+
+    def get_issue_creator(self):
+        a = self.page.find(id='hc0').find('a', 'userlink')
+        return UserLink(a)
+
+    def get_issue_status(self):
+        return self.page.find(id='issuemeta').find('th', text=re.compile('Status:')).findNext().span.string.strip()
+
+    def get_issue_owner(self):
+        return UserLink(self.page.find(id='issuemeta').find('th', text=re.compile('Owner:')).findNext().a)
+
+    def get_issue_labels(self):
+        label_nodes = self.page.find(id='issuemeta').findAll('a', 'label')
+        return [_as_text(l) for l in label_nodes]
+
+    def get_issue_attachments(self):
+        attachments = self.page.find(id='hc0').find('div', 'attachments')
+        if attachments:
+            return map(Attachment, attachments.findAll('tr'))
+        else:
+            return []
+
+    def iter_comments(self):
+        for comment in self.page.findAll('div', 'issuecomment'):
+            yield Comment(comment)
+
+class UserLink(object):
+    def __init__(self, tag):
+        self.name = tag.string.strip()
+        self.link = urljoin(GoogleCodeProjectExtractor.BASE_URL, tag.get('href'))
+
+class Comment(object):
+    def __init__(self, tag):
+        self.author = UserLink(tag.find('span', 'author').find('a', 'userlink'))
+        self.created_date = tag.find('span', 'date').get('title')
+        self.body = _as_text(tag.find('pre'))
+        self._get_updates(tag)
+        self._get_attachments(tag)
+
+    def _get_updates(self, tag):
+        _updates = tag.find('div', 'updates')
+        if _updates:
+            _strings = _updates.findAll(text=True)
+            updates = (s.strip() for s in _strings if s.strip())
+            self.updates = {field: updates.next() for field in updates}
+        else:
+            self.updates = {}
+
+    def _get_attachments(self, tag):
+        attachments = tag.find('div', 'attachments')
+        if attachments:
+            self.attachments = map(Attachment, attachments.findAll('tr'))
+        else:
+            self.attachments = []
+
+class Attachment(object):
+    def __init__(self, tag):
+        self.filename = _as_text(tag).strip().split()[0]
+        self.url = urljoin(GoogleCodeProjectExtractor.BASE_URL, tag.a.get('href'))
+        self.type = None
+
+    @property
+    def file(self):
+        # closing() releases the HTTP response once its body is buffered.
+        with closing(urllib2.urlopen(self.url)) as fp_ish:
+            return StringIO(fp_ish.read())

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/45b6ac90/ForgeImporters/forgeimporters/google/tasks.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/google/tasks.py b/ForgeImporters/forgeimporters/google/tasks.py
index 968d9a9..69e7556 100644
--- a/ForgeImporters/forgeimporters/google/tasks.py
+++ b/ForgeImporters/forgeimporters/google/tasks.py
@@ -27,9 +27,9 @@ from . import GoogleCodeProjectExtractor
 
 @task
 def import_project_info(project_name):
-    extractor = GoogleCodeProjectExtractor(c.project, project_name, 'project_info')
-    extractor.get_short_description()
-    extractor.get_icon()
-    extractor.get_license()
+    extractor = GoogleCodeProjectExtractor(project_name, 'project_info')
+    extractor.get_short_description(c.project)
+    extractor.get_icon(c.project)
+    extractor.get_license(c.project)
     ThreadLocalORMSession.flush_all()
     g.post_event('project_updated')

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/45b6ac90/ForgeImporters/forgeimporters/google/tracker.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/google/tracker.py b/ForgeImporters/forgeimporters/google/tracker.py
index 95f53e4..297f65a 100644
--- a/ForgeImporters/forgeimporters/google/tracker.py
+++ b/ForgeImporters/forgeimporters/google/tracker.py
@@ -19,15 +19,14 @@ from collections import defaultdict
 from datetime import datetime
 
 from pylons import tmpl_context as c
-#import gdata
-gdata = None
-from ming.orm import session
+from ming.orm import session, ThreadLocalORMSession
 
 from allura.lib import helpers as h
 
 from forgetracker.tracker_main import ForgeTrackerApp
 from forgetracker import model as TM
 from ..base import ToolImporter
+from . import GoogleCodeProjectExtractor
 
 
 class GoogleCodeTrackerImporter(ToolImporter):
@@ -42,23 +41,22 @@ class GoogleCodeTrackerImporter(ToolImporter):
             type='select',
         )
 
-    def import_tool(self, project, user, project_name=None, mount_point=None,
+    def import_tool(self, project, user, project_name, mount_point=None,
             mount_label=None, **kw):
-        c.app = project.install_app('tracker', mount_point, mount_label)
-        c.app.globals.open_status_names = ['New', 'Accepted', 'Started']
-        c.app.globals.closed_status_names = ['Fixed', 'Verified', 'Invalid', 'Duplicate', 'WontFix', 'Done']
+        c.app = project.install_app('tickets', mount_point, mount_label)
+        ThreadLocalORMSession.flush_all()
+        c.app.globals.open_status_names = 'New Accepted Started'
+        c.app.globals.closed_status_names = 'Fixed Verified Invalid Duplicate WontFix Done'
         self.custom_fields = {}
-        extractor = GDataAPIExtractor(project_name)
-        for issue in extractor.iter_issues():
+        for issue in GoogleCodeProjectExtractor.iter_issues(project_name):
             ticket = TM.Ticket.new()
             self.process_fields(ticket, issue)
             self.process_labels(ticket, issue)
-            self.process_comments(ticket, extractor.iter_comments(issue))
+            self.process_comments(ticket, issue)
             session(ticket).flush(ticket)
             session(ticket).expunge(ticket)
         self.postprocess_custom_fields()
-        session(c.app).flush(c.app)
-        session(c.app.globals).flush(c.app.globals)
+        ThreadLocalORMSession.flush_all()
 
     def custom_field(self, name):
         if name not in self.custom_fields:
@@ -71,16 +69,25 @@ class GoogleCodeTrackerImporter(ToolImporter):
         return self.custom_fields[name]
 
     def process_fields(self, ticket, issue):
-        ticket.summary = issue.summary
-        ticket.description = issue.description
-        ticket.status = issue.status
-        ticket.created_date = datetime.strptime(issue.created_date, '')
-        ticket.mod_date = datetime.strptime(issue.mod_date, '')
+        ticket.summary = issue.get_issue_summary()
+        ticket.status = issue.get_issue_status()
+        ticket.created_date = datetime.strptime(issue.get_issue_created_date(), '%c')
+        ticket.mod_date = datetime.strptime(issue.get_issue_mod_date(), '%c')
+        ticket.description = (
+                u'*Originally created by:* [{creator.name}]({creator.link})\n'
+                '*Originally owned by:* [{owner.name}]({owner.link})\n'
+                '\n'
+                '{body}').format(
+                    creator=issue.get_issue_creator(),
+                    owner=issue.get_issue_owner(),
+                    body=issue.get_issue_description(),
+                )
+        ticket.add_multiple_attachments(issue.get_issue_attachments())
 
     def process_labels(self, ticket, issue):
         labels = set()
         custom_fields = defaultdict(set)
-        for label in issue.labels:
+        for label in issue.get_issue_labels():
             if u'-' in label:
                 name, value = label.split(u'-', 1)
                 cf = self.custom_field(name)
@@ -91,23 +98,24 @@ class GoogleCodeTrackerImporter(ToolImporter):
         ticket.labels = list(labels)
         ticket.custom_fields = {n: u', '.join(sorted(v)) for n,v in custom_fields.iteritems()}
 
-    def process_comments(self, ticket, comments):
-        for comment in comments:
-            p = ticket.thread.add_post(
+    def process_comments(self, ticket, issue):
+        for comment in issue.iter_comments():
+            p = ticket.discussion_thread.add_post(
                     text = (
-                        u'Originally posted by: [{author.name}]({author.link})\n'
+                        u'*Originally posted by:* [{author.name}]({author.link})\n'
                         '\n'
                         '{body}\n'
                         '\n'
                         '{updates}').format(
                             author=comment.author,
-                            body=comment.text,
+                            body=comment.body,
                             updates='\n'.join(
-                                '*%s*: %s' % (k,v)
+                                '**%s** %s' % (k,v)
                                 for k,v in comment.updates.items()
                             ),
                     )
                 )
+            p.created_date = p.timestamp = datetime.strptime(comment.created_date, '%c')
             p.add_multiple_attachments(comment.attachments)
 
     def postprocess_custom_fields(self):
@@ -125,138 +133,3 @@ class GoogleCodeTrackerImporter(ToolImporter):
             else:
                 field['options'] = ''
             c.app.globals.custom_fields.append(field)
-
-
-class GDataAPIExtractor(object):
-    def __init__(self, project_name):
-        self.project_name = project_name
-
-    def iter_issues(self, limit=50):
-        """
-        Iterate over all issues for a project,
-        using paging to keep the responses reasonable.
-        """
-        start = 1
-
-        client = gdata.projecthosting.client.ProjectHostingClient()
-        while True:
-            query = gdata.projecthosting.client.Query(start_index=start, max_results=limit)
-            issues = client.get_issues(self.project_name, query=query).entry
-            if len(issues) <= 0:
-                return
-            for issue in issues:
-                yield GDataAPIIssue(issue)
-            start += limit
-
-    def iter_comments(self, issue, limit=50):
-        """
-        Iterate over all comments for a given issue,
-        using paging to keep the responses reasonable.
-        """
-        start = 1
-
-        client = gdata.projecthosting.client.ProjectHostingClient()
-        while True:
-            query = gdata.projecthosting.client.Query(start_index=start, max_results=limit)
-            comments = client.get_comments(self.project_name, query=query).entry
-            if len(comments) <= 0:
-                return
-            for comment in comments:
-                yield GDataAPIComment(comment)
-            start += limit
-
-
-class GDataAPIUser(object):
-    def __init__(self, user):
-        self.user = user
-
-    @property
-    def name(self):
-        return h.really_unicode(self.user.name.text)
-
-    @property
-    def link(self):
-        return u'http://code.google.com/u/%s' % self.name
-
-
-class GDataAPIIssue(object):
-    def __init__(self, issue):
-        self.issue = issue
-
-    @property
-    def summary(self):
-        return h.really_unicode(self.issue.title.text)
-
-    @property
-    def description(self):
-        return h.really_unicode(self.issue.content.text)
-
-    @property
-    def created_date(self):
-        return self.to_date(self.issue.published.text)
-
-    @property
-    def mod_date(self):
-        return self.to_date(self.issue.updated.text)
-
-    @property
-    def creator(self):
-        return h.really_unicode(self.issue.author[0].name.text)
-
-    @property
-    def status(self):
-        if getattr(self.issue, 'status', None) is not None:
-            return h.really_unicode(self.issue.status.text)
-        return u''
-
-    @property
-    def owner(self):
-        if getattr(self.issue, 'owner', None) is not None:
-            return h.really_unicode(self.issue.owner.username.text)
-        return u''
-
-    @property
-    def labels(self):
-        return [h.really_unicode(l.text) for l in self.issue.labels]
-
-
-class GDataAPIComment(object):
-    def __init__(self, comment):
-        self.comment = comment
-
-    @property
-    def author(self):
-        return GDataAPIUser(self.comment.author[0])
-
-    @property
-    def created_date(self):
-        return h.really_unicode(self.comment.published.text)
-
-    @property
-    def body(self):
-        return h.really_unicode(self.comment.content.text)
-
-    @property
-    def updates(self):
-        return {}
-
-    @property
-    def attachments(self):
-        return []
-
-
-class GDataAPIAttachment(object):
-    def __init__(self, attachment):
-        self.attachment = attachment
-
-    @property
-    def filename(self):
-        pass
-
-    @property
-    def type(self):
-        pass
-
-    @property
-    def file(self):
-        pass

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/45b6ac90/ForgeImporters/forgeimporters/tests/google/test_extractor.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/test_extractor.py b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
index 9b6db45..89aac5a 100644
--- a/ForgeImporters/forgeimporters/tests/google/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
@@ -37,10 +37,9 @@ class TestGoogleCodeProjectExtractor(TestCase):
         self._p_soup.stop()
 
     def test_init(self):
-        extractor = google.GoogleCodeProjectExtractor(self.project, 'my-project', 'project_info')
+        extractor = google.GoogleCodeProjectExtractor('my-project', 'project_info')
 
         self.urlopen.assert_called_once_with('http://code.google.com/p/my-project/')
-        self.assertEqual(extractor.project, self.project)
         self.soup.assert_called_once_with(self.urlopen.return_value)
         self.assertEqual(extractor.page, self.soup.return_value)
 
@@ -57,10 +56,10 @@ class TestGoogleCodeProjectExtractor(TestCase):
                 'http://code.google.com/p/my-project/')
 
     def test_get_short_description(self):
-        extractor = google.GoogleCodeProjectExtractor(self.project, 'my-project', 'project_info')
+        extractor = google.GoogleCodeProjectExtractor('my-project', 'project_info')
         extractor.page.find.return_value.string = 'My Super Project'
 
-        extractor.get_short_description()
+        extractor.get_short_description(self.project)
 
         extractor.page.find.assert_called_once_with(itemprop='description')
         self.assertEqual(self.project.short_description, 'My Super Project')
@@ -69,11 +68,11 @@ class TestGoogleCodeProjectExtractor(TestCase):
     @mock.patch.object(google, 'M')
     def test_get_icon(self, M, StringIO):
         self.urlopen.return_value.info.return_value = {'content-type': 'image/png'}
-        extractor = google.GoogleCodeProjectExtractor(self.project, 'my-project', 'project_info')
+        extractor = google.GoogleCodeProjectExtractor('my-project', 'project_info')
         extractor.page.find.return_value.attrMap = {'src': 'http://example.com/foo/bar/my-logo.png'}
         self.urlopen.reset_mock()
 
-        extractor.get_icon()
+        extractor.get_icon(self.project)
 
         extractor.page.find.assert_called_once_with(itemprop='image')
         self.urlopen.assert_called_once_with('http://example.com/foo/bar/my-logo.png')
@@ -87,11 +86,11 @@ class TestGoogleCodeProjectExtractor(TestCase):
     @mock.patch.object(google, 'M')
     def test_get_license(self, M):
         self.project.trove_license = []
-        extractor = google.GoogleCodeProjectExtractor(self.project, 'my-project', 'project_info')
+        extractor = google.GoogleCodeProjectExtractor('my-project', 'project_info')
         extractor.page.find.return_value.findNext.return_value.find.return_value.string = '  New BSD License  '
         trove = M.TroveCategory.query.get.return_value
 
-        extractor.get_license()
+        extractor.get_license(self.project)
 
         extractor.page.find.assert_called_once_with(text='Code license')
         extractor.page.find.return_value.findNext.assert_called_once_with()
@@ -101,13 +100,13 @@ class TestGoogleCodeProjectExtractor(TestCase):
 
         M.TroveCategory.query.get.reset_mock()
         extractor.page.find.return_value.findNext.return_value.find.return_value.string = 'non-existant license'
-        extractor.get_license()
+        extractor.get_license(self.project)
         M.TroveCategory.query.get.assert_called_once_with(fullname='Other/Proprietary License')
 
     def _make_extractor(self, html):
         from BeautifulSoup import BeautifulSoup
         with mock.patch.object(base.ProjectExtractor, 'urlopen'):
-            extractor = google.GoogleCodeProjectExtractor(self.project, 'my-project')
+            extractor = google.GoogleCodeProjectExtractor('my-project')
         extractor.page = BeautifulSoup(html)
         extractor.get_page = lambda pagename: extractor.page
         extractor.url="http://test/source/browse"

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/45b6ac90/ForgeImporters/forgeimporters/tests/google/test_tasks.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/test_tasks.py b/ForgeImporters/forgeimporters/tests/google/test_tasks.py
index dc7d936..01bab68 100644
--- a/ForgeImporters/forgeimporters/tests/google/test_tasks.py
+++ b/ForgeImporters/forgeimporters/tests/google/test_tasks.py
@@ -26,8 +26,8 @@ from ...google import tasks
 def test_import_project_info(c, session, gpe):
     c.project = mock.Mock(name='project')
     tasks.import_project_info('my-project')
-    gpe.assert_called_once_with(c.project, 'my-project', 'project_info')
-    gpe.return_value.get_short_description.assert_called_once_with()
-    gpe.return_value.get_icon.assert_called_once_with()
-    gpe.return_value.get_license.assert_called_once_with()
+    gpe.assert_called_once_with('my-project', 'project_info')
+    gpe.return_value.get_short_description.assert_called_once_with(c.project)
+    gpe.return_value.get_icon.assert_called_once_with(c.project)
+    gpe.return_value.get_license.assert_called_once_with(c.project)
     session.flush_all.assert_called_once_with()

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/45b6ac90/ForgeImporters/forgeimporters/tests/google/test_tracker.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/test_tracker.py b/ForgeImporters/forgeimporters/tests/google/test_tracker.py
index e49f279..62493bd 100644
--- a/ForgeImporters/forgeimporters/tests/google/test_tracker.py
+++ b/ForgeImporters/forgeimporters/tests/google/test_tracker.py
@@ -15,6 +15,7 @@
 #       specific language governing permissions and limitations
 #       under the License.
 
+from datetime import datetime
 from operator import itemgetter
 from unittest import TestCase
 import mock
@@ -24,10 +25,11 @@ from ...google import tracker
 
 class TestTrackerImporter(TestCase):
     @mock.patch.object(tracker, 'c')
+    @mock.patch.object(tracker, 'ThreadLocalORMSession')
     @mock.patch.object(tracker, 'session')
     @mock.patch.object(tracker, 'TM')
-    @mock.patch.object(tracker, 'GDataAPIExtractor')
-    def test_import_tool(self, gdata, TM, session, c):
+    @mock.patch.object(tracker, 'GoogleCodeProjectExtractor')
+    def test_import_tool(self, gpe, TM, session, tlos, c):
         importer = tracker.GoogleCodeTrackerImporter()
         importer.process_fields = mock.Mock()
         importer.process_labels = mock.Mock()
@@ -35,16 +37,14 @@ class TestTrackerImporter(TestCase):
         importer.postprocess_custom_fields = mock.Mock()
         project, user = mock.Mock(), mock.Mock()
         app = project.install_app.return_value
-        extractor = gdata.return_value
-        issues = extractor.iter_issues.return_value = [mock.Mock(), mock.Mock()]
+        issues = gpe.iter_issues.return_value = [mock.Mock(), mock.Mock()]
         tickets = TM.Ticket.new.side_effect = [mock.Mock(), mock.Mock()]
-        comments = extractor.iter_comments.side_effect = [mock.Mock(), mock.Mock()]
 
         importer.import_tool(project, user, project_name='project_name',
                 mount_point='mount_point', mount_label='mount_label')
 
-        project.install_app.assert_called_once_with('tracker', 'mount_point', 'mount_label')
-        gdata.assert_called_once_with('project_name')
+        project.install_app.assert_called_once_with('tickets', 'mount_point', 'mount_label')
+        gpe.iter_issues.assert_called_once_with('project_name')
         self.assertEqual(importer.process_fields.call_args_list, [
                 mock.call(tickets[0], issues[0]),
                 mock.call(tickets[1], issues[1]),
@@ -54,26 +54,16 @@ class TestTrackerImporter(TestCase):
                 mock.call(tickets[1], issues[1]),
             ])
         self.assertEqual(importer.process_comments.call_args_list, [
-                mock.call(tickets[0], comments[0]),
-                mock.call(tickets[1], comments[1]),
+                mock.call(tickets[0], issues[0]),
+                mock.call(tickets[1], issues[1]),
             ])
-        self.assertEqual(extractor.iter_comments.call_args_list, [
-                mock.call(issues[0]),
-                mock.call(issues[1]),
-            ])
-        self.assertEqual(session.call_args_list, [
-                mock.call(tickets[0]),
-                mock.call(tickets[0]),
-                mock.call(tickets[1]),
-                mock.call(tickets[1]),
-                mock.call(app),
-                mock.call(app.globals),
+        self.assertEqual(tlos.flush_all.call_args_list, [
+                mock.call(),
+                mock.call(),
             ])
         self.assertEqual(session.return_value.flush.call_args_list, [
                 mock.call(tickets[0]),
                 mock.call(tickets[1]),
-                mock.call(app),
-                mock.call(app.globals),
             ])
         self.assertEqual(session.return_value.expunge.call_args_list, [
                 mock.call(tickets[0]),
@@ -119,30 +109,37 @@ class TestTrackerImporter(TestCase):
 
     def test_process_fields(self):
         ticket = mock.Mock()
+        def _user(l):
+            u = mock.Mock()
+            u.name = '%sname' % l
+            u.link = '%slink' % l
+            return u
         issue = mock.Mock(
-                summary='summary',
-                description='description',
-                status='status',
-                created_date='created_date',
-                mod_date='mod_date',
+                get_issue_summary=lambda:'summary',
+                get_issue_description=lambda:'description',
+                get_issue_status=lambda:'status',
+                get_issue_created_date=lambda:'created_date',
+                get_issue_mod_date=lambda:'mod_date',
+                get_issue_creator=lambda:_user('c'),
+                get_issue_owner=lambda:_user('o'),
             )
         importer = tracker.GoogleCodeTrackerImporter()
         with mock.patch.object(tracker, 'datetime') as dt:
             dt.strptime.side_effect = lambda s,f: s
             importer.process_fields(ticket, issue)
             self.assertEqual(ticket.summary, 'summary')
-            self.assertEqual(ticket.description, 'description')
+            self.assertEqual(ticket.description, '*Originally created by:* [cname](clink)\n*Originally owned by:* [oname](olink)\n\ndescription')
             self.assertEqual(ticket.status, 'status')
             self.assertEqual(ticket.created_date, 'created_date')
             self.assertEqual(ticket.mod_date, 'mod_date')
             self.assertEqual(dt.strptime.call_args_list, [
-                    mock.call('created_date', ''),
-                    mock.call('mod_date', ''),
+                    mock.call('created_date', '%c'),
+                    mock.call('mod_date', '%c'),
                 ])
 
     def test_process_labels(self):
         ticket = mock.Mock(custom_fields={}, labels=[])
-        issue = mock.Mock(labels=['Foo-Bar', 'Baz', 'Foo-Qux'])
+        issue = mock.Mock(get_issue_labels=lambda:['Foo-Bar', 'Baz', 'Foo-Qux'])
         importer = tracker.GoogleCodeTrackerImporter()
         importer.custom_field = mock.Mock(side_effect=lambda n: {'name': '_%s' % n.lower(), 'options': set()})
         importer.process_labels(ticket, issue)
@@ -156,40 +153,49 @@ class TestTrackerImporter(TestCase):
             a.link = 'author%s_link' % n
             return a
         ticket = mock.Mock()
-        comments = [
+        issue = mock.Mock()
+        comments = issue.iter_comments.return_value = [
                 mock.Mock(
                     author=_author(1),
-                    text='text1',
+                    body='text1',
                     attachments='attachments1',
+                    created_date='Mon Jul 15 00:00:00 2013',
                 ),
                 mock.Mock(
                     author=_author(2),
-                    text='text2',
+                    body='text2',
                     attachments='attachments2',
+                    created_date='Mon Jul 16 00:00:00 2013',
                 ),
             ]
-        comments[0].updates.items.return_value = [('Foo', 'Bar'), ('Baz', 'Qux')]
+        comments[0].updates.items.return_value = [('Foo:', 'Bar'), ('Baz:', 'Qux')]
         comments[1].updates.items.return_value = []
+        posts = ticket.discussion_thread.add_post.side_effect = [
+                mock.Mock(),
+                mock.Mock(),
+            ]
         importer = tracker.GoogleCodeTrackerImporter()
-        importer.process_comments(ticket, comments)
-        self.assertEqual(ticket.thread.add_post.call_args_list[0], mock.call(
-                text='Originally posted by: [author1](author1_link)\n'
+        importer.process_comments(ticket, issue)
+        self.assertEqual(ticket.discussion_thread.add_post.call_args_list[0], mock.call(
+                text='*Originally posted by:* [author1](author1_link)\n'
                 '\n'
                 'text1\n'
                 '\n'
-                '*Foo*: Bar\n'
-                '*Baz*: Qux'
+                '**Foo:** Bar\n'
+                '**Baz:** Qux'
             ))
-        self.assertEqual(ticket.thread.add_post.call_args_list[1], mock.call(
-                text='Originally posted by: [author2](author2_link)\n'
+        self.assertEqual(posts[0].created_date, datetime(2013, 7, 15))
+        self.assertEqual(posts[0].timestamp, datetime(2013, 7, 15))
+        posts[0].add_multiple_attachments.assert_called_once_with('attachments1')
+        self.assertEqual(ticket.discussion_thread.add_post.call_args_list[1], mock.call(
+                text='*Originally posted by:* [author2](author2_link)\n'
                 '\n'
                 'text2\n'
                 '\n'
             ))
-        self.assertEqual(ticket.thread.add_post.return_value.add_multiple_attachments.call_args_list, [
-                mock.call('attachments1'),
-                mock.call('attachments2'),
-            ])
+        self.assertEqual(posts[1].created_date, datetime(2013, 7, 16))
+        self.assertEqual(posts[1].timestamp, datetime(2013, 7, 16))
+        posts[1].add_multiple_attachments.assert_called_once_with('attachments2')
 
     @mock.patch.object(tracker, 'c')
     def test_postprocess_custom_fields(self, c):