You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2013/08/26 21:11:17 UTC

[6/7] git commit: [#6531] Refactored more stuff up into ProjectExtractor

[#6531] Refactored more stuff up into ProjectExtractor

Signed-off-by: Tim Van Steenburgh <tv...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/2d5cf6c5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/2d5cf6c5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/2d5cf6c5

Branch: refs/heads/master
Commit: 2d5cf6c51f0aabd5748856cc4b1b1b9c01bb67c1
Parents: 0008c42
Author: Tim Van Steenburgh <tv...@gmail.com>
Authored: Fri Aug 23 16:01:29 2013 +0000
Committer: Cory Johns <cj...@slashdotmedia.com>
Committed: Mon Aug 26 17:19:57 2013 +0000

----------------------------------------------------------------------
 ForgeImporters/forgeimporters/base.py           | 59 ++++++++++++++++++++
 .../forgeimporters/github/__init__.py           | 30 ++++------
 ForgeImporters/forgeimporters/github/tasks.py   |  8 ++-
 .../github/templates/project.html               |  3 +-
 .../forgeimporters/google/__init__.py           | 39 -------------
 .../tests/github/functional/test_github.py      |  2 +-
 .../tests/github/test_extractor.py              | 32 ++++-------
 .../forgeimporters/tests/github/test_tasks.py   |  5 +-
 .../tests/google/test_extractor.py              |  3 +-
 9 files changed, 92 insertions(+), 89 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2d5cf6c5/ForgeImporters/forgeimporters/base.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/base.py b/ForgeImporters/forgeimporters/base.py
index 49397a5..3cf6774 100644
--- a/ForgeImporters/forgeimporters/base.py
+++ b/ForgeImporters/forgeimporters/base.py
@@ -16,10 +16,12 @@
 #       under the License.
 
 import logging
+import urllib
 import urllib2
 
 from pkg_resources import iter_entry_points
 
+from BeautifulSoup import BeautifulSoup
 from tg import expose, validate, flash, redirect, config
 from tg.decorators import with_trailing_slash
 from pylons import tmpl_context as c
@@ -67,12 +69,69 @@ class ProjectExtractor(object):
     a custom User-Agent and automatically retries timed-out requests.
 
     """
+
+    PAGE_MAP = {}
+
+    def __init__(self, project_name, page_name=None, **kw):
+        self.project_name = project_name
+        self._page_cache = {}
+        self.url = None
+        self.page = None
+        if page_name:
+            self.get_page(page_name, **kw)
+
     @staticmethod
     def urlopen(url, retries=3, codes=(408,), **kw):
         req = urllib2.Request(url, **kw)
         req.add_header('User-Agent', 'Allura Data Importer (http://sf.net/p/allura)')
         return h.urlopen(req, retries=retries, codes=codes)
 
+    def get_page(self, page_name_or_url, **kw):
+        """Return the parsed page (BeautifulSoup by default) for the given page name or url.
+
+        If a page name is provided, the associated url is looked up in
+        :attr:`PAGE_MAP`.
+
+        Results are cached so that subsequent calls for the same page name or
+        url will return the cached result rather than making another HTTP
+        request.
+
+        """
+        if page_name_or_url in self.PAGE_MAP:
+            self.url = self.get_page_url(page_name_or_url, **kw)
+        else:
+            self.url = page_name_or_url
+        if self.url in self._page_cache:
+            self.page = self._page_cache[self.url]
+        else:
+            self.page = self._page_cache[self.url] = \
+                    self.parse_page(self.urlopen(self.url))
+        return self.page
+
+    def get_page_url(self, page_name, **kw):
+        """Return the url associated with ``page_name``.
+
+        Raises KeyError if ``page_name`` is not in :attr:`PAGE_MAP`.
+
+        """
+        return self.PAGE_MAP[page_name].format(
+            project_name = urllib.quote(self.project_name), **kw)
+
+    def parse_page(self, page):
+        """Transforms the result of a `urlopen` call before returning it from
+        :meth:`get_page`.
+
+        The default implementation creates a :class:`BeautifulSoup` object from
+        the html.
+
+        Subclasses can override to change the behavior or handle other types
+        of content (like JSON).
+
+        :param page: A file-like object returned from :meth:`urlopen`
+
+        """
+        return BeautifulSoup(page)
+
 
 class ProjectImporter(BaseController):
     """

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2d5cf6c5/ForgeImporters/forgeimporters/github/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/github/__init__.py b/ForgeImporters/forgeimporters/github/__init__.py
index 1ce4398..f08f478 100644
--- a/ForgeImporters/forgeimporters/github/__init__.py
+++ b/ForgeImporters/forgeimporters/github/__init__.py
@@ -15,32 +15,24 @@
 #       specific language governing permissions and limitations
 #       under the License.
 
-import re
-import urllib
-import urllib2
-import json
-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
 import logging
+import json
+
+from forgeimporters import base
 
 log = logging.getLogger(__name__)
 
-class GitHubProjectExtractor(object):
-    RE_REPO_TYPE = re.compile(r'(svn|hg|git)')
+
+class GitHubProjectExtractor(base.ProjectExtractor):
     PAGE_MAP = {
-            'project_info': 'https://api.github.com/repos/%s',
+            'project_info': 'https://api.github.com/repos/{project_name}',
         }
 
+    def parse_page(self, page):
+        return json.loads(page.read().decode('utf8'))
 
-    def __init__(self, allura_project, gh_project_name, page):
-        self.project = allura_project
-        self.url = self.PAGE_MAP[page] % urllib.quote(gh_project_name)
-        self.page = json.loads(urllib2.urlopen(self.url).read().decode('utf8'))
-
-    def get_summmary(self):
-        self.project.summary = self.page['description']
+    def get_summary(self):
+        return self.get_page('project_info').get('description')
 
     def get_homepage(self):
-        self.project.external_homepage = self.page['homepage']
+        return self.get_page('project_info').get('homepage')

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2d5cf6c5/ForgeImporters/forgeimporters/github/tasks.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/github/tasks.py b/ForgeImporters/forgeimporters/github/tasks.py
index be2325d..588c5a6 100644
--- a/ForgeImporters/forgeimporters/github/tasks.py
+++ b/ForgeImporters/forgeimporters/github/tasks.py
@@ -16,6 +16,7 @@
 #       under the License.
 
 from pylons import tmpl_context as c
+from pylons import app_globals as g
 
 from ming.orm import ThreadLocalORMSession
 
@@ -26,7 +27,8 @@ from . import GitHubProjectExtractor
 
 @task
 def import_project_info(project_name):
-    extractor = GitHubProjectExtractor(c.project, project_name, 'project_info')
-    extractor.get_summmary()
-    extractor.get_homepage()
+    extractor = GitHubProjectExtractor(project_name)
+    c.project.summary = extractor.get_summary()
+    c.project.external_homepage = extractor.get_homepage()
     ThreadLocalORMSession.flush_all()
+    g.post_event('project_updated')

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2d5cf6c5/ForgeImporters/forgeimporters/github/templates/project.html
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/github/templates/project.html b/ForgeImporters/forgeimporters/github/templates/project.html
index de42737..d0f59f6 100644
--- a/ForgeImporters/forgeimporters/github/templates/project.html
+++ b/ForgeImporters/forgeimporters/github/templates/project.html
@@ -29,12 +29,11 @@
         </div>
     </div>
 
-
     <div class="grid-6" style="clear:left">
         <label>GitHub Project Name</label>
     </div>
      <div class="grid-10">
-        <input id="project_name" name="project_name" value="{{c.form_values['project_name']}}" autofocus/>
+        <input id="project_name" name="project_name" value="{{c.form_values['project_name']}}" />
         <div id="project_name_error" class="error{% if not c.form_errors['project_name'] %} hidden{% endif %}">
             {{c.form_errors['project_name']}}
         </div>

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2d5cf6c5/ForgeImporters/forgeimporters/google/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/google/__init__.py b/ForgeImporters/forgeimporters/google/__init__.py
index 902683a..29e5011 100644
--- a/ForgeImporters/forgeimporters/google/__init__.py
+++ b/ForgeImporters/forgeimporters/google/__init__.py
@@ -78,45 +78,6 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
 
     DEFAULT_ICON = 'http://www.gstatic.com/codesite/ph/images/defaultlogo.png'
 
-    def __init__(self, project_name, page_name=None, **kw):
-        self.project_name = project_name
-        self._page_cache = {}
-        self.url = None
-        self.page = None
-        if page_name:
-            self.get_page(page_name, **kw)
-
-    def get_page(self, page_name_or_url, **kw):
-        """Return a Beautiful soup object for the given page name or url.
-
-        If a page name is provided, the associated url is looked up in
-        :attr:`PAGE_MAP`.
-
-        Results are cached so that subsequent calls for the same page name or
-        url will return the cached result rather than making another HTTP
-        request.
-
-        """
-        if page_name_or_url in self.PAGE_MAP:
-            self.url = self.get_page_url(page_name_or_url, **kw)
-        else:
-            self.url = page_name_or_url
-        if self.url in self._page_cache:
-            self.page = self._page_cache[self.url]
-        else:
-            self.page = self._page_cache[self.url] = \
-                    BeautifulSoup(self.urlopen(self.url))
-        return self.page
-
-    def get_page_url(self, page_name, **kw):
-        """Return the url associated with ``page_name``.
-
-        Raises KeyError if ``page_name`` is not in :attr:`PAGE_MAP`.
-
-        """
-        return self.PAGE_MAP[page_name].format(
-            project_name = urllib.quote(self.project_name), **kw)
-
     def get_short_description(self, project):
         page = self.get_page('project_info')
         project.short_description = page.find(itemprop='description').string.strip()

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2d5cf6c5/ForgeImporters/forgeimporters/tests/github/functional/test_github.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/github/functional/test_github.py b/ForgeImporters/forgeimporters/tests/github/functional/test_github.py
index 6667e7b..3c05a2d 100644
--- a/ForgeImporters/forgeimporters/tests/github/functional/test_github.py
+++ b/ForgeImporters/forgeimporters/tests/github/functional/test_github.py
@@ -24,5 +24,5 @@ class TestGitHubImportController(TestController, TestCase):
         r = self.app.get('/p/import_project/github/')
         assert 'GitHub Project Importer' in r
         assert '<input id="user_name" name="user_name" value="" autofocus/>' in r
-        assert '<input id="project_name" name="project_name" value="" autofocus/>' in r
+        assert '<input id="project_name" name="project_name" value="" />' in r
         assert '<input id="project_shortname" name="project_shortname" value=""/>' in r

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2d5cf6c5/ForgeImporters/forgeimporters/tests/github/test_extractor.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/github/test_extractor.py b/ForgeImporters/forgeimporters/tests/github/test_extractor.py
index a21f775..cffbf81 100644
--- a/ForgeImporters/forgeimporters/tests/github/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/github/test_extractor.py
@@ -17,32 +17,20 @@
 
 from unittest import TestCase
 
-import mock
-
 from ... import github
 
 
 class TestGitHubProjectExtractor(TestCase):
     def setUp(self):
-        self._p_urlopen = mock.patch.object(github.urllib2, 'urlopen')
-        self._p_json = mock.patch.object(github.json, 'loads')
-        self.urlopen = self._p_urlopen.start()
-        self.json = self._p_json.start()
-        self.project = mock.Mock(name='project')
-        self.project.get_tool_data.return_value = 'testproject'
-
-    def tearDown(self):
-        self._p_urlopen.stop()
-        self._p_json.stop()
-
-
-    def test_init(self):
-        extractor = github.GitHubProjectExtractor(self.project, 'testproject', 'project_info')
-        self.urlopen.assert_called_once_with('https://api.github.com/repos/testproject')
-        self.assertEqual(extractor.project, self.project)
+        import json
+        from StringIO import StringIO
+        self.extractor = github.GitHubProjectExtractor('testproject')
+        d = dict(description='project description',
+                homepage='http://example.com')
+        self.extractor.urlopen = lambda url: StringIO(json.dumps(d))
 
     def test_get_summary(self):
-        extractor = github.GitHubProjectExtractor(self.project, 'testproject', 'project_info')
-        extractor.page = {'description': 'test summary'}
-        extractor.get_summmary()
-        self.assertEqual(self.project.summary, 'test summary')
+        self.assertEqual(self.extractor.get_summary(), 'project description')
+
+    def test_get_homepage(self):
+        self.assertEqual(self.extractor.get_homepage(), 'http://example.com')

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2d5cf6c5/ForgeImporters/forgeimporters/tests/github/test_tasks.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/github/test_tasks.py b/ForgeImporters/forgeimporters/tests/github/test_tasks.py
index b478bc9..86b321e 100644
--- a/ForgeImporters/forgeimporters/tests/github/test_tasks.py
+++ b/ForgeImporters/forgeimporters/tests/github/test_tasks.py
@@ -26,6 +26,7 @@ from ...github import tasks
 def test_import_project_info(c, session, ghpe):
     c.project = mock.Mock(name='project')
     tasks.import_project_info('my-project')
-    ghpe.assert_called_once_with(c.project, 'my-project', 'project_info')
-    ghpe.return_value.get_summmary.assert_called_once_with()
+    ghpe.assert_called_once_with('my-project')
+    ghpe.return_value.get_summary.assert_called_once_with()
+    ghpe.return_value.get_homepage.assert_called_once_with()
     session.flush_all.assert_called_once_with()

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2d5cf6c5/ForgeImporters/forgeimporters/tests/google/test_extractor.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/test_extractor.py b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
index 92c5fb2..d5a9f22 100644
--- a/ForgeImporters/forgeimporters/tests/google/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
@@ -28,7 +28,8 @@ from forgeimporters import base
 class TestGoogleCodeProjectExtractor(TestCase):
     def setUp(self):
         self._p_urlopen = mock.patch.object(base.ProjectExtractor, 'urlopen')
-        self._p_soup = mock.patch.object(google, 'BeautifulSoup')
+        # self._p_soup = mock.patch.object(google, 'BeautifulSoup')
+        self._p_soup = mock.patch.object(base, 'BeautifulSoup')
         self.urlopen = self._p_urlopen.start()
         self.soup = self._p_soup.start()
         self.project = mock.Mock(name='project')