You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by tv...@apache.org on 2013/08/07 15:36:58 UTC
[05/14] git commit: [#6458] Add google-code wiki page extraction
[#6458] Add google-code wiki page extraction
Signed-off-by: Tim Van Steenburgh <tv...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/c192a843
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/c192a843
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/c192a843
Branch: refs/heads/tv/6480
Commit: c192a843ffb850eb4e7dba472a57cd852fec81ce
Parents: e3663fb
Author: Tim Van Steenburgh <tv...@gmail.com>
Authored: Tue Jul 30 14:39:04 2013 +0000
Committer: Tim Van Steenburgh <tv...@gmail.com>
Committed: Wed Aug 7 12:26:52 2013 +0000
----------------------------------------------------------------------
ForgeImporters/forgeimporters/google/__init__.py | 17 ++++++++++++++---
.../forgeimporters/tests/google/test_extractor.py | 9 +++++++++
2 files changed, 23 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/c192a843/ForgeImporters/forgeimporters/google/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/google/__init__.py b/ForgeImporters/forgeimporters/google/__init__.py
index 57e384b..8c91fd3 100644
--- a/ForgeImporters/forgeimporters/google/__init__.py
+++ b/ForgeImporters/forgeimporters/google/__init__.py
@@ -34,11 +34,13 @@ from allura import model as M
log = logging.getLogger(__name__)
class GoogleCodeProjectExtractor(object):
+ BASE_URL = 'http://code.google.com'
RE_REPO_TYPE = re.compile(r'(svn|hg|git)')
PAGE_MAP = {
- 'project_info': 'http://code.google.com/p/%s/',
- 'source_browse': 'http://code.google.com/p/%s/source/browse/',
+ 'project_info': BASE_URL + '/p/%s/',
+ 'source_browse': BASE_URL + '/p/%s/source/browse/',
+ 'wiki_index': BASE_URL + '/p/%s/w/list',
}
LICENSE_MAP = defaultdict(lambda:'Other/Proprietary License', {
@@ -58,7 +60,8 @@ class GoogleCodeProjectExtractor(object):
def __init__(self, allura_project, gc_project_name, page):
self.project = allura_project
- self.url = self.PAGE_MAP[page] % urllib.quote(gc_project_name)
+ self.gc_project_name = gc_project_name
+ self.url = self.PAGE_MAP[page] % urllib.quote(self.gc_project_name)
self.page = BeautifulSoup(urllib2.urlopen(self.url))
def get_short_description(self):
@@ -92,3 +95,11 @@ class GoogleCodeProjectExtractor(object):
return re_match.group(0)
else:
raise Exception("Unknown repo type: {0}".format(repo_type.text))
+
+ def get_wiki_pages(self):
+ RE_WIKI_PAGE_URL = r'^/p/{0}/wiki/.*$'.format(self.gc_project_name)
+ seen = set()
+ for a in self.page.find(id="resultstable").findAll("a"):
+ if re.match(RE_WIKI_PAGE_URL, a['href']) and a['href'] not in seen:
+ yield (a.text, self.BASE_URL + a['href'])
+ seen.add(a['href'])
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/c192a843/ForgeImporters/forgeimporters/tests/google/test_extractor.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/test_extractor.py b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
index 1a3a87c..250759f 100644
--- a/ForgeImporters/forgeimporters/tests/google/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
@@ -118,3 +118,12 @@ class TestGoogleCodeProjectExtractor(TestCase):
with self.assertRaises(Exception) as cm:
extractor.get_repo_type()
self.assertEqual(str(cm.exception), "Unknown repo type: cvs")
+
+ def test_get_wiki_pages(self):
+ extractor = self._make_extractor('''
+ <div id="resultstable">
+ <a href="#">Link that's not a wiki page</a>
+ <a href="/p/my-project/wiki/PageOne">PageOne</a>
+ </div>''')
+ self.assertEqual(list(extractor.get_wiki_pages()), [
+ ('PageOne', 'http://code.google.com/p/my-project/wiki/PageOne')])