You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2013/08/16 02:30:05 UTC
[2/6] git commit: [#6506] Add custom User-Agent and auto-retries for project export requests
[#6506] Add custom User-Agent and auto-retries for project export requests
Signed-off-by: Tim Van Steenburgh <tv...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/490c83e3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/490c83e3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/490c83e3
Branch: refs/heads/master
Commit: 490c83e38af601ddeed4b4f7ca3ed1a7e5721ed0
Parents: 675495e
Author: Tim Van Steenburgh <tv...@gmail.com>
Authored: Tue Aug 13 21:37:31 2013 +0000
Committer: Cory Johns <cj...@slashdotmedia.com>
Committed: Thu Aug 15 16:09:31 2013 +0000
----------------------------------------------------------------------
Allura/allura/scripts/trac_export.py | 17 +++++++++++------
ForgeImporters/forgeimporters/base.py | 16 ++++++++++++++++
ForgeImporters/forgeimporters/google/__init__.py | 8 ++++----
.../forgeimporters/tests/google/test_extractor.py | 7 ++++---
ForgeImporters/forgeimporters/tests/test_base.py | 13 +++++++++++++
ForgeTracker/forgetracker/import_support.py | 10 +++++++---
.../forgewiki/scripts/wiki_from_trac/extractors.py | 17 ++++++++++-------
7 files changed, 65 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/490c83e3/Allura/allura/scripts/trac_export.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/trac_export.py b/Allura/allura/scripts/trac_export.py
index d53afbc..4d908bc 100644
--- a/Allura/allura/scripts/trac_export.py
+++ b/Allura/allura/scripts/trac_export.py
@@ -18,7 +18,6 @@
# under the License.
import logging
-import socket
import sys
import csv
import urlparse
@@ -28,14 +27,20 @@ import time
import re
from optparse import OptionParser
from itertools import islice
-from datetime import datetime
import feedparser
from BeautifulSoup import BeautifulSoup, NavigableString
import dateutil.parser
import pytz
-from allura.lib import helpers as h
+try:
+ from forgeimporters.base import ProjectExtractor
+ urlopen = ProjectExtractor.urlopen
+except ImportError:
+ try:
+ from allura.lib.helpers import urlopen
+ except ImportError:
+ from urllib2 import urlopen
log = logging.getLogger(__name__)
@@ -124,7 +129,7 @@ class TracExport(object):
def csvopen(self, url):
self.log_url(url)
- f = h.urlopen(url)
+ f = urlopen(url)
# Trac doesn't throw 403 error, just shows normal 200 HTML page
# telling that access denied. So, we'll emulate 403 ourselves.
# TODO: currently, any non-csv result treated as 403.
@@ -146,7 +151,7 @@ class TracExport(object):
from html2text import html2text
url = self.full_url(self.TICKET_URL % id, 'rss')
self.log_url(url)
- d = feedparser.parse(h.urlopen(url))
+ d = feedparser.parse(urlopen(url))
res = []
for comment in d['entries']:
c = {}
@@ -163,7 +168,7 @@ class TracExport(object):
# Scrape HTML to get ticket attachments
url = self.full_url(self.ATTACHMENT_LIST_URL % id)
self.log_url(url)
- f = h.urlopen(url)
+ f = urlopen(url)
soup = BeautifulSoup(f)
attach = soup.find('div', id='attachments')
list = []
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/490c83e3/ForgeImporters/forgeimporters/base.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/base.py b/ForgeImporters/forgeimporters/base.py
index e1af4f7..ee34ab3 100644
--- a/ForgeImporters/forgeimporters/base.py
+++ b/ForgeImporters/forgeimporters/base.py
@@ -16,6 +16,7 @@
# under the License.
import logging
+import urllib2
from pkg_resources import iter_entry_points
@@ -28,6 +29,7 @@ from allura.lib.decorators import require_post
from allura.lib.decorators import task
from allura.lib.security import require_access
from allura.lib.plugin import ProjectRegistrationProvider
+from allura.lib import helpers as h
from allura.lib import exceptions
from paste.deploy.converters import aslist
@@ -57,6 +59,20 @@ def import_tool(importer_name, project_name=None, mount_point=None, mount_label=
mount_point=mount_point, mount_label=mount_label, **kw)
+class ProjectExtractor(object):
+ """Base class for project extractors.
+
+ Subclasses should use :meth:`urlopen` to make HTTP requests, as it provides
+ a custom User-Agent and automatically retries timed-out requests.
+
+ """
+ @staticmethod
+ def urlopen(url, retries=3, codes=(408,), **kw):
+ req = urllib2.Request(url, **kw)
+ req.add_header('User-Agent', 'Allura Data Importer (http://sf.net/p/allura)')
+ return h.urlopen(req, retries=retries, codes=codes)
+
+
class ProjectImporter(BaseController):
"""
Base class for project importers.
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/490c83e3/ForgeImporters/forgeimporters/google/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/google/__init__.py b/ForgeImporters/forgeimporters/google/__init__.py
index a307bcd..a12389b 100644
--- a/ForgeImporters/forgeimporters/google/__init__.py
+++ b/ForgeImporters/forgeimporters/google/__init__.py
@@ -17,7 +17,6 @@
import re
import urllib
-import urllib2
from urlparse import urlparse, urljoin
from collections import defaultdict
try:
@@ -29,11 +28,12 @@ import logging
from BeautifulSoup import BeautifulSoup
from allura import model as M
+from forgeimporters.base import ProjectExtractor
log = logging.getLogger(__name__)
-class GoogleCodeProjectExtractor(object):
+class GoogleCodeProjectExtractor(ProjectExtractor):
BASE_URL = 'http://code.google.com'
RE_REPO_TYPE = re.compile(r'(svn|hg|git)')
@@ -82,7 +82,7 @@ class GoogleCodeProjectExtractor(object):
self.url = (self.get_page_url(page_name_or_url) if page_name_or_url in
self.PAGE_MAP else page_name_or_url)
self.page = self._page_cache[page_name_or_url] = \
- BeautifulSoup(urllib2.urlopen(self.url))
+ BeautifulSoup(self.urlopen(self.url))
return self.page
def get_page_url(self, page_name):
@@ -103,7 +103,7 @@ class GoogleCodeProjectExtractor(object):
if icon_url == self.DEFAULT_ICON:
return
icon_name = urllib.unquote(urlparse(icon_url).path).split('/')[-1]
- fp_ish = urllib2.urlopen(icon_url)
+ fp_ish = self.urlopen(icon_url)
fp = StringIO(fp_ish.read())
M.ProjectFile.save_image(
icon_name, fp,
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/490c83e3/ForgeImporters/forgeimporters/tests/google/test_extractor.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/test_extractor.py b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
index b4e64c0..9b6db45 100644
--- a/ForgeImporters/forgeimporters/tests/google/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
@@ -19,12 +19,13 @@ from unittest import TestCase
import mock
-from ... import google
+from forgeimporters import google
+from forgeimporters import base
class TestGoogleCodeProjectExtractor(TestCase):
def setUp(self):
- self._p_urlopen = mock.patch.object(google.urllib2, 'urlopen')
+ self._p_urlopen = mock.patch.object(base.ProjectExtractor, 'urlopen')
self._p_soup = mock.patch.object(google, 'BeautifulSoup')
self.urlopen = self._p_urlopen.start()
self.soup = self._p_soup.start()
@@ -105,7 +106,7 @@ class TestGoogleCodeProjectExtractor(TestCase):
def _make_extractor(self, html):
from BeautifulSoup import BeautifulSoup
- with mock.patch.object(google, 'urllib2'):
+ with mock.patch.object(base.ProjectExtractor, 'urlopen'):
extractor = google.GoogleCodeProjectExtractor(self.project, 'my-project')
extractor.page = BeautifulSoup(html)
extractor.get_page = lambda pagename: extractor.page
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/490c83e3/ForgeImporters/forgeimporters/tests/test_base.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/test_base.py b/ForgeImporters/forgeimporters/tests/test_base.py
index 68be24d..57ed227 100644
--- a/ForgeImporters/forgeimporters/tests/test_base.py
+++ b/ForgeImporters/forgeimporters/tests/test_base.py
@@ -23,6 +23,19 @@ import mock
from .. import base
+class TestProjectExtractor(TestCase):
+ @mock.patch('forgeimporters.base.h.urlopen')
+ @mock.patch('forgeimporters.base.urllib2.Request')
+ def test_urlopen(self, Request, urlopen):
+ r = base.ProjectExtractor.urlopen('myurl', data='foo')
+ Request.assert_called_once_with('myurl', data='foo')
+ req = Request.return_value
+ req.add_header.assert_called_once_with(
+ 'User-Agent', 'Allura Data Importer (http://sf.net/p/allura)')
+ urlopen.assert_called_once_with(req, retries=3, codes=(408,))
+ self.assertEqual(r, urlopen.return_value)
+
+
@mock.patch.object(base.ToolImporter, 'by_name')
@mock.patch.object(base, 'c')
def test_import_tool(c, by_name):
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/490c83e3/ForgeTracker/forgetracker/import_support.py
----------------------------------------------------------------------
diff --git a/ForgeTracker/forgetracker/import_support.py b/ForgeTracker/forgetracker/import_support.py
index 04d453a..e22c50e 100644
--- a/ForgeTracker/forgetracker/import_support.py
+++ b/ForgeTracker/forgetracker/import_support.py
@@ -34,6 +34,12 @@ from allura import model as M
# Local imports
from forgetracker import model as TM
+try:
+ from forgeimporters.base import ProjectExtractor
+ urlopen = ProjectExtractor.urlopen
+except ImportError:
+ urlopen = h.urlopen
+
log = logging.getLogger(__name__)
class ImportException(Exception):
@@ -276,15 +282,13 @@ class ImportSupport(object):
comment.import_id = c.api_token.api_key
def make_attachment(self, org_ticket_id, ticket_id, att_dict):
- import urllib2
if att_dict['size'] > self.ATTACHMENT_SIZE_LIMIT:
self.errors.append('Ticket #%s: Attachment %s (@ %s) is too large, skipping' %
(org_ticket_id, att_dict['filename'], att_dict['url']))
return
- f = urllib2.urlopen(att_dict['url'])
+ f = urlopen(att_dict['url'])
TM.TicketAttachment.save_attachment(att_dict['filename'], ResettableStream(f),
artifact_id=ticket_id)
- f.close()
#
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/490c83e3/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py
----------------------------------------------------------------------
diff --git a/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py b/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py
index 7f111d7..af07f50 100644
--- a/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py
+++ b/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py
@@ -24,15 +24,18 @@ from urllib import quote, unquote
from urlparse import urljoin, urlsplit
try:
- import requests
-except:
- # Ignore this import if the requests package is not installed
- pass
+ from forgeimporters.base import ProjectExtractor
+ urlopen = ProjectExtractor.urlopen
+except ImportError:
+ try:
+ from allura.lib.helpers import urlopen
+ except ImportError:
+ from urllib2 import urlopen
try:
# Ignore this import if the html2text package is not installed
import html2text
-except:
+except ImportError:
pass
from BeautifulSoup import BeautifulSoup
@@ -128,8 +131,8 @@ class WikiExporter(object):
glue = '&' if '?' in suburl else '?'
return url + glue + 'format=' + type
- def fetch(self, url, **kwargs):
- return requests.get(url, **kwargs)
+ def fetch(self, url):
+ return urlopen(url)
def page_list(self):
url = urljoin(self.base_url, self.PAGE_LIST_URL)