You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by ke...@apache.org on 2020/11/02 21:28:03 UTC
[allura] 12/15: [#8378] py3 and other fixes for Trac Tickets importer
This is an automated email from the ASF dual-hosted git repository.
kentontaylor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/allura.git
commit ea3075bcd8c71814c805951af9e9c46ac26582cc
Author: Dave Brondsema <da...@brondsema.net>
AuthorDate: Mon Oct 12 16:50:41 2020 -0400
[#8378] py3 and other fixes for Trac Tickets importer
---
Allura/allura/scripts/trac_export.py | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/Allura/allura/scripts/trac_export.py b/Allura/allura/scripts/trac_export.py
index 4c2e937..e9b1448 100644
--- a/Allura/allura/scripts/trac_export.py
+++ b/Allura/allura/scripts/trac_export.py
@@ -31,6 +31,7 @@ import re
from optparse import OptionParser
from itertools import islice
import codecs
+from io import TextIOWrapper
from bs4 import BeautifulSoup, NavigableString
import dateutil.parser
@@ -137,8 +138,11 @@ class TracExport(object):
@staticmethod
def match_pattern(regexp, string):
m = re.match(regexp, string)
- assert m
- return m.group(1)
+ assert m, "'{}' didn't match '{}'".format(regexp, string)
+ for grp in m.groups():
+ if grp is not None:
+ return grp
+ return None
def csvopen(self, url):
self.log_url(url)
@@ -149,7 +153,7 @@ class TracExport(object):
if not f.info()['Content-Type'].startswith('text/csv'):
raise six.moves.urllib.error.HTTPError(
url, 403, 'Forbidden - emulated', f.info(), f)
- return f
+ return TextIOWrapper(f)
def parse_ticket(self, id):
# Use CSV export to get ticket fields
@@ -177,7 +181,7 @@ class TracExport(object):
c['submitter'] = re.sub(
r'.* by ', '', comment.find('h3', 'change').text).strip()
c['date'] = self.trac2z_date(
- comment.find('a', 'timeline')['title'].replace(' in Timeline', ''))
+ comment.find('a', 'timeline')['title'].replace(' in Timeline', '').replace('See timeline at ', ''))
changes = six.text_type(comment.find('ul', 'changes') or '')
body = comment.find('div', 'comment')
body = body.renderContents('utf8').decode('utf8') if body else ''
@@ -190,7 +194,7 @@ class TracExport(object):
def parse_ticket_attachments(self, id):
SIZE_PATTERN = r'(\d+) bytes'
- TIMESTAMP_PATTERN = r'(.+) in Timeline'
+ TIMESTAMP_PATTERN = r'(?:(.+) in Timeline|See timeline at (.+))'
# Scrape HTML to get ticket attachments
url = self.full_url(self.ATTACHMENT_LIST_URL % id)
self.log_url(url)
@@ -208,10 +212,8 @@ class TracExport(object):
size_s = attach.span['title']
d['size'] = int(self.match_pattern(SIZE_PATTERN, size_s))
timestamp_s = attach.find('a', {'class': 'timeline'})['title']
- d['date'] = self.trac2z_date(
- self.match_pattern(TIMESTAMP_PATTERN, timestamp_s))
- d['by'] = attach.find(
- text=re.compile('added by')).nextSibling.renderContents()
+ d['date'] = self.trac2z_date(self.match_pattern(TIMESTAMP_PATTERN, timestamp_s))
+ d['by'] = attach.find(text=re.compile('added by')).nextSibling.text
d['description'] = ''
# Skip whitespace
while attach.nextSibling and isinstance(attach.nextSibling, NavigableString):