You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by tv...@apache.org on 2013/08/07 15:36:57 UTC
[04/14] git commit: [#6480] Add trac ticket importer plugin
[#6480] Add trac ticket importer plugin
Signed-off-by: Tim Van Steenburgh <tv...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/bc49a02b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/bc49a02b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/bc49a02b
Branch: refs/heads/tv/6480
Commit: bc49a02bc6bcd6501d7c1f75a6b894706a0c6e31
Parents: c192a84
Author: Tim Van Steenburgh <tv...@gmail.com>
Authored: Fri Aug 2 17:00:29 2013 +0000
Committer: Tim Van Steenburgh <tv...@gmail.com>
Committed: Wed Aug 7 12:26:52 2013 +0000
----------------------------------------------------------------------
Allura/allura/scripts/trac_export.py | 280 +++++++++++++++++++
ForgeImporters/forgeimporters/trac/__init__.py | 17 ++
.../trac/templates/tickets/index.html | 42 +++
ForgeImporters/forgeimporters/trac/tickets.py | 107 +++++++
.../forgetracker/scripts/import_tracker.py | 18 +-
scripts/trac_export.py | 257 +----------------
6 files changed, 458 insertions(+), 263 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/Allura/allura/scripts/trac_export.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/trac_export.py b/Allura/allura/scripts/trac_export.py
new file mode 100644
index 0000000..aeb14ea
--- /dev/null
+++ b/Allura/allura/scripts/trac_export.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import sys
+import csv
+import urlparse
+import urllib2
+import json
+import time
+import re
+from optparse import OptionParser
+from itertools import islice
+from datetime import datetime
+
+import feedparser
+from html2text import html2text
+from BeautifulSoup import BeautifulSoup, NavigableString
+import dateutil.parser
+import pytz
+
+
+def parse_options():
+    """Parse the command line of the Trac export script.
+
+    Returns the (options, args) pair produced by optparse. Reports a usage
+    error through optparser.error() unless exactly one positional argument
+    (the Trac URL) was given.
+    """
+    optparser = OptionParser(usage=''' %prog <Trac URL>
+
+Export ticket data from a Trac instance''')
+    optparser.add_option('-o', '--out-file', dest='out_filename', help='Write to file (default stdout)')
+    # The flag *disables* attachment export, so the help text has to say so.
+    optparser.add_option('--no-attachments', dest='do_attachments', action='store_false', default=True, help='Do not export attachment info')
+    optparser.add_option('--only-tickets', dest='only_tickets', action='store_true', help='Export only ticket list')
+    optparser.add_option('--start', dest='start_id', type='int', default=1, help='Start with given ticket number (or next accessible)')
+    optparser.add_option('--limit', dest='limit', type='int', default=None, help='Limit number of tickets')
+    optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', help='Verbose operation')
+    options, args = optparser.parse_args()
+    if len(args) != 1:
+        optparser.error("Wrong number of arguments.")
+    return options, args
+
+
+class TracExport(object):
+    """Iterator over the tickets of a Trac instance.
+
+    Ticket ids are collected page by page through Trac's CSV query export.
+    Each ticket is then assembled from its CSV export (fields), its RSS
+    export (comments) and, optionally, its scraped attachment page.
+    """
+
+    PAGE_SIZE = 100
+    TICKET_URL = 'ticket/%d'
+    QUERY_MAX_ID_URL = 'query?col=id&order=id&desc=1&max=2'
+    QUERY_BY_PAGE_URL = 'query?col=id&col=time&col=changetime&order=id&max=' + str(PAGE_SIZE) + '&page=%d'
+    ATTACHMENT_LIST_URL = 'attachment/ticket/%d/'
+    ATTACHMENT_URL = 'raw-attachment/ticket/%d/%s'
+
+    FIELD_MAP = {
+        'reporter': 'submitter',
+        'owner': 'assigned_to',
+    }
+
+    def __init__(self, base_url, start_id=1, verbose=False, do_attachments=True):
+        """Set up the export and fetch the first page of ticket ids.
+
+        base_url - root URL of the Trac instance.
+        start_id - start with at least that ticket number (actual returned
+                   ticket may have higher id if we don't have access to exact
+                   one).
+        verbose - print every requested URL to stderr.
+        do_attachments - also scrape attachment info for every ticket.
+        """
+        self.base_url = base_url.rstrip('/') + '/'
+        # These used to be read from a module-level ``options`` object, which
+        # does not exist when the class is imported (or when option parsing
+        # happens inside main()), so they are explicit arguments now.
+        self.verbose = verbose
+        self.do_attachments = do_attachments
+        # Contains additional info for a ticket which cannot
+        # be obtained with single-ticket export (create/mod times are
+        # an example).
+        self.ticket_map = {}
+        self.start_id = start_id
+        self.page = (start_id - 1) // self.PAGE_SIZE + 1
+        self.ticket_queue = self.next_ticket_ids()
+
+    def remap_fields(self, dict):
+        "Remap fields to adhere to standard taxonomy."
+        out = {}
+        for k, v in dict.iteritems():
+            out[self.FIELD_MAP.get(k, k)] = v
+
+        out['id'] = int(out['id'])
+        if 'private' in out:
+            out['private'] = bool(int(out['private']))
+        return out
+
+    def full_url(self, suburl, type=None):
+        """Join suburl onto the base URL.
+
+        When type is given, a ``format=<type>`` query parameter is appended.
+        """
+        url = urlparse.urljoin(self.base_url, suburl)
+        if type is None:
+            return url
+        glue = '&' if '?' in suburl else '?'
+        return url + glue + 'format=' + type
+
+    def log_url(self, url):
+        "Print url to stderr when verbose mode is on."
+        if self.verbose:
+            print >>sys.stderr, url
+
+    @classmethod
+    def trac2z_date(cls, s):
+        """Convert a date string to a UTC timestamp ('%Y-%m-%dT%H:%M:%SZ').
+
+        NOTE(review): astimezone() needs a timezone-aware value, so the
+        input presumably always carries an offset - confirm.
+        """
+        d = dateutil.parser.parse(s)
+        d = d.astimezone(pytz.UTC)
+        return d.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    @staticmethod
+    def match_pattern(regexp, string):
+        """Return group 1 of regexp matched at the start of string.
+
+        Raises ValueError when the pattern does not match (an ``assert``
+        would be stripped under ``python -O``).
+        """
+        m = re.match(regexp, string)
+        if m is None:
+            raise ValueError('%r does not match pattern %r' % (string, regexp))
+        return m.group(1)
+
+    def csvopen(self, url):
+        """Open url and return the response, insisting on a CSV payload.
+
+        Raises urllib2.HTTPError with code 403 and the message
+        'Forbidden - emulated' when the response is not text/csv.
+        """
+        self.log_url(url)
+        f = urllib2.urlopen(url)
+        # Trac doesn't throw 403 error, just shows normal 200 HTML page
+        # telling that access denied. So, we'll emulate 403 ourselves.
+        # TODO: currently, any non-csv result treated as 403.
+        if not f.info()['Content-Type'].startswith('text/csv'):
+            raise urllib2.HTTPError(url, 403, 'Forbidden - emulated', f.info(), f)
+        return f
+
+    def parse_ticket_body(self, id):
+        "Return the remapped field dict of ticket id, tagged as 'ARTIFACT'."
+        # Use CSV export to get ticket fields
+        url = self.full_url(self.TICKET_URL % id, 'csv')
+        f = self.csvopen(url)
+        reader = csv.DictReader(f)
+        ticket_fields = reader.next()
+        ticket_fields['class'] = 'ARTIFACT'
+        return self.remap_fields(ticket_fields)
+
+    def parse_ticket_comments(self, id):
+        "Return the comments of ticket id as a list of 'COMMENT' dicts."
+        # Use RSS export to get ticket comments
+        url = self.full_url(self.TICKET_URL % id, 'rss')
+        self.log_url(url)
+        d = feedparser.parse(url)
+        res = []
+        for comment in d['entries']:
+            c = {}
+            c['submitter'] = comment.author
+            c['date'] = comment.updated_parsed
+            c['comment'] = html2text(comment.summary)
+            c['class'] = 'COMMENT'
+            res.append(c)
+        return res
+
+    def parse_ticket_attachments(self, id):
+        """Scrape the attachment list page of ticket id.
+
+        Returns a list of dicts with the keys filename, url, size, date, by
+        and description.
+        """
+        SIZE_PATTERN = r'(\d+) bytes'
+        TIMESTAMP_PATTERN = r'(.+) in Timeline'
+        # Scrape HTML to get ticket attachments
+        url = self.full_url(self.ATTACHMENT_LIST_URL % id)
+        self.log_url(url)
+        f = urllib2.urlopen(url)
+        try:
+            soup = BeautifulSoup(f)
+        finally:
+            f.close()
+        attach = soup.find('div', id='attachments')
+        attachments = []
+        while attach:
+            attach = attach.findNext('dt')
+            if not attach:
+                break
+            d = {}
+            d['filename'] = attach.a['href'].rsplit('/', 1)[1]
+            d['url'] = self.full_url(self.ATTACHMENT_URL % (id, d['filename']))
+            size_s = attach.span['title']
+            d['size'] = int(self.match_pattern(SIZE_PATTERN, size_s))
+            timestamp_s = attach.find('a', {'class': 'timeline'})['title']
+            d['date'] = self.trac2z_date(self.match_pattern(TIMESTAMP_PATTERN, timestamp_s))
+            d['by'] = attach.find(text=re.compile('added by')).nextSibling.renderContents()
+            d['description'] = ''
+            # Skip whitespace
+            while attach.nextSibling and type(attach.nextSibling) is NavigableString:
+                attach = attach.nextSibling
+            # If there's a description, there will be a <dd> element,
+            # otherwise the next <dt> follows immediately.
+            desc_el = attach.nextSibling
+            if desc_el and desc_el.name == 'dd':
+                # TODO: Convert to Allura link syntax as needed
+                d['description'] = ''.join(desc_el.findAll(text=True)).strip()
+            attachments.append(d)
+        return attachments
+
+    def get_max_ticket_id(self):
+        "Return the id found in the first row of the max-id CSV query."
+        url = self.full_url(self.QUERY_MAX_ID_URL, 'csv')
+        f = self.csvopen(url)
+        reader = csv.DictReader(f)
+        fields = reader.next()
+        return int(fields['id'])
+
+    def get_ticket(self, id, extra=None):
+        '''Get ticket with given id
+        extra: extra fields to add to ticket (parsed elsewhere)
+        '''
+        t = self.parse_ticket_body(id)
+        t['comments'] = self.parse_ticket_comments(id)
+        if self.do_attachments:
+            atts = self.parse_ticket_attachments(id)
+            if atts:
+                t['attachments'] = atts
+        if extra:
+            t.update(extra)
+        return t
+
+    def next_ticket_ids(self):
+        '''Go thru ticket list and collect available ticket ids.
+
+        Returns a list of (id, extra) pairs for the current page and moves on
+        to the next page. Returns an empty list when the server answers
+        that the page is beyond the end of the query or asks to log in.
+        '''
+        # We could just do CSV export, which by default dumps entire list
+        # Alas, for many busy servers with long ticket list, it will just
+        # time out. So, let's paginate it instead.
+        res = []
+
+        url = self.full_url(self.QUERY_BY_PAGE_URL % self.page, 'csv')
+        try:
+            f = self.csvopen(url)
+        except urllib2.HTTPError, e:
+            if 'emulated' in e.msg:
+                body = e.fp.read()
+                if 'beyond the number of pages in the query' in body or 'Log in with a SourceForge account' in body:
+                    # Nothing (more) to fetch. Hand back an empty page rather
+                    # than raising StopIteration, which would escape from
+                    # __init__; next() turns the empty page into the end of
+                    # the iteration.
+                    return res
+            raise
+        reader = csv.reader(f)
+        # islice(..., 1, None) skips the header row and copes with an empty body.
+        for r in islice(reader, 1, None):
+            if r and r[0].isdigit():
+                id = int(r[0])
+                extra = {'date': self.trac2z_date(r[1]), 'date_updated': self.trac2z_date(r[2])}
+                res.append((id, extra))
+        self.page += 1
+
+        return res
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        "Return the next ticket whose id is at least start_id."
+        while True:
+            # queue empty, try to fetch more
+            if len(self.ticket_queue) == 0:
+                self.ticket_queue = self.next_ticket_ids()
+            # there aren't any more, we're really done
+            if len(self.ticket_queue) == 0:
+                raise StopIteration
+            id, extra = self.ticket_queue.pop(0)
+            if id >= self.start_id:
+                break
+        return self.get_ticket(id, extra)
+
+
+class DateJSONEncoder(json.JSONEncoder):
+    "JSON encoder that renders time.struct_time as '%Y-%m-%dT%H:%M:%SZ'."
+
+    def default(self, obj):
+        if isinstance(obj, time.struct_time):
+            return time.strftime('%Y-%m-%dT%H:%M:%SZ', obj)
+        return json.JSONEncoder.default(self, obj)
+
+
+def main():
+    "Command line entry point: export tickets as JSON to a file or stdout."
+    options, args = parse_options()
+    ex = TracExport(args[0], start_id=options.start_id,
+                    verbose=options.verbose,
+                    do_attachments=options.do_attachments)
+    # Implement iterator sequence limiting using islice()
+    doc = list(islice(ex, options.limit))
+
+    if not options.only_tickets:
+        doc = {
+            'class': 'PROJECT',
+            'trackers': {'default': {'artifacts': doc}}
+        }
+
+    out_file = sys.stdout
+    if options.out_filename:
+        out_file = open(options.out_filename, 'w')
+    try:
+        out_file.write(json.dumps(doc, cls=DateJSONEncoder, indent=2, sort_keys=True))
+        # It's bad habit not to terminate lines
+        out_file.write('\n')
+    finally:
+        # Only close what was opened here; stdout stays open.
+        if out_file is not sys.stdout:
+            out_file.close()
+
+if __name__ == '__main__':
+ main()
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/ForgeImporters/forgeimporters/trac/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/trac/__init__.py b/ForgeImporters/forgeimporters/trac/__init__.py
new file mode 100644
index 0000000..77505f1
--- /dev/null
+++ b/ForgeImporters/forgeimporters/trac/__init__.py
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/ForgeImporters/forgeimporters/trac/templates/tickets/index.html
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/trac/templates/tickets/index.html b/ForgeImporters/forgeimporters/trac/templates/tickets/index.html
new file mode 100644
index 0000000..eaf9aac
--- /dev/null
+++ b/ForgeImporters/forgeimporters/trac/templates/tickets/index.html
@@ -0,0 +1,42 @@
+{#-
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-#}
+{% extends g.theme.master %}
+
+{% block title %}
+{{c.project.name}} / Import Trac Tickets
+{% endblock %}
+
+{% block header %}
+Import tickets from Trac
+{% endblock %}
+
+{% block content %}
+{# Each input carries the id its <label for="..."> refers to, so the label is actually bound to it. #}
+<form action="create" method="post" class="pad">
+  <label for="trac_url">URL of the Trac instance</label>
+  <input id="trac_url" name="trac_url" />
+
+  <label for="mount_label">Label</label>
+  <input id="mount_label" name="mount_label" value="Source" />
+
+  <label for="mount_point">Mount Point</label>
+  <input id="mount_point" name="mount_point" value="source" />
+
+  <input type="submit" />
+</form>
+{% endblock %}
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/ForgeImporters/forgeimporters/trac/tickets.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/trac/tickets.py b/ForgeImporters/forgeimporters/trac/tickets.py
new file mode 100644
index 0000000..cc31741
--- /dev/null
+++ b/ForgeImporters/forgeimporters/trac/tickets.py
@@ -0,0 +1,107 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from datetime import (
+ datetime,
+ timedelta,
+ )
+import json
+
+import formencode as fe
+from formencode import validators as fev
+
+from pylons import tmpl_context as c
+from pylons import app_globals as g
+from tg import (
+ config,
+ expose,
+ redirect,
+ validate,
+ )
+from tg.decorators import (
+ with_trailing_slash,
+ without_trailing_slash,
+ )
+
+from allura.controllers import BaseController
+from allura.lib.decorators import require_post
+from allura.lib.import_api import AlluraImportApiClient
+from allura.model import ApiTicket
+from allura.scripts.trac_export import (
+ TracExport,
+ DateJSONEncoder,
+ )
+
+from forgeimporters.base import ToolImporter
+from forgetracker.scripts.import_tracker import import_tracker
+from forgetracker.tracker_main import ForgeTrackerApp
+
+
+class TracTicketImportSchema(fe.Schema):
+    """Validation schema for the Trac ticket import form."""
+    # Must be a non-empty URL.
+    trac_url = fev.URL(not_empty=True)
+    mount_point = fev.UnicodeString()
+    mount_label = fev.UnicodeString()
+
+
+class TracTicketImportController(BaseController):
+    """Controller behind the 'Import tickets from Trac' form."""
+
+    @with_trailing_slash
+    @expose('jinja:forgeimporters.trac:templates/tickets/index.html')
+    def index(self, **kw):
+        """Render the import form."""
+        return {}
+
+    @without_trailing_slash
+    @expose()
+    @require_post()
+    @validate(TracTicketImportSchema(), error_handler=index)
+    def create(self, trac_url, mount_point, mount_label, **kw):
+        """Run the import for the current project and redirect to the new app."""
+        # import_tool is an instance method; calling it on the class would
+        # bind c.project to ``self`` and leave ``project`` as None.
+        # NOTE(review): assumes ToolImporter can be constructed without
+        # arguments - confirm against forgeimporters.base.
+        importer = TracTicketImporter()
+        app = importer.import_tool(c.project,
+                                   mount_point=mount_point,
+                                   mount_label=mount_label,
+                                   trac_url=trac_url,
+                                   user=c.user)
+        redirect(app.url())
+
+
+class TracTicketImporter(ToolImporter):
+    """Tool importer that copies Trac tickets into a new Allura tracker."""
+    target_app = ForgeTrackerApp
+    source = 'Trac'
+    controller = TracTicketImportController
+    tool_label = 'Trac Ticket Importer'
+    tool_description = 'Import your tickets from Trac'
+
+    def import_tool(self, project=None, mount_point=None, mount_label=None,
+                    trac_url=None, user=None):
+        """ Import Trac tickets into a new Allura Tracker tool.
+
+        Installs a 'Tickets' app on project (mount point defaults to
+        'tickets', label to 'Tickets'), exports the tickets found at
+        trac_url and feeds them to import_tracker using an API ticket
+        created for user. Returns the installed app.
+        """
+        mount_point = mount_point or 'tickets'
+        app = project.install_app(
+            'Tickets',
+            mount_point=mount_point,
+            mount_label=mount_label or 'Tickets',
+        )
+        export = TracExport(trac_url)
+        # TracExport is an iterator, which the JSON encoder cannot serialise
+        # as such; materialise the tickets into a list first.
+        export_string = json.dumps(list(export), cls=DateJSONEncoder)
+        api_ticket = ApiTicket(user_id=user._id,
+                               capabilities={"import": ["Projects", project.shortname]},
+                               expires=datetime.utcnow() + timedelta(minutes=60))
+        cli = AlluraImportApiClient(config['base_url'], api_ticket.api_key,
+                                    api_ticket.secret_key, False)
+        import_tracker(cli, project.shortname, mount_point, {},
+                       export_string, validate=False)
+        g.post_event('project_updated')
+        return app
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/ForgeTracker/forgetracker/scripts/import_tracker.py
----------------------------------------------------------------------
diff --git a/ForgeTracker/forgetracker/scripts/import_tracker.py b/ForgeTracker/forgetracker/scripts/import_tracker.py
index 506e771..32b4d1c 100644
--- a/ForgeTracker/forgetracker/scripts/import_tracker.py
+++ b/ForgeTracker/forgetracker/scripts/import_tracker.py
@@ -25,7 +25,8 @@ from allura.lib.import_api import AlluraImportApiClient
log = logging.getLogger(__name__)
-def import_tracker(cli, project, tool, import_options, options, doc_txt, validate=True, verbose=False):
+def import_tracker(cli, project, tool, import_options, doc_txt,
+ validate=True, verbose=False, cont=False):
url = '/rest/p/' + project + '/' + tool
if validate:
url += '/validate_import'
@@ -33,8 +34,8 @@ def import_tracker(cli, project, tool, import_options, options, doc_txt, validat
url += '/perform_import'
existing_map = {}
- if options.cont:
- existing_tickets = cli.call('/rest/p/' + options.project + '/' + options.tracker + '/')['tickets']
+ if cont:
+ existing_tickets = cli.call('/rest/p/' + project + '/' + tool + '/')['tickets']
for t in existing_tickets:
existing_map[t['ticket_num']] = t['summary']
@@ -46,12 +47,12 @@ def import_tracker(cli, project, tool, import_options, options, doc_txt, validat
else:
tickets_in = doc
- if options.verbose:
+ if verbose:
print "Processing %d tickets" % len(tickets_in)
for cnt, ticket_in in enumerate(tickets_in):
if ticket_in['id'] in existing_map:
- if options.verbose:
+ if verbose:
print 'Ticket id %d already exists, skipping' % ticket_in['id']
continue
doc_import={}
@@ -60,7 +61,7 @@ def import_tracker(cli, project, tool, import_options, options, doc_txt, validat
doc_import['trackers']['default']['artifacts'] = [ticket_in]
res = cli.call(url, doc=json.dumps(doc_import), options=json.dumps(import_options))
assert res['status'] and not res['errors']
- if options.validate:
+ if validate:
if res['warnings']:
print "Ticket id %s warnings: %s" % (ticket_in['id'], res['warnings'])
else:
@@ -93,9 +94,10 @@ class ImportTracker(ScriptTask):
import_options['user_map'] = user_map
cli = AlluraImportApiClient(options.base_url, options.api_key, options.secret_key, options.verbose)
doc_txt = open(options.file_data).read()
- import_tracker(cli, options.project, options.tracker, import_options, options, doc_txt,
+ import_tracker(cli, options.project, options.tracker, import_options, doc_txt,
validate=options.validate,
- verbose=options.verbose)
+ verbose=options.verbose,
+ cont=options.cont)
@classmethod
def parser(cls):
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/scripts/trac_export.py
----------------------------------------------------------------------
diff --git a/scripts/trac_export.py b/scripts/trac_export.py
index 002a1e8..ac90b17 100755
--- a/scripts/trac_export.py
+++ b/scripts/trac_export.py
@@ -17,259 +17,6 @@
# specific language governing permissions and limitations
# under the License.
-
-import sys
-import csv
-import urlparse
-import urllib2
-import json
-import time
-import re
-from optparse import OptionParser
-from itertools import islice
-from datetime import datetime
-
-import feedparser
-from html2text import html2text
-from BeautifulSoup import BeautifulSoup, NavigableString
-import dateutil.parser
-import pytz
-
-
-def parse_options():
- optparser = OptionParser(usage=''' %prog <Trac URL>
-
-Export ticket data from a Trac instance''')
- optparser.add_option('-o', '--out-file', dest='out_filename', help='Write to file (default stdout)')
- optparser.add_option('--no-attachments', dest='do_attachments', action='store_false', default=True, help='Export attachment info')
- optparser.add_option('--only-tickets', dest='only_tickets', action='store_true', help='Export only ticket list')
- optparser.add_option('--start', dest='start_id', type='int', default=1, help='Start with given ticket numer (or next accessible)')
- optparser.add_option('--limit', dest='limit', type='int', default=None, help='Limit number of tickets')
- optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', help='Verbose operation')
- options, args = optparser.parse_args()
- if len(args) != 1:
- optparser.error("Wrong number of arguments.")
- return options, args
-
-
-class TracExport(object):
-
- PAGE_SIZE = 100
- TICKET_URL = 'ticket/%d'
- QUERY_MAX_ID_URL = 'query?col=id&order=id&desc=1&max=2'
- QUERY_BY_PAGE_URL = 'query?col=id&col=time&col=changetime&order=id&max=' + str(PAGE_SIZE)+ '&page=%d'
- ATTACHMENT_LIST_URL = 'attachment/ticket/%d/'
- ATTACHMENT_URL = 'raw-attachment/ticket/%d/%s'
-
- FIELD_MAP = {
- 'reporter': 'submitter',
- 'owner': 'assigned_to',
- }
-
- def __init__(self, base_url, start_id=1):
- """start_id - start with at least that ticket number (actual returned
- ticket may have higher id if we don't have access to exact
- one).
- """
- self.base_url = base_url.rstrip('/') + '/'
- # Contains additional info for a ticket which cannot
- # be get with single-ticket export (create/mod times is
- # and example).
- self.ticket_map = {}
- self.start_id = start_id
- self.page = (start_id - 1) / self.PAGE_SIZE + 1
- self.ticket_queue = self.next_ticket_ids()
-
- def remap_fields(self, dict):
- "Remap fields to adhere to standard taxonomy."
- out = {}
- for k, v in dict.iteritems():
- out[self.FIELD_MAP.get(k, k)] = v
-
- out['id'] = int(out['id'])
- if 'private' in out:
- out['private'] = bool(int(out['private']))
- return out
-
- def full_url(self, suburl, type=None):
- url = urlparse.urljoin(self.base_url, suburl)
- if type is None:
- return url
- glue = '&' if '?' in suburl else '?'
- return url + glue + 'format=' + type
-
- @staticmethod
- def log_url(url):
- if options.verbose:
- print >>sys.stderr, url
-
- @classmethod
- def trac2z_date(cls, s):
- d = dateutil.parser.parse(s)
- d = d.astimezone(pytz.UTC)
- return d.strftime("%Y-%m-%dT%H:%M:%SZ")
-
- @staticmethod
- def match_pattern(regexp, string):
- m = re.match(regexp, string)
- assert m
- return m.group(1)
-
- def csvopen(self, url):
- self.log_url(url)
- f = urllib2.urlopen(url)
- # Trac doesn't throw 403 error, just shows normal 200 HTML page
- # telling that access denied. So, we'll emulate 403 ourselves.
- # TODO: currently, any non-csv result treated as 403.
- if not f.info()['Content-Type'].startswith('text/csv'):
- raise urllib2.HTTPError(url, 403, 'Forbidden - emulated', f.info(), f)
- return f
-
- def parse_ticket_body(self, id):
- # Use CSV export to get ticket fields
- url = self.full_url(self.TICKET_URL % id, 'csv')
- f = self.csvopen(url)
- reader = csv.DictReader(f)
- ticket_fields = reader.next()
- ticket_fields['class'] = 'ARTIFACT'
- return self.remap_fields(ticket_fields)
-
- def parse_ticket_comments(self, id):
- # Use RSS export to get ticket comments
- url = self.full_url(self.TICKET_URL % id, 'rss')
- self.log_url(url)
- d = feedparser.parse(url)
- res = []
- for comment in d['entries']:
- c = {}
- c['submitter'] = comment.author
- c['date'] = comment.updated_parsed
- c['comment'] = html2text(comment.summary)
- c['class'] = 'COMMENT'
- res.append(c)
- return res
-
- def parse_ticket_attachments(self, id):
- SIZE_PATTERN = r'(\d+) bytes'
- TIMESTAMP_PATTERN = r'(.+) in Timeline'
- # Scrape HTML to get ticket attachments
- url = self.full_url(self.ATTACHMENT_LIST_URL % id)
- self.log_url(url)
- f = urllib2.urlopen(url)
- soup = BeautifulSoup(f)
- attach = soup.find('div', id='attachments')
- list = []
- while attach:
- attach = attach.findNext('dt')
- if not attach:
- break
- d = {}
- d['filename'] = attach.a['href'].rsplit('/', 1)[1]
- d['url'] = self.full_url(self.ATTACHMENT_URL % (id, d['filename']))
- size_s = attach.span['title']
- d['size'] = int(self.match_pattern(SIZE_PATTERN, size_s))
- timestamp_s = attach.find('a', {'class': 'timeline'})['title']
- d['date'] = self.trac2z_date(self.match_pattern(TIMESTAMP_PATTERN, timestamp_s))
- d['by'] = attach.find(text=re.compile('added by')).nextSibling.renderContents()
- d['description'] = ''
- # Skip whitespace
- while attach.nextSibling and type(attach.nextSibling) is NavigableString:
- attach = attach.nextSibling
- # if there's a description, there will be a <dd> element, other immediately next <dt>
- if attach.nextSibling and attach.nextSibling.name == 'dd':
- desc_el = attach.nextSibling
- if desc_el:
- # TODO: Convert to Allura link syntax as needed
- d['description'] = ''.join(desc_el.findAll(text=True)).strip()
- list.append(d)
- return list
-
- def get_max_ticket_id(self):
- url = self.full_url(self.QUERY_MAX_ID_URL, 'csv')
- f = self.csvopen(url)
- reader = csv.DictReader(f)
- fields = reader.next()
- print fields
- return int(fields['id'])
-
- def get_ticket(self, id, extra={}):
- '''Get ticket with given id
- extra: extra fields to add to ticket (parsed elsewhere)
- '''
- t = self.parse_ticket_body(id)
- t['comments'] = self.parse_ticket_comments(id)
- if options.do_attachments:
- atts = self.parse_ticket_attachments(id)
- if atts:
- t['attachments'] = atts
- t.update(extra)
- return t
-
- def next_ticket_ids(self):
- 'Go thru ticket list and collect available ticket ids.'
- # We could just do CSV export, which by default dumps entire list
- # Alas, for many busy servers with long ticket list, it will just
- # time out. So, let's paginate it instead.
- res = []
-
- url = self.full_url(self.QUERY_BY_PAGE_URL % self.page, 'csv')
- try:
- f = self.csvopen(url)
- except urllib2.HTTPError, e:
- if 'emulated' in e.msg:
- body = e.fp.read()
- if 'beyond the number of pages in the query' in body or 'Log in with a SourceForge account' in body:
- raise StopIteration
- raise
- reader = csv.reader(f)
- cols = reader.next()
- for r in reader:
- if r and r[0].isdigit():
- id = int(r[0])
- extra = {'date': self.trac2z_date(r[1]), 'date_updated': self.trac2z_date(r[2])}
- res.append((id, extra))
- self.page += 1
-
- return res
-
- def __iter__(self):
- return self
-
- def next(self):
- while True:
- # queue empty, try to fetch more
- if len(self.ticket_queue) == 0:
- self.ticket_queue = self.next_ticket_ids()
- # there aren't any more, we're really done
- if len(self.ticket_queue) == 0:
- raise StopIteration
- id, extra = self.ticket_queue.pop(0)
- if id >= self.start_id:
- break
- return self.get_ticket(id, extra)
-
-
-class DateJSONEncoder(json.JSONEncoder):
- def default(self, obj):
- if isinstance(obj, time.struct_time):
- return time.strftime('%Y-%m-%dT%H:%M:%SZ', obj)
- return json.JSONEncoder.default(self, obj)
-
if __name__ == '__main__':
- options, args = parse_options()
- ex = TracExport(args[0], start_id=options.start_id)
- # Implement iterator sequence limiting using islice()
- doc = [t for t in islice(ex, options.limit)]
-
- if not options.only_tickets:
- doc = {
- 'class': 'PROJECT',
- 'trackers': {'default': {'artifacts': doc}}
- }
-
- out_file = sys.stdout
- if options.out_filename:
- out_file = open(options.out_filename, 'w')
- out_file.write(json.dumps(doc, cls=DateJSONEncoder, indent=2, sort_keys=True))
- # It's bad habit not to terminate lines
- out_file.write('\n')
+ from allura.scripts.trac_export import main
+ main()