You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2014/01/10 22:23:30 UTC

[34/36] git commit: [#6484] ticket:492 Move wiki_from_trac script to tracwikiimporter

[#6484] ticket:492 Move wiki_from_trac script to tracwikiimporter


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/2050da06
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/2050da06
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/2050da06

Branch: refs/heads/cj/6484
Commit: 2050da061d28644a56c1ed3f005d46cb099349d9
Parents: 4f9f216
Author: Igor Bondarenko <je...@gmail.com>
Authored: Thu Jan 2 10:44:27 2014 +0200
Committer: Cory Johns <cj...@slashdotmedia.com>
Committed: Fri Jan 10 18:57:16 2014 +0000

----------------------------------------------------------------------
 ForgeWiki/forgewiki/scripts/__init__.py         |  16 --
 .../scripts/wiki_from_trac/__init__.py          |  18 --
 .../scripts/wiki_from_trac/extractors.py        | 244 -------------------
 .../forgewiki/scripts/wiki_from_trac/loaders.py |  74 ------
 .../scripts/wiki_from_trac/wiki_from_trac.py    |  82 -------
 scripts/allura_import.py                        |   2 +-
 scripts/wiki-export.py                          |   2 +-
 7 files changed, 2 insertions(+), 436 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2050da06/ForgeWiki/forgewiki/scripts/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeWiki/forgewiki/scripts/__init__.py b/ForgeWiki/forgewiki/scripts/__init__.py
deleted file mode 100644
index 144e298..0000000
--- a/ForgeWiki/forgewiki/scripts/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#       Licensed to the Apache Software Foundation (ASF) under one
-#       or more contributor license agreements.  See the NOTICE file
-#       distributed with this work for additional information
-#       regarding copyright ownership.  The ASF licenses this file
-#       to you under the Apache License, Version 2.0 (the
-#       "License"); you may not use this file except in compliance
-#       with the License.  You may obtain a copy of the License at
-#
-#         http://www.apache.org/licenses/LICENSE-2.0
-#
-#       Unless required by applicable law or agreed to in writing,
-#       software distributed under the License is distributed on an
-#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#       KIND, either express or implied.  See the License for the
-#       specific language governing permissions and limitations
-#       under the License.

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2050da06/ForgeWiki/forgewiki/scripts/wiki_from_trac/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeWiki/forgewiki/scripts/wiki_from_trac/__init__.py b/ForgeWiki/forgewiki/scripts/wiki_from_trac/__init__.py
deleted file mode 100644
index 625362c..0000000
--- a/ForgeWiki/forgewiki/scripts/wiki_from_trac/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-#       Licensed to the Apache Software Foundation (ASF) under one
-#       or more contributor license agreements.  See the NOTICE file
-#       distributed with this work for additional information
-#       regarding copyright ownership.  The ASF licenses this file
-#       to you under the Apache License, Version 2.0 (the
-#       "License"); you may not use this file except in compliance
-#       with the License.  You may obtain a copy of the License at
-#
-#         http://www.apache.org/licenses/LICENSE-2.0
-#
-#       Unless required by applicable law or agreed to in writing,
-#       software distributed under the License is distributed on an
-#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#       KIND, either express or implied.  See the License for the
-#       specific language governing permissions and limitations
-#       under the License.
-
-from .wiki_from_trac import WikiFromTrac

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2050da06/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py
----------------------------------------------------------------------
diff --git a/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py b/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py
deleted file mode 100644
index 7f146e6..0000000
--- a/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py
+++ /dev/null
@@ -1,244 +0,0 @@
-#       Licensed to the Apache Software Foundation (ASF) under one
-#       or more contributor license agreements.  See the NOTICE file
-#       distributed with this work for additional information
-#       regarding copyright ownership.  The ASF licenses this file
-#       to you under the Apache License, Version 2.0 (the
-#       "License"); you may not use this file except in compliance
-#       with the License.  You may obtain a copy of the License at
-#
-#         http://www.apache.org/licenses/LICENSE-2.0
-#
-#       Unless required by applicable law or agreed to in writing,
-#       software distributed under the License is distributed on an
-#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#       KIND, either express or implied.  See the License for the
-#       specific language governing permissions and limitations
-#       under the License.
-
-import logging
-import re
-import sys
-import json
-import traceback
-from urllib import quote, unquote
-from urlparse import urljoin, urlsplit
-
-try:
-    from forgeimporters.base import ProjectExtractor
-    urlopen = ProjectExtractor.urlopen
-except ImportError:
-    try:
-        from allura.lib.helpers import urlopen
-    except ImportError:
-        from urllib2 import urlopen
-
-try:
-    # Ignore this import if the html2text package is not installed
-    import html2text
-except ImportError:
-    pass
-
-from BeautifulSoup import BeautifulSoup
-
-log = logging.getLogger(__name__)
-
-
-class WikiExporter(object):
-
-    PAGE_LIST_URL = 'wiki/TitleIndex'
-    PAGE_URL = 'wiki/%s'
-    CONTENT_DIV_ATTRS = {'class': 'wikipage searchable'}
-    EXCLUDE_PAGES = [
-        'CamelCase',
-        'InterMapTxt',
-        'InterTrac',
-        'InterWiki',
-        'PageTemplates',
-        'SandBox',
-        'TitleIndex',
-        'TracAccessibility',
-        'TracAdmin',
-        'TracBackup',
-        'TracBrowser',
-        'TracChangeset',
-        'TracEnvironment',
-        'TracFineGrainedPermissions',
-        'TracGuide',
-        'TracImport',
-        'TracIni',
-        'TracInterfaceCustomization',
-        'TracLinks',
-        'TracLogging',
-        'TracNavigation',
-        'TracNotification',
-        'TracPermissions',
-        'TracPlugins',
-        'TracQuery',
-        'TracReports',
-        'TracRevisionLog',
-        'TracRoadmap',
-        'TracRss',
-        'TracSearch',
-        'TracSupport',
-        'TracSyntaxColoring',
-        'TracTickets',
-        'TracTicketsCustomFields',
-        'TracTimeline',
-        'TracUnicode',
-        'TracWiki',
-        'TracWorkflow',
-        'WikiDeletePage',
-        'WikiFormatting',
-        'WikiHtml',
-        'WikiMacros',
-        'WikiNewPage',
-        'WikiPageNames',
-        'WikiProcessors',
-        'WikiRestructuredText',
-        'WikiRestructuredTextLinks',
-        'RecentChanges',
-    ]
-    RENAME_PAGES = {
-        'WikiStart': 'Home',  # Change the start page name to Home
-        'Home': 'WikiStart',  # Rename the Home page to WikiStart
-    }
-
-    def __init__(self, base_url, options):
-        self.base_url = base_url
-        self.options = options
-
-    def export(self, out):
-        pages = []
-        for title in self.page_list():
-            try:
-                pages.append(self.get_page(title))
-            except:
-                self.log('Cannot fetch page %s. Skipping' % title)
-                self.log(traceback.format_exc())
-                continue
-        out.write(json.dumps(pages, indent=2, sort_keys=True))
-        out.write('\n')
-
-    def log(self, msg):
-        log.info(msg)
-        if self.options.verbose:
-            print >>sys.stderr, msg
-
-    def url(self, suburl, type=None):
-        url = urljoin(self.base_url, suburl)
-        if type is None:
-            return url
-        glue = '&' if '?' in suburl else '?'
-        return url + glue + 'format=' + type
-
-    def fetch(self, url):
-        return urlopen(url)
-
-    def page_list(self):
-        url = urljoin(self.base_url, self.PAGE_LIST_URL)
-        self.log('Fetching list of pages from %s' % url)
-        r = self.fetch(url)
-        html = BeautifulSoup(r)
-        pages = html.find('div', attrs=self.CONTENT_DIV_ATTRS) \
-                    .find('ul').findAll('li')
-        pages = [page.find('a').text
-                 for page in pages
-                 if page.find('a')
-                 and page.find('a').text not in self.EXCLUDE_PAGES]
-        # Remove duplicate entries by converting page list to a set.
-        # As we're going to fetch all listed pages,
-        # it's safe to destroy the original order of pages.
-        return set(pages)
-
-    def get_page(self, title):
-        title = quote(title)
-        convert_method = '_get_page_' + self.options.converter
-        content = getattr(self, convert_method)(title)
-        page = {
-            'title': self.convert_title(title),
-            'text': self.convert_content(content),
-            'labels': '',
-        }
-        return page
-
-    def _get_page_html2text(self, title):
-        url = self.url(self.PAGE_URL % title)
-        self.log('Fetching page %s' % url)
-        r = self.fetch(url)
-        html = BeautifulSoup(r)
-        return html.find('div', attrs=self.CONTENT_DIV_ATTRS)
-
-    def _get_page_regex(self, title):
-        url = self.url(self.PAGE_URL % title, 'txt')
-        self.log('Fetching page %s' % url)
-        r = self.fetch(url)
-        return r
-
-    def convert_title(self, title):
-        title = self.RENAME_PAGES.get(title, title)
-        title = title.replace('/', '-')  # Handle subpages
-        title = title.rstrip('?')  # Links to non-existent pages ends with '?'
-        return title
-
-    def convert_content(self, content):
-        convert_method = '_convert_content_' + self.options.converter
-        return getattr(self, convert_method)(content)
-
-    def _convert_wiki_toc_to_markdown(self, content):
-        """
-        Removes contents of div.wiki-toc elements and replaces them with
-        the '[TOC]' markdown macro.
-        """
-        for toc in content('div', attrs={'class': 'wiki-toc'}):
-            toc.string = '[TOC]'
-        return content
-
-    def _convert_content_html2text(self, content):
-        html2text.BODY_WIDTH = 0  # Don't wrap lines
-        content = self._convert_wiki_toc_to_markdown(content)
-        content = html2text.html2text(unicode(content))
-        # Convert internal links
-        internal_url = urlsplit(self.base_url).path + 'wiki/'
-        internal_link_re = r'\[([^]]+)\]\(%s([^)]*)\)' % internal_url
-        internal_link = re.compile(internal_link_re, re.UNICODE)
-
-        def sub(match):
-            caption = match.group(1)
-            page = self.convert_title(match.group(2))
-            if caption == page:
-                link = '[%s]' % unquote(page)
-            else:
-                link = '[%s](%s)' % (caption, page)
-            return link
-        return internal_link.sub(sub, content)
-
-    def _convert_content_regex(self, text):
-        # https://gist.github.com/sgk/1286682
-        text = re.sub('\r\n', '\n', text)
-        text = re.sub(r'{{{(.*?)}}}', r'`\1`', text)
-
-        def indent4(m):
-            return '\n    ' + m.group(1).replace('\n', '\n    ')
-
-        text = re.sub(r'(?sm){{{\n(.*?)\n}}}', indent4, text)
-        text = re.sub(r'(?m)^====\s+(.*?)\s+====$', r'#### \1', text)
-        text = re.sub(r'(?m)^===\s+(.*?)\s+===$', r'### \1', text)
-        text = re.sub(r'(?m)^==\s+(.*?)\s+==$', r'## \1', text)
-        text = re.sub(r'(?m)^=\s+(.*?)\s+=$', r'# \1', text)
-        text = re.sub(r'^       * ', r'****', text)
-        text = re.sub(r'^     * ', r'***', text)
-        text = re.sub(r'^   * ', r'**', text)
-        text = re.sub(r'^ * ', r'*', text)
-        text = re.sub(r'^ \d+. ', r'1.', text)
-        a = []
-        for line in text.split('\n'):
-            if not line.startswith('    '):
-                line = re.sub(
-                    r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)
-                line = re.sub(r'\[(wiki:[^\s\[\]]+)\s([^\[\]]+)\]',
-                              r'[\2](/\1/)', line)
-                line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line)
-                line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line)
-                line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)
-            a.append(line)
-        return '\n'.join(a)

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2050da06/ForgeWiki/forgewiki/scripts/wiki_from_trac/loaders.py
----------------------------------------------------------------------
diff --git a/ForgeWiki/forgewiki/scripts/wiki_from_trac/loaders.py b/ForgeWiki/forgewiki/scripts/wiki_from_trac/loaders.py
deleted file mode 100644
index 45d056c..0000000
--- a/ForgeWiki/forgewiki/scripts/wiki_from_trac/loaders.py
+++ /dev/null
@@ -1,74 +0,0 @@
-#       Licensed to the Apache Software Foundation (ASF) under one
-#       or more contributor license agreements.  See the NOTICE file
-#       distributed with this work for additional information
-#       regarding copyright ownership.  The ASF licenses this file
-#       to you under the Apache License, Version 2.0 (the
-#       "License"); you may not use this file except in compliance
-#       with the License.  You may obtain a copy of the License at
-#
-#         http://www.apache.org/licenses/LICENSE-2.0
-#
-#       Unless required by applicable law or agreed to in writing,
-#       software distributed under the License is distributed on an
-#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#       KIND, either express or implied.  See the License for the
-#       specific language governing permissions and limitations
-#       under the License.
-
-import json
-from optparse import OptionParser
-
-from allura.lib.import_api import AlluraImportApiClient
-
-
-def load_data(doc_file_name=None, optparser=None, options=None):
-    import_options = {}
-    for s in options.import_opts:
-        k, v = s.split('=', 1)
-        if v == 'false':
-            v = False
-        import_options[k] = v
-
-    user_map = {}
-    if options.user_map_file:
-        f = open(options.user_map_file)
-        try:
-            user_map = json.load(f)
-            if type(user_map) is not type({}):
-                raise ValueError
-            for k, v in user_map.iteritems():
-                print k, v
-                if not isinstance(k, basestring) or not isinstance(v, basestring):
-                    raise ValueError
-        except ValueError:
-            optparser.error(
-                '--user-map should specify JSON file with format {"original_user": "sf_user", ...}')
-        finally:
-            f.close()
-
-    import_options['user_map'] = user_map
-
-    cli = AlluraImportApiClient(
-        options.base_url, options.api_key, options.secret_key, options.verbose)
-    doc_txt = open(doc_file_name).read()
-
-    if options.wiki:
-        import_wiki(cli, options.project, options.wiki, options, doc_txt)
-
-
-def import_wiki(cli, project, tool, options, doc_txt):
-    url = '/rest/p/' + project + '/' + tool
-    doc = json.loads(doc_txt)
-    if 'wiki' in doc and 'default' in doc['wiki'] and 'artifacts' in doc['wiki']['default']:
-        pages = doc['trackers']['default']['artifacts']
-    else:
-        pages = doc
-    if options.verbose:
-        print "Processing %d pages" % len(pages)
-    for page in pages:
-        title = page.pop('title').encode('utf-8')
-        page['text'] = page['text'].encode('utf-8')
-        page['labels'] = page['labels'].encode('utf-8')
-        r = cli.call(url + '/' + title, **page)
-        assert r == {}
-        print 'Imported wiki page %s' % title

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2050da06/ForgeWiki/forgewiki/scripts/wiki_from_trac/wiki_from_trac.py
----------------------------------------------------------------------
diff --git a/ForgeWiki/forgewiki/scripts/wiki_from_trac/wiki_from_trac.py b/ForgeWiki/forgewiki/scripts/wiki_from_trac/wiki_from_trac.py
deleted file mode 100644
index afc6f41..0000000
--- a/ForgeWiki/forgewiki/scripts/wiki_from_trac/wiki_from_trac.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#       Licensed to the Apache Software Foundation (ASF) under one
-#       or more contributor license agreements.  See the NOTICE file
-#       distributed with this work for additional information
-#       regarding copyright ownership.  The ASF licenses this file
-#       to you under the Apache License, Version 2.0 (the
-#       "License"); you may not use this file except in compliance
-#       with the License.  You may obtain a copy of the License at
-#
-#         http://www.apache.org/licenses/LICENSE-2.0
-#
-#       Unless required by applicable law or agreed to in writing,
-#       software distributed under the License is distributed on an
-#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#       KIND, either express or implied.  See the License for the
-#       specific language governing permissions and limitations
-#       under the License.
-
-import argparse
-import logging
-from tempfile import NamedTemporaryFile
-from tg.decorators import cached_property
-
-from forgewiki.scripts.wiki_from_trac.extractors import WikiExporter
-from forgewiki.scripts.wiki_from_trac.loaders import load_data
-
-from allura.scripts import ScriptTask
-
-
-log = logging.getLogger(__name__)
-
-
-class WikiFromTrac(ScriptTask):
-
-    """Import Trac Wiki to Allura Wiki"""
-    @classmethod
-    def parser(cls):
-        parser = argparse.ArgumentParser(description='Import wiki from'
-                                         'Trac to allura wiki')
-
-        parser.add_argument('trac_url', type=str, help='Trac URL')
-        parser.add_argument('-a', '--api-ticket',
-                            dest='api_key', help='API ticket')
-        parser.add_argument('-s', '--secret-key',
-                            dest='secret_key', help='Secret key')
-        parser.add_argument('-p', '--project', dest='project',
-                            help='Project to import to')
-        parser.add_argument('-t', '--tracker', dest='tracker',
-                            help='Tracker to import to')
-        parser.add_argument('-f', '--forum', dest='forum',
-                            help='Forum tool to import to')
-        parser.add_argument('-w', '--wiki', dest='wiki',
-                            help='Wiki tool to import to')
-        parser.add_argument('-u', '--base-url', dest='base_url',
-                            default='https://sourceforge.net', help='Base Allura (%(default)s for default)')
-        parser.add_argument('-o', dest='import_opts',
-                            default=[], action='append', help='Specify import option(s)', metavar='opt=val')
-        parser.add_argument('--user-map', dest='user_map_file',
-                            help='Map original users to SF.net users', metavar='JSON_FILE')
-        parser.add_argument('--validate', dest='validate',
-                            action='store_true', help='Validate import data')
-        parser.add_argument('-v', '--verbose', dest='verbose',
-                            action='store_true', help='Verbose operation')
-        parser.add_argument('-c', '--continue', dest='cont',
-                            action='store_true', help='Continue import into existing tracker')
-        parser.add_argument('-C', '--converter', dest='converter',
-                            default='html2text',
-                            help='Converter to use on wiki text. '
-                                 'Available options: '
-                                 'html2text (default) or regex')
-
-        return parser
-
-    @classmethod
-    def execute(cls, options):
-        with NamedTemporaryFile() as f:
-            WikiExporter(options.trac_url, options).export(f)
-            f.flush()
-            load_data(f.name, cls.parser(), options)
-
-
-if __name__ == '__main__':
-    WikiFromTrac.main()

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2050da06/scripts/allura_import.py
----------------------------------------------------------------------
diff --git a/scripts/allura_import.py b/scripts/allura_import.py
index 0f2f715..56bc5d5 100644
--- a/scripts/allura_import.py
+++ b/scripts/allura_import.py
@@ -20,7 +20,7 @@ from optparse import OptionParser
 
 from allura.lib.import_api import AlluraImportApiClient
 from forgetracker.scripts.import_tracker import import_tracker
-from forgewiki.scripts.wiki_from_trac.loaders import import_wiki
+from tracwikiimporter.scripts.wiki_from_trac.loaders import import_wiki
 
 
 def main():

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/2050da06/scripts/wiki-export.py
----------------------------------------------------------------------
diff --git a/scripts/wiki-export.py b/scripts/wiki-export.py
index e096949..3c51ed2 100755
--- a/scripts/wiki-export.py
+++ b/scripts/wiki-export.py
@@ -22,7 +22,7 @@ import json
 import sys
 from optparse import OptionParser
 
-from forgewiki.scripts.wiki_from_trac.extractors import WikiExporter
+from tracwikiimporter.scripts.wiki_from_trac.extractors import WikiExporter
 
 
 def parse_options():