Posted to commits@allura.apache.org by br...@apache.org on 2013/02/13 18:24:00 UTC

[1/2] git commit: [#5703] Add ScriptTask class and convert refresh-all-repos.py to be runnable as script or task

[#5703] Add ScriptTask class and convert refresh-all-repos.py to be runnable as script or task


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/05012ca8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/05012ca8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/05012ca8

Branch: refs/heads/db/5703
Commit: 05012ca82ace52d9c6c50933bd02de22cc1b88d6
Parents: 6628b2e
Author: Tim Van Steenburgh <tv...@gmail.com>
Authored: Fri Feb 8 16:40:27 2013 +0000
Committer: Dave Brondsema <db...@geek.net>
Committed: Wed Feb 13 17:17:25 2013 +0000

----------------------------------------------------------------------
 Allura/allura/lib/decorators.py      |   10 ++
 Allura/allura/scripts/__init__.py    |    1 +
 Allura/allura/scripts/refreshrepo.py |  159 +++++++++++++++++++++++++++
 Allura/allura/scripts/scripttask.py  |   90 ++++++++++++++++
 scripts/refresh-all-repos.py         |  166 -----------------------------
 5 files changed, 260 insertions(+), 166 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/05012ca8/Allura/allura/lib/decorators.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/decorators.py b/Allura/allura/lib/decorators.py
index a8ad5ec..bb5a7ff 100644
--- a/Allura/allura/lib/decorators.py
+++ b/Allura/allura/lib/decorators.py
@@ -1,3 +1,4 @@
+import inspect
 import sys
 import json
 import logging
@@ -12,6 +13,8 @@ from webob import exc
 
 def task(func):
     '''Decorator to add some methods to task functions'''
+    if inspect.isclass(func):
+        return taskclass(func)
     def post(*args, **kwargs):
         from allura import model as M
         delay = kwargs.pop('delay', 0)
@@ -19,6 +22,13 @@ def task(func):
     func.post = post
     return func
 
+def taskclass(cls):
+    def post(*args, **kwargs):
+        from allura import model as M
+        return M.MonQTask.post(cls, args[1:], kwargs)
+    cls.post = classmethod(post)
+    return cls
+
 class event_handler(object):
     '''Decorator to register event handlers'''
     listeners = defaultdict(set)
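
A minimal sketch of how the extended decorator might be used, assuming an
importable Allura tree; simple_task, EchoTask, and the commented-out .post()
calls are illustrative only (posting requires a configured app and MongoDB):

    import logging

    from allura.lib.decorators import task

    log = logging.getLogger(__name__)


    @task
    def simple_task(msg):
        # Function form: task() attaches a post() helper that enqueues a
        # MonQTask pointing at this function.
        log.info(msg)


    @task
    class EchoTask(object):
        # Class form: task() detects a class and routes it through taskclass(),
        # which attaches a classmethod post() that calls
        # M.MonQTask.post(cls, args, kwargs).
        def __new__(cls, msg):
            log.info(msg)


    # simple_task.post('hello')  # queue for a task worker (needs app context)
    # EchoTask.post('hello')     # same, via the new class branch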

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/05012ca8/Allura/allura/scripts/__init__.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/__init__.py b/Allura/allura/scripts/__init__.py
new file mode 100644
index 0000000..d13c300
--- /dev/null
+++ b/Allura/allura/scripts/__init__.py
@@ -0,0 +1 @@
+from scripttask import ScriptTask

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/05012ca8/Allura/allura/scripts/refreshrepo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/refreshrepo.py b/Allura/allura/scripts/refreshrepo.py
new file mode 100644
index 0000000..09b6242
--- /dev/null
+++ b/Allura/allura/scripts/refreshrepo.py
@@ -0,0 +1,159 @@
+import argparse
+import logging
+import re
+
+import faulthandler
+from pylons import c
+from ming.orm import ThreadLocalORMSession
+
+from allura import model as M
+from allura.lib.utils import chunked_find, chunked_list
+from allura.scripts import ScriptTask
+
+log = logging.getLogger(__name__)
+
+
+class RefreshRepo(ScriptTask):
+    @classmethod
+    def execute(cls, options):
+        q_project = {}
+        if options.nbhd:
+            nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
+            if not nbhd:
+                raise AttributeError("Invalid neighborhood url prefix.")
+            q_project['neighborhood_id'] = nbhd._id
+        if options.project:
+            q_project['shortname'] = options.project
+        elif options.project_regex:
+            q_project['shortname'] = {'$regex': options.project_regex}
+
+        log.info('Refreshing repositories')
+        for chunk in chunked_find(M.Project, q_project):
+            for p in chunk:
+                log.info("Refreshing repos for project '%s'." % p.shortname)
+                if options.dry_run:
+                    continue
+                c.project = p
+                if options.mount_point:
+                    mount_points = [options.mount_point]
+                else:
+                    mount_points = [ac.options.mount_point for ac in
+                                    M.AppConfig.query.find(dict(project_id=p._id))]
+                for app in (p.app_instance(mp) for mp in mount_points):
+                    c.app = app
+                    if not hasattr(app, 'repo'):
+                        continue
+                    if c.app.repo.tool.lower() not in options.repo_types:
+                        log.info("Skipping %r: wrong type (%s)", c.app.repo,
+                                c.app.repo.tool.lower())
+                        continue
+
+                    if options.clean:
+                        ci_ids = list(c.app.repo.all_commit_ids())
+                        log.info("Deleting mongo data for %i commits...", len(ci_ids))
+                        tree_ids = [
+                                tree_id for doc in
+                                M.repo.TreesDoc.m.find({"_id": {"$in": ci_ids}},
+                                                       {"tree_ids": 1})
+                                for tree_id in doc.get("tree_ids", [])]
+
+                        i = M.repo.CommitDoc.m.find({"_id": {"$in": ci_ids}}).count()
+                        log.info("Deleting %i CommitDoc docs...", i)
+                        M.repo.CommitDoc.m.remove({"_id": {"$in": ci_ids}})
+
+                        # delete these in chunks, otherwise the query doc can
+                        # exceed the max BSON size limit (16MB at the moment)
+                        for tree_ids_chunk in chunked_list(tree_ids, 300000):
+                            i = M.repo.TreeDoc.m.find({"_id": {"$in": tree_ids_chunk}}).count()
+                            log.info("Deleting %i TreeDoc docs...", i)
+                            M.repo.TreeDoc.m.remove({"_id": {"$in": tree_ids_chunk}})
+                            i = M.repo.LastCommitDoc.m.find({"object_id": {"$in": tree_ids_chunk}}).count()
+                            log.info("Deleting %i LastCommitDoc docs...", i)
+                            M.repo.LastCommitDoc.m.remove({"object_id": {"$in": tree_ids_chunk}})
+                        del tree_ids
+
+                        # delete these after TreeDoc and LastCommitDoc so that if
+                        # we crash, we don't lose the ability to delete those
+                        i = M.repo.TreesDoc.m.find({"_id": {"$in": ci_ids}}).count()
+                        log.info("Deleting %i TreesDoc docs...", i)
+                        M.repo.TreesDoc.m.remove({"_id": {"$in": ci_ids}})
+
+                        # delete LastCommitDocs for non-trees
+                        repo_lastcommit_re = re.compile("^{}:".format(c.app.repo._id))
+                        i = M.repo.LastCommitDoc.m.find(dict(_id=repo_lastcommit_re)).count()
+                        log.info("Deleting %i remaining LastCommitDoc docs, by repo id...", i)
+                        M.repo.LastCommitDoc.m.remove(dict(_id=repo_lastcommit_re))
+
+                        i = M.repo.DiffInfoDoc.m.find({"_id": {"$in": ci_ids}}).count()
+                        log.info("Deleting %i DiffInfoDoc docs...", i)
+                        M.repo.DiffInfoDoc.m.remove({"_id": {"$in": ci_ids}})
+
+                        i = M.repo.CommitRunDoc.m.find({"commit_ids": {"$in": ci_ids}}).count()
+                        log.info("Deleting %i CommitRunDoc docs...", i)
+                        M.repo.CommitRunDoc.m.remove({"commit_ids": {"$in": ci_ids}})
+                        del ci_ids
+
+                    try:
+                        if options.all:
+                            log.info('Refreshing ALL commits in %r', c.app.repo)
+                        else:
+                            log.info('Refreshing NEW commits in %r', c.app.repo)
+                        if options.profile:
+                            import cProfile
+                            cProfile.runctx('c.app.repo.refresh(options.all, notify=options.notify)',
+                                    globals(), locals(), 'refresh.profile')
+                        else:
+                            c.app.repo.refresh(options.all, notify=options.notify)
+                    except:
+                        log.exception('Error refreshing %r', c.app.repo)
+            ThreadLocalORMSession.flush_all()
+
+    @classmethod
+    def parser(cls):
+        def repo_type_list(s):
+            repo_types = []
+            for repo_type in s.split(','):
+                repo_type = repo_type.strip()
+                if repo_type not in ['svn', 'git', 'hg']:
+                    raise argparse.ArgumentTypeError(
+                            '{} is not a valid repo type.'.format(repo_type))
+                repo_types.append(repo_type)
+            return repo_types
+
+        parser = argparse.ArgumentParser(description='Scan repos on filesystem and '
+                'update repo metadata in MongoDB. Run for all repos (no args), '
+                'or restrict by neighborhood, project, or code tool mount point.')
+        parser.add_argument('--nbhd', action='store', default='', dest='nbhd',
+                help='Restrict update to a particular neighborhood, e.g. /p/.')
+        parser.add_argument('--project', action='store', default='', dest='project',
+                help='Restrict update to a particular project. To specify a '
+                'subproject, use a slash: project/subproject.')
+        parser.add_argument('--project-regex', action='store', default='',
+                dest='project_regex',
+                help='Restrict update to projects for which the shortname matches '
+                'the provided regex.')
+        parser.add_argument('--repo-types', action='store', type=repo_type_list,
+                default=['svn', 'git', 'hg'], dest='repo_types',
+                help='Only refresh repos of the given type(s). Defaults to: '
+                'svn,git,hg. Example: --repo-types=git,hg')
+        parser.add_argument('--mount-point', default='', dest='mount_point',
+                help='Restrict update to repos at the given tool mount point. ')
+        parser.add_argument('--clean', action='store_true', dest='clean',
+                default=False, help='Remove repo-related mongo docs (for '
+                'project(s) being refreshed only) before doing the refresh.')
+        parser.add_argument('--all', action='store_true', dest='all', default=False,
+                help='Refresh all commits (not just the ones that are new).')
+        parser.add_argument('--notify', action='store_true', dest='notify',
+                default=False, help='Send email notifications of new commits.')
+        parser.add_argument('--dry-run', action='store_true', dest='dry_run',
+                default=False, help='Log names of projects that would have their '
+                'repos refreshed, but do not perform the actual refresh.')
+        parser.add_argument('--profile', action='store_true', dest='profile',
+                default=False, help='Enable the profiler (slow). Will log '
+                'profiling output to ./refresh.profile')
+        return parser
+
+
+if __name__ == '__main__':
+    faulthandler.enable()
+    RefreshRepo.main()
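
With this conversion, the same refresh can be started from a shell or queued as
a background task. A hedged usage sketch (the option strings come from the
parser above; a configured Allura environment is assumed):

    # From a shell, with the Allura virtualenv active (path is illustrative):
    #   $ python Allura/allura/scripts/refreshrepo.py --nbhd /p/ --dry-run

    from allura.scripts.refreshrepo import RefreshRepo

    # Queue as a MonQTask; arguments are passed as one command-line-style
    # string and re-parsed by ScriptTask._execute_task() (see scripttask.py
    # below) on the worker side.
    RefreshRepo.post('--project myproject --clean --dry-run')

    # Or run synchronously in-process with explicitly parsed options.
    opts = RefreshRepo.parser().parse_args(['--project', 'myproject', '--dry-run'])
    RefreshRepo.execute(opts)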

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/05012ca8/Allura/allura/scripts/scripttask.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/scripttask.py b/Allura/allura/scripts/scripttask.py
new file mode 100644
index 0000000..9dbe857
--- /dev/null
+++ b/Allura/allura/scripts/scripttask.py
@@ -0,0 +1,90 @@
+"""
+Provides ScriptTask, a base class for implementing a command-line script that
+can be run as a task.
+
+To use, subclass ScriptTask and implement two methods::
+
+    class MyScript(ScriptTask):
+        @classmethod
+        def parser(cls):
+            '''Define and return an argparse.ArgumentParser instance'''
+            pass
+
+        @classmethod
+        def execute(cls, options):
+            '''Your main code goes here.'''
+            pass
+
+To call as a script::
+
+    if __name__ == '__main__':
+        MyScript.main()
+
+To call as a task::
+
+    # post the task with cmd-line-style args
+    MyScript.post('-p myproject --dry-run')
+
+"""
+
+import argparse
+import logging
+import sys
+
+from allura.lib.decorators import task
+
+
+log = logging.getLogger(__name__)
+
+
+class Writer(object):
+    def __init__(self, func):
+        self.func = func
+
+    def write(self, buf):
+        self.func(buf)
+
+
+class ScriptTask(object):
+    """Base class for a command-line script that is also executable as a task."""
+
+    class __metaclass__(type):
+        @property
+        def __doc__(cls):
+            return cls.parser().format_help()
+        def __new__(meta, classname, bases, classDict):
+            return task(type.__new__(meta, classname, bases, classDict))
+
+    def __new__(cls, arg_string):
+        cls._execute_task(arg_string)
+
+    @classmethod
+    def _execute_task(cls, arg_string):
+        try:
+            _stdout = sys.stdout
+            _stderr = sys.stderr
+            sys.stdout = Writer(log.info)
+            sys.stderr = Writer(log.error)
+            try:
+                options = cls.parser().parse_args(arg_string.split(' '))
+            except SystemExit:
+                raise Exception("Error parsing args: '%s'" % arg_string)
+            cls.execute(options)
+        finally:
+            sys.stdout = _stdout
+            sys.stderr = _stderr
+
+    @classmethod
+    def execute(cls, options):
+        """User code goes here."""
+        pass
+
+    @classmethod
+    def parser(cls):
+        """Return an argument parser appropriate for your script."""
+        return argparse.ArgumentParser(description="Default no-op parser")
+
+    @classmethod
+    def main(cls):
+        options = cls.parser().parse_args()
+        cls.execute(options)
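
A concrete (hypothetical) subclass following the pattern in the module
docstring above; the class name, options, and log messages are invented, but
the parser()/execute()/main()/post() contract and the metaclass-provided
__doc__ are the ones shown in this diff:

    import argparse
    import logging

    from allura.scripts import ScriptTask

    log = logging.getLogger(__name__)


    class ExpireOldDrafts(ScriptTask):
        @classmethod
        def parser(cls):
            parser = argparse.ArgumentParser(
                    description='Hypothetical example: expire old draft docs.')
            parser.add_argument('--days', type=int, default=30, dest='days',
                    help='Expire drafts older than this many days.')
            parser.add_argument('--dry-run', action='store_true', dest='dry_run',
                    default=False, help='Log what would happen; change nothing.')
            return parser

        @classmethod
        def execute(cls, options):
            # Real work goes here; when run as a task, stdout/stderr are
            # already redirected to the task log by _execute_task().
            log.info('Would expire drafts older than %s days (dry_run=%s)',
                    options.days, options.dry_run)


    if __name__ == '__main__':
        ExpireOldDrafts.main()            # script entry point

    # ExpireOldDrafts.post('--days 7')    # queue as a task (needs app context)
    # ExpireOldDrafts.__doc__             # argparse help text, via the metaclass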

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/05012ca8/scripts/refresh-all-repos.py
----------------------------------------------------------------------
diff --git a/scripts/refresh-all-repos.py b/scripts/refresh-all-repos.py
deleted file mode 100644
index 1cf7e3d..0000000
--- a/scripts/refresh-all-repos.py
+++ /dev/null
@@ -1,166 +0,0 @@
-import argparse
-import logging
-import re
-
-import faulthandler
-from pylons import c
-from ming.orm import ThreadLocalORMSession
-
-from allura import model as M
-from allura.lib.utils import chunked_find, chunked_list
-
-log = logging.getLogger(__name__)
-
-
-def main(options):
-    q_project = {}
-    if options.nbhd:
-        nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
-        if not nbhd:
-            return "Invalid neighborhood url prefix."
-        q_project['neighborhood_id'] = nbhd._id
-    if options.project:
-        q_project['shortname'] = options.project
-    elif options.project_regex:
-        q_project['shortname'] = {'$regex': options.project_regex}
-
-    log.info('Refreshing repositories')
-    if options.clean_all:
-        log.info('Removing all repository objects')
-        M.repo.CommitDoc.m.remove({})
-        M.repo.TreeDoc.m.remove({})
-        M.repo.TreesDoc.m.remove({})
-        M.repo.DiffInfoDoc.m.remove({})
-        M.repo.CommitRunDoc.m.remove({})
-        M.repo.LastCommitDoc.m.remove({})
-
-    for chunk in chunked_find(M.Project, q_project):
-        for p in chunk:
-            log.info("Refreshing repos for project '%s'." % p.shortname)
-            if options.dry_run:
-                continue
-            c.project = p
-            if options.mount_point:
-                mount_points = [options.mount_point]
-            else:
-                mount_points = [ac.options.mount_point for ac in
-                                M.AppConfig.query.find(dict(project_id=p._id))]
-            for app in (p.app_instance(mp) for mp in mount_points):
-                c.app = app
-                if not hasattr(app, 'repo'):
-                    continue
-                if c.app.repo.tool.lower() not in options.repo_types:
-                    log.info("Skipping %r: wrong type (%s)", c.app.repo,
-                            c.app.repo.tool.lower())
-                    continue
-
-                if options.clean:
-                    ci_ids = list(c.app.repo.all_commit_ids())
-                    log.info("Deleting mongo data for %i commits...", len(ci_ids))
-                    tree_ids = [
-                            tree_id for doc in
-                            M.repo.TreesDoc.m.find({"_id": {"$in": ci_ids}},
-                                                   {"tree_ids": 1})
-                            for tree_id in doc.get("tree_ids", [])]
-
-                    i = M.repo.CommitDoc.m.find({"_id": {"$in": ci_ids}}).count()
-                    log.info("Deleting %i CommitDoc docs...", i)
-                    M.repo.CommitDoc.m.remove({"_id": {"$in": ci_ids}})
-
-                    # delete these in chunks, otherwise the query doc can
-                    # exceed the max BSON size limit (16MB at the moment)
-                    for tree_ids_chunk in chunked_list(tree_ids, 300000):
-                        i = M.repo.TreeDoc.m.find({"_id": {"$in": tree_ids_chunk}}).count()
-                        log.info("Deleting %i TreeDoc docs...", i)
-                        M.repo.TreeDoc.m.remove({"_id": {"$in": tree_ids_chunk}})
-                    del tree_ids
-
-                    # delete these after TreeDoc and LastCommitDoc so that if
-                    # we crash, we don't lose the ability to delete those
-                    i = M.repo.TreesDoc.m.find({"_id": {"$in": ci_ids}}).count()
-                    log.info("Deleting %i TreesDoc docs...", i)
-                    M.repo.TreesDoc.m.remove({"_id": {"$in": ci_ids}})
-
-                    # delete LastCommitDocs
-                    i = M.repo.LastCommitDoc.m.find(dict(commit_ids={'$in': ci_ids})).count()
-                    log.info("Deleting %i remaining LastCommitDoc docs, by repo id...", i)
-                    M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': ci_ids}))
-
-                    i = M.repo.DiffInfoDoc.m.find({"_id": {"$in": ci_ids}}).count()
-                    log.info("Deleting %i DiffInfoDoc docs...", i)
-                    M.repo.DiffInfoDoc.m.remove({"_id": {"$in": ci_ids}})
-
-                    i = M.repo.CommitRunDoc.m.find({"commit_ids": {"$in": ci_ids}}).count()
-                    log.info("Deleting %i CommitRunDoc docs...", i)
-                    M.repo.CommitRunDoc.m.remove({"commit_ids": {"$in": ci_ids}})
-                    del ci_ids
-
-                try:
-                    if options.all:
-                        log.info('Refreshing ALL commits in %r', c.app.repo)
-                    else:
-                        log.info('Refreshing NEW commits in %r', c.app.repo)
-                    if options.profile:
-                        import cProfile
-                        cProfile.runctx('c.app.repo.refresh(options.all, notify=options.notify)',
-                                globals(), locals(), 'refresh.profile')
-                    else:
-                        c.app.repo.refresh(options.all, notify=options.notify)
-                except:
-                    log.exception('Error refreshing %r', c.app.repo)
-        ThreadLocalORMSession.flush_all()
-        ThreadLocalORMSession.close_all()
-
-
-def repo_type_list(s):
-    repo_types = []
-    for repo_type in s.split(','):
-        repo_type = repo_type.strip()
-        if repo_type not in ['svn', 'git', 'hg']:
-            raise argparse.ArgumentTypeError(
-                    '{} is not a valid repo type.'.format(repo_type))
-        repo_types.append(repo_type)
-    return repo_types
-
-
-def parse_options():
-    parser = argparse.ArgumentParser(description='Scan repos on filesystem and '
-            'update repo metadata in MongoDB. Run for all repos (no args), '
-            'or restrict by neighborhood, project, or code tool mount point.')
-    parser.add_argument('--nbhd', action='store', default='', dest='nbhd',
-            help='Restrict update to a particular neighborhood, e.g. /p/.')
-    parser.add_argument('--project', action='store', default='', dest='project',
-            help='Restrict update to a particular project. To specify a '
-            'subproject, use a slash: project/subproject.')
-    parser.add_argument('--project-regex', action='store', default='',
-            dest='project_regex',
-            help='Restrict update to projects for which the shortname matches '
-            'the provided regex.')
-    parser.add_argument('--repo-types', action='store', type=repo_type_list,
-            default=['svn', 'git', 'hg'], dest='repo_types',
-            help='Only refresh repos of the given type(s). Defaults to: '
-            'svn,git,hg. Example: --repo-types=git,hg')
-    parser.add_argument('--mount_point', default='', dest='mount_point',
-            help='Restrict update to repos at the given tool mount point. ')
-    parser.add_argument('--clean', action='store_true', dest='clean',
-            default=False, help='Remove repo-related mongo docs (for '
-            'project(s) being refreshed only) before doing the refresh.')
-    parser.add_argument('--clean-all', action='store_true', dest='clean_all',
-            default=False, help='Remove ALL repo-related mongo docs before '
-            'refresh.')
-    parser.add_argument('--all', action='store_true', dest='all', default=False,
-            help='Refresh all commits (not just the ones that are new).')
-    parser.add_argument('--notify', action='store_true', dest='notify',
-            default=False, help='Send email notifications of new commits.')
-    parser.add_argument('--dry-run', action='store_true', dest='dry_run',
-            default=False, help='Log names of projects that would have their '
-            'repos refreshed, but do not perform the actual refresh.')
-    parser.add_argument('--profile', action='store_true', dest='profile',
-            default=False, help='Enable the profiler (slow). Will log '
-            'profiling output to ./refresh.profile')
-    return parser.parse_args()
-
-if __name__ == '__main__':
-    import sys
-    faulthandler.enable()
-    sys.exit(main(parse_options()))