You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2013/02/15 23:46:13 UTC
[2/10] git commit: [#5703] Add ScriptTask class and convert refresh-all-repos.py to be runnable as script or task
[#5703] Add ScriptTask class and convert refresh-all-repos.py to be runnable as script or task
Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/05012ca8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/05012ca8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/05012ca8
Branch: refs/heads/cj/5685
Commit: 05012ca82ace52d9c6c50933bd02de22cc1b88d6
Parents: 6628b2e
Author: Tim Van Steenburgh <tv...@gmail.com>
Authored: Fri Feb 8 16:40:27 2013 +0000
Committer: Dave Brondsema <db...@geek.net>
Committed: Wed Feb 13 17:17:25 2013 +0000
----------------------------------------------------------------------
Allura/allura/lib/decorators.py | 10 ++
Allura/allura/scripts/__init__.py | 1 +
Allura/allura/scripts/refreshrepo.py | 159 +++++++++++++++++++++++++++
Allura/allura/scripts/scripttask.py | 90 ++++++++++++++++
scripts/refresh-all-repos.py | 166 -----------------------------
5 files changed, 260 insertions(+), 166 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/05012ca8/Allura/allura/lib/decorators.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/decorators.py b/Allura/allura/lib/decorators.py
index a8ad5ec..bb5a7ff 100644
--- a/Allura/allura/lib/decorators.py
+++ b/Allura/allura/lib/decorators.py
@@ -1,3 +1,4 @@
+import inspect
import sys
import json
import logging
@@ -12,6 +13,8 @@ from webob import exc
def task(func):
'''Decorator to add some methods to task functions'''
+ if inspect.isclass(func):
+ return taskclass(func)
def post(*args, **kwargs):
from allura import model as M
delay = kwargs.pop('delay', 0)
@@ -19,6 +22,13 @@ def task(func):
func.post = post
return func
+def taskclass(cls):
+ def post(*args, **kwargs):
+ from allura import model as M
+ return M.MonQTask.post(cls, args[1:], kwargs)
+ cls.post = classmethod(post)
+ return cls
+
class event_handler(object):
'''Decorator to register event handlers'''
listeners = defaultdict(set)
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/05012ca8/Allura/allura/scripts/__init__.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/__init__.py b/Allura/allura/scripts/__init__.py
new file mode 100644
index 0000000..d13c300
--- /dev/null
+++ b/Allura/allura/scripts/__init__.py
@@ -0,0 +1 @@
+from scripttask import ScriptTask
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/05012ca8/Allura/allura/scripts/refreshrepo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/refreshrepo.py b/Allura/allura/scripts/refreshrepo.py
new file mode 100644
index 0000000..09b6242
--- /dev/null
+++ b/Allura/allura/scripts/refreshrepo.py
@@ -0,0 +1,159 @@
+import argparse
+import logging
+import re
+
+import faulthandler
+from pylons import c
+from ming.orm import ThreadLocalORMSession
+
+from allura import model as M
+from allura.lib.utils import chunked_find, chunked_list
+from allura.scripts import ScriptTask
+
+log = logging.getLogger(__name__)
+
+
+class RefreshRepo(ScriptTask):
+ @classmethod
+ def execute(cls, options):
+ q_project = {}
+ if options.nbhd:
+ nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
+ if not nbhd:
+ raise AttributeError("Invalid neighborhood url prefix.")
+ q_project['neighborhood_id'] = nbhd._id
+ if options.project:
+ q_project['shortname'] = options.project
+ elif options.project_regex:
+ q_project['shortname'] = {'$regex': options.project_regex}
+
+ log.info('Refreshing repositories')
+ for chunk in chunked_find(M.Project, q_project):
+ for p in chunk:
+ log.info("Refreshing repos for project '%s'." % p.shortname)
+ if options.dry_run:
+ continue
+ c.project = p
+ if options.mount_point:
+ mount_points = [options.mount_point]
+ else:
+ mount_points = [ac.options.mount_point for ac in
+ M.AppConfig.query.find(dict(project_id=p._id))]
+ for app in (p.app_instance(mp) for mp in mount_points):
+ c.app = app
+ if not hasattr(app, 'repo'):
+ continue
+ if c.app.repo.tool.lower() not in options.repo_types:
+ log.info("Skipping %r: wrong type (%s)", c.app.repo,
+ c.app.repo.tool.lower())
+ continue
+
+ if options.clean:
+ ci_ids = list(c.app.repo.all_commit_ids())
+ log.info("Deleting mongo data for %i commits...", len(ci_ids))
+ tree_ids = [
+ tree_id for doc in
+ M.repo.TreesDoc.m.find({"_id": {"$in": ci_ids}},
+ {"tree_ids": 1})
+ for tree_id in doc.get("tree_ids", [])]
+
+ i = M.repo.CommitDoc.m.find({"_id": {"$in": ci_ids}}).count()
+ log.info("Deleting %i CommitDoc docs...", i)
+ M.repo.CommitDoc.m.remove({"_id": {"$in": ci_ids}})
+
+ # delete these in chunks, otherwise the query doc can
+ # exceed the max BSON size limit (16MB at the moment)
+ for tree_ids_chunk in chunked_list(tree_ids, 300000):
+ i = M.repo.TreeDoc.m.find({"_id": {"$in": tree_ids_chunk}}).count()
+ log.info("Deleting %i TreeDoc docs...", i)
+ M.repo.TreeDoc.m.remove({"_id": {"$in": tree_ids_chunk}})
+ i = M.repo.LastCommitDoc.m.find({"object_id": {"$in": tree_ids_chunk}}).count()
+ log.info("Deleting %i LastCommitDoc docs...", i)
+ M.repo.LastCommitDoc.m.remove({"object_id": {"$in": tree_ids_chunk}})
+ del tree_ids
+
+ # delete these after TreeDoc and LastCommitDoc so that if
+ # we crash, we don't lose the ability to delete those
+ i = M.repo.TreesDoc.m.find({"_id": {"$in": ci_ids}}).count()
+ log.info("Deleting %i TreesDoc docs...", i)
+ M.repo.TreesDoc.m.remove({"_id": {"$in": ci_ids}})
+
+ # delete LastCommitDocs for non-trees
+ repo_lastcommit_re = re.compile("^{}:".format(c.app.repo._id))
+ i = M.repo.LastCommitDoc.m.find(dict(_id=repo_lastcommit_re)).count()
+ log.info("Deleting %i remaining LastCommitDoc docs, by repo id...", i)
+ M.repo.LastCommitDoc.m.remove(dict(_id=repo_lastcommit_re))
+
+ i = M.repo.DiffInfoDoc.m.find({"_id": {"$in": ci_ids}}).count()
+ log.info("Deleting %i DiffInfoDoc docs...", i)
+ M.repo.DiffInfoDoc.m.remove({"_id": {"$in": ci_ids}})
+
+ i = M.repo.CommitRunDoc.m.find({"commit_ids": {"$in": ci_ids}}).count()
+ log.info("Deleting %i CommitRunDoc docs...", i)
+ M.repo.CommitRunDoc.m.remove({"commit_ids": {"$in": ci_ids}})
+ del ci_ids
+
+ try:
+ if options.all:
+ log.info('Refreshing ALL commits in %r', c.app.repo)
+ else:
+ log.info('Refreshing NEW commits in %r', c.app.repo)
+ if options.profile:
+ import cProfile
+ cProfile.runctx('c.app.repo.refresh(options.all, notify=options.notify)',
+ globals(), locals(), 'refresh.profile')
+ else:
+ c.app.repo.refresh(options.all, notify=options.notify)
+ except:
+ log.exception('Error refreshing %r', c.app.repo)
+ ThreadLocalORMSession.flush_all()
+
+ @classmethod
+ def parser(cls):
+ def repo_type_list(s):
+ repo_types = []
+ for repo_type in s.split(','):
+ repo_type = repo_type.strip()
+ if repo_type not in ['svn', 'git', 'hg']:
+ raise argparse.ArgumentTypeError(
+ '{} is not a valid repo type.'.format(repo_type))
+ repo_types.append(repo_type)
+ return repo_types
+
+ parser = argparse.ArgumentParser(description='Scan repos on filesytem and '
+ 'update repo metadata in MongoDB. Run for all repos (no args), '
+ 'or restrict by neighborhood, project, or code tool mount point.')
+ parser.add_argument('--nbhd', action='store', default='', dest='nbhd',
+ help='Restrict update to a particular neighborhood, e.g. /p/.')
+ parser.add_argument('--project', action='store', default='', dest='project',
+ help='Restrict update to a particular project. To specify a '
+ 'subproject, use a slash: project/subproject.')
+ parser.add_argument('--project-regex', action='store', default='',
+ dest='project_regex',
+ help='Restrict update to projects for which the shortname matches '
+ 'the provided regex.')
+ parser.add_argument('--repo-types', action='store', type=repo_type_list,
+ default=['svn', 'git', 'hg'], dest='repo_types',
+ help='Only refresh repos of the given type(s). Defaults to: '
+ 'svn,git,hg. Example: --repo-types=git,hg')
+ parser.add_argument('--mount-point', default='', dest='mount_point',
+ help='Restrict update to repos at the given tool mount point. ')
+ parser.add_argument('--clean', action='store_true', dest='clean',
+ default=False, help='Remove repo-related mongo docs (for '
+ 'project(s) being refreshed only) before doing the refresh.')
+ parser.add_argument('--all', action='store_true', dest='all', default=False,
+ help='Refresh all commits (not just the ones that are new).')
+ parser.add_argument('--notify', action='store_true', dest='notify',
+ default=False, help='Send email notifications of new commits.')
+ parser.add_argument('--dry-run', action='store_true', dest='dry_run',
+ default=False, help='Log names of projects that would have their '
+ 'repos refreshed, but do not perform the actual refresh.')
+ parser.add_argument('--profile', action='store_true', dest='profile',
+ default=False, help='Enable the profiler (slow). Will log '
+ 'profiling output to ./refresh.profile')
+ return parser
+
+
+if __name__ == '__main__':
+ faulthandler.enable()
+ RefreshRepo.main()
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/05012ca8/Allura/allura/scripts/scripttask.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/scripttask.py b/Allura/allura/scripts/scripttask.py
new file mode 100644
index 0000000..9dbe857
--- /dev/null
+++ b/Allura/allura/scripts/scripttask.py
@@ -0,0 +1,90 @@
+"""
+Provides ScriptTask, a base class for implementing a command-line script that
+can be run as a task.
+
+To use, subclass ScriptTask and implement two methods::
+
+ class MyScript(ScriptTask):
+ @classmethod
+ def parser(cls):
+ '''Define and return an argparse.ArgumentParser instance'''
+ pass
+
+ @classmethod
+ def execute(cls, options):
+ '''Your main code goes here.'''
+ pass
+
+To call as a script::
+
+ if __name__ == '__main__':
+ MyScript.main()
+
+To call as a task::
+
+ # post the task with cmd-line-style args
+ MyScript.post('-p myproject --dry-run')
+
+"""
+
+import argparse
+import logging
+import sys
+
+from allura.lib.decorators import task
+
+
+log = logging.getLogger(__name__)
+
+
+class Writer(object):
+ def __init__(self, func):
+ self.func = func
+
+ def write(self, buf):
+ self.func(buf)
+
+
+class ScriptTask(object):
+ """Base class for a command-line script that is also executable as a task."""
+
+ class __metaclass__(type):
+ @property
+ def __doc__(cls):
+ return cls.parser().format_help()
+ def __new__(meta, classname, bases, classDict):
+ return task(type.__new__(meta, classname, bases, classDict))
+
+ def __new__(cls, arg_string):
+ cls._execute_task(arg_string)
+
+ @classmethod
+ def _execute_task(cls, arg_string):
+ try:
+ _stdout = sys.stdout
+ _stderr = sys.stderr
+ sys.stdout = Writer(log.info)
+ sys.stderr = Writer(log.error)
+ try:
+ options = cls.parser().parse_args(arg_string.split(' '))
+ except SystemExit:
+ raise Exception("Error parsing args: '%s'" % arg_string)
+ cls.execute(options)
+ finally:
+ sys.stdout = _stdout
+ sys.stderr = _stderr
+
+ @classmethod
+ def execute(cls, options):
+ """User code goes here."""
+ pass
+
+ @classmethod
+ def parser(cls):
+ """Return an argument parser appropriate for your script."""
+ return argparse.ArgumentParser(description="Default no-op parser")
+
+ @classmethod
+ def main(cls):
+ options = cls.parser().parse_args()
+ cls.execute(options)
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/05012ca8/scripts/refresh-all-repos.py
----------------------------------------------------------------------
diff --git a/scripts/refresh-all-repos.py b/scripts/refresh-all-repos.py
deleted file mode 100644
index 1cf7e3d..0000000
--- a/scripts/refresh-all-repos.py
+++ /dev/null
@@ -1,166 +0,0 @@
-import argparse
-import logging
-import re
-
-import faulthandler
-from pylons import c
-from ming.orm import ThreadLocalORMSession
-
-from allura import model as M
-from allura.lib.utils import chunked_find, chunked_list
-
-log = logging.getLogger(__name__)
-
-
-def main(options):
- q_project = {}
- if options.nbhd:
- nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
- if not nbhd:
- return "Invalid neighborhood url prefix."
- q_project['neighborhood_id'] = nbhd._id
- if options.project:
- q_project['shortname'] = options.project
- elif options.project_regex:
- q_project['shortname'] = {'$regex': options.project_regex}
-
- log.info('Refreshing repositories')
- if options.clean_all:
- log.info('Removing all repository objects')
- M.repo.CommitDoc.m.remove({})
- M.repo.TreeDoc.m.remove({})
- M.repo.TreesDoc.m.remove({})
- M.repo.DiffInfoDoc.m.remove({})
- M.repo.CommitRunDoc.m.remove({})
- M.repo.LastCommitDoc.m.remove({})
-
- for chunk in chunked_find(M.Project, q_project):
- for p in chunk:
- log.info("Refreshing repos for project '%s'." % p.shortname)
- if options.dry_run:
- continue
- c.project = p
- if options.mount_point:
- mount_points = [options.mount_point]
- else:
- mount_points = [ac.options.mount_point for ac in
- M.AppConfig.query.find(dict(project_id=p._id))]
- for app in (p.app_instance(mp) for mp in mount_points):
- c.app = app
- if not hasattr(app, 'repo'):
- continue
- if c.app.repo.tool.lower() not in options.repo_types:
- log.info("Skipping %r: wrong type (%s)", c.app.repo,
- c.app.repo.tool.lower())
- continue
-
- if options.clean:
- ci_ids = list(c.app.repo.all_commit_ids())
- log.info("Deleting mongo data for %i commits...", len(ci_ids))
- tree_ids = [
- tree_id for doc in
- M.repo.TreesDoc.m.find({"_id": {"$in": ci_ids}},
- {"tree_ids": 1})
- for tree_id in doc.get("tree_ids", [])]
-
- i = M.repo.CommitDoc.m.find({"_id": {"$in": ci_ids}}).count()
- log.info("Deleting %i CommitDoc docs...", i)
- M.repo.CommitDoc.m.remove({"_id": {"$in": ci_ids}})
-
- # delete these in chunks, otherwise the query doc can
- # exceed the max BSON size limit (16MB at the moment)
- for tree_ids_chunk in chunked_list(tree_ids, 300000):
- i = M.repo.TreeDoc.m.find({"_id": {"$in": tree_ids_chunk}}).count()
- log.info("Deleting %i TreeDoc docs...", i)
- M.repo.TreeDoc.m.remove({"_id": {"$in": tree_ids_chunk}})
- del tree_ids
-
- # delete these after TreeDoc and LastCommitDoc so that if
- # we crash, we don't lose the ability to delete those
- i = M.repo.TreesDoc.m.find({"_id": {"$in": ci_ids}}).count()
- log.info("Deleting %i TreesDoc docs...", i)
- M.repo.TreesDoc.m.remove({"_id": {"$in": ci_ids}})
-
- # delete LastCommitDocs
- i = M.repo.LastCommitDoc.m.find(dict(commit_ids={'$in': ci_ids})).count()
- log.info("Deleting %i remaining LastCommitDoc docs, by repo id...", i)
- M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': ci_ids}))
-
- i = M.repo.DiffInfoDoc.m.find({"_id": {"$in": ci_ids}}).count()
- log.info("Deleting %i DiffInfoDoc docs...", i)
- M.repo.DiffInfoDoc.m.remove({"_id": {"$in": ci_ids}})
-
- i = M.repo.CommitRunDoc.m.find({"commit_ids": {"$in": ci_ids}}).count()
- log.info("Deleting %i CommitRunDoc docs...", i)
- M.repo.CommitRunDoc.m.remove({"commit_ids": {"$in": ci_ids}})
- del ci_ids
-
- try:
- if options.all:
- log.info('Refreshing ALL commits in %r', c.app.repo)
- else:
- log.info('Refreshing NEW commits in %r', c.app.repo)
- if options.profile:
- import cProfile
- cProfile.runctx('c.app.repo.refresh(options.all, notify=options.notify)',
- globals(), locals(), 'refresh.profile')
- else:
- c.app.repo.refresh(options.all, notify=options.notify)
- except:
- log.exception('Error refreshing %r', c.app.repo)
- ThreadLocalORMSession.flush_all()
- ThreadLocalORMSession.close_all()
-
-
-def repo_type_list(s):
- repo_types = []
- for repo_type in s.split(','):
- repo_type = repo_type.strip()
- if repo_type not in ['svn', 'git', 'hg']:
- raise argparse.ArgumentTypeError(
- '{} is not a valid repo type.'.format(repo_type))
- repo_types.append(repo_type)
- return repo_types
-
-
-def parse_options():
- parser = argparse.ArgumentParser(description='Scan repos on filesytem and '
- 'update repo metadata in MongoDB. Run for all repos (no args), '
- 'or restrict by neighborhood, project, or code tool mount point.')
- parser.add_argument('--nbhd', action='store', default='', dest='nbhd',
- help='Restrict update to a particular neighborhood, e.g. /p/.')
- parser.add_argument('--project', action='store', default='', dest='project',
- help='Restrict update to a particular project. To specify a '
- 'subproject, use a slash: project/subproject.')
- parser.add_argument('--project-regex', action='store', default='',
- dest='project_regex',
- help='Restrict update to projects for which the shortname matches '
- 'the provided regex.')
- parser.add_argument('--repo-types', action='store', type=repo_type_list,
- default=['svn', 'git', 'hg'], dest='repo_types',
- help='Only refresh repos of the given type(s). Defaults to: '
- 'svn,git,hg. Example: --repo-types=git,hg')
- parser.add_argument('--mount_point', default='', dest='mount_point',
- help='Restrict update to repos at the given tool mount point. ')
- parser.add_argument('--clean', action='store_true', dest='clean',
- default=False, help='Remove repo-related mongo docs (for '
- 'project(s) being refreshed only) before doing the refresh.')
- parser.add_argument('--clean-all', action='store_true', dest='clean_all',
- default=False, help='Remove ALL repo-related mongo docs before '
- 'refresh.')
- parser.add_argument('--all', action='store_true', dest='all', default=False,
- help='Refresh all commits (not just the ones that are new).')
- parser.add_argument('--notify', action='store_true', dest='notify',
- default=False, help='Send email notifications of new commits.')
- parser.add_argument('--dry-run', action='store_true', dest='dry_run',
- default=False, help='Log names of projects that would have their '
- 'repos refreshed, but do not perform the actual refresh.')
- parser.add_argument('--profile', action='store_true', dest='profile',
- default=False, help='Enable the profiler (slow). Will log '
- 'profiling output to ./refresh.profile')
- return parser.parse_args()
-
-if __name__ == '__main__':
- import sys
- faulthandler.enable()
- sys.exit(main(parse_options()))