You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2014/05/21 16:45:01 UTC
[13/20] git commit: [#7257] ticket:563 Add ScriptTask to (re)index
all projects
[#7257] ticket:563 Add ScriptTask to (re)index all projects
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/654621c8
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/654621c8
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/654621c8
Branch: refs/heads/master
Commit: 654621c8c28a081490dbcf92f0d43c1502346ea0
Parents: 0f8a7d4
Author: Igor Bondarenko <je...@gmail.com>
Authored: Mon Apr 28 16:48:43 2014 +0300
Committer: Dave Brondsema <db...@slashdotmedia.com>
Committed: Wed May 21 14:44:21 2014 +0000
----------------------------------------------------------------------
Allura/allura/model/project.py | 2 +-
Allura/allura/scripts/reindex_projects.py | 119 +++++++++++++++++++++++++
2 files changed, 120 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/654621c8/Allura/allura/model/project.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/project.py b/Allura/allura/model/project.py
index db7d10f..5c9cfa4 100644
--- a/Allura/allura/model/project.py
+++ b/Allura/allura/model/project.py
@@ -1037,7 +1037,7 @@ class Project(SearchIndexable, MappedClass, ActivityNode, ActivityObject):
deleted_b=self.deleted,
# Not analyzed fields
private_b=self.private,
- category_id_s=str(self.category_id),
+ category_id_s=str(self.category_id or ''),
neighborhood_id_s=str(self.neighborhood_id),
url_s=h.absurl(self.url()),
created_dt=self._id.generation_time,
http://git-wip-us.apache.org/repos/asf/allura/blob/654621c8/Allura/allura/scripts/reindex_projects.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/reindex_projects.py b/Allura/allura/scripts/reindex_projects.py
new file mode 100644
index 0000000..daab641
--- /dev/null
+++ b/Allura/allura/scripts/reindex_projects.py
@@ -0,0 +1,119 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import logging
+
+from pymongo.errors import InvalidDocument
+from pylons import tmpl_context as c, app_globals as g
+
+from allura.scripts import ScriptTask
+from allura import model as M
+from allura.tasks.index_tasks import add_projects
+from allura.lib.utils import chunked_find, chunked_list
+from allura.lib.exceptions import CompoundError
+
+
+# Module-level logger, named after this module.
+log = logging.getLogger(__name__)
+
+
+class ReindexProjects(ScriptTask):
+    """Script task that (re)indexes projects.
+
+    The set of projects can be restricted by neighborhood, by exact
+    shortname, or by a shortname regex; see :meth:`parser` for the
+    available command-line options.
+    """
+
+    @classmethod
+    def execute(cls, options):
+        """Reindex every project selected by ``options``.
+
+        For each matching project the existing Solr documents are deleted
+        (unless ``options.skip_solr_delete`` is set), then the collected
+        project ids are indexed in pieces of at most ``options.max_chunk``
+        ids, either directly or as background tasks (``options.tasks``).
+        With ``options.dry_run`` the projects are only logged.
+
+        :param options: parsed arguments as produced by :meth:`parser`.
+        :returns: an error message string when ``options.nbhd`` does not
+            match any neighborhood, otherwise ``None``.
+        """
+        q_project = {}
+        if options.nbhd:
+            nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
+            if not nbhd:
+                return "Invalid neighborhood url prefix."
+            q_project['neighborhood_id'] = nbhd._id
+        # An exact shortname takes precedence over --project-regex.
+        if options.project:
+            q_project['shortname'] = options.project
+        elif options.project_regex:
+            q_project['shortname'] = {'$regex': options.project_regex}
+
+        for chunk in chunked_find(M.Project, q_project):
+            project_ids = []
+            for p in chunk:
+                log.info('Reindex project %s', p.shortname)
+                if options.dry_run:
+                    continue
+                c.project = p
+                # Clear index for this project
+                if not options.skip_solr_delete:
+                    g.solr.delete(q='project_id_s:%s' % p._id)
+                project_ids.append(p._id)
+
+            try:
+                # Use a distinct name so the outer ``chunk`` of projects is
+                # not shadowed by the chunk of ids.
+                for id_chunk in chunked_list(project_ids, options.max_chunk):
+                    if options.tasks:
+                        cls._post_add_projects(id_chunk)
+                    else:
+                        add_projects(id_chunk)
+            except CompoundError as err:
+                # Indexing errors are logged, not raised, so the remaining
+                # chunks of projects are still processed.
+                log.exception('Error indexing projects:\n%r', err)
+                log.error('%s', err.format_error())
+            M.main_orm_session.flush()
+            M.main_orm_session.clear()
+        log.info('Reindex %s', 'queued' if options.tasks else 'done')
+
+    @classmethod
+    def _post_add_projects(cls, chunk):
+        """
+        Post task, recursively splitting and re-posting if the resulting
+        mongo document is too large.
+
+        :param chunk: list of project ids to pass to ``add_projects``.
+        :raises InvalidDocument: if posting fails for a reason other than
+            document size, or if the chunk cannot be split any further.
+        """
+        try:
+            add_projects.post(chunk)
+        except InvalidDocument as e:
+            # there are many types of InvalidDocument, only recurse if its
+            # expected to help
+            too_large = bool(e.args) and str(e.args[0]).startswith(
+                'BSON document too large')
+            # A chunk of one (or zero) ids cannot be made smaller: halving
+            # it would re-post the very same chunk and recurse forever.
+            if not too_large or len(chunk) <= 1:
+                raise
+            middle = len(chunk) // 2
+            cls._post_add_projects(chunk[:middle])
+            cls._post_add_projects(chunk[middle:])
+
+    @classmethod
+    def parser(cls):
+        """Build the command-line argument parser for this task.
+
+        :returns: an ``argparse.ArgumentParser`` whose parsed result is the
+            ``options`` object expected by :meth:`execute`.
+        """
+        parser = argparse.ArgumentParser(description='Reindex all projects')
+        parser.add_argument(
+            '-n', '--nbhd', action='store', default='', dest='nbhd',
+            help='Restrict reindex to a particular neighborhood, e.g. /p/.')
+        parser.add_argument(
+            '-p', '--project', action='store', default='', dest='project',
+            help='Restrict update to a particular project. To specify a '
+            'subproject, use a slash: project/subproject.')
+        parser.add_argument(
+            '--project-regex', action='store', default='',
+            dest='project_regex',
+            help='Restrict update to projects for which the shortname matches '
+            'the provided regex.')
+        parser.add_argument(
+            '--dry-run', action='store_true', dest='dry_run', default=False,
+            help='Log names of projects that would be reindexed, '
+            'but do not perform the actual reindex.')
+        parser.add_argument(
+            '--skip-solr-delete', action='store_true', dest='skip_solr_delete', default=False,
+            help='Skip clearing solr index.')
+        parser.add_argument(
+            '--tasks', action='store_true', dest='tasks',
+            help='Run each individual index operation as a background task.')
+        # The chunks built from this value hold project ids, so the help
+        # text refers to projects.
+        parser.add_argument(
+            '--max-chunk', dest='max_chunk', type=int, default=100 * 1000,
+            help='Max number of projects to index in one Solr update command')
+        return parser
+
+
+# Run the task when this file is executed directly as a script.
+# NOTE(review): main() is not defined in this file; presumably it is
+# inherited from ScriptTask -- confirm.
+if __name__ == '__main__':
+ ReindexProjects.main()