You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2013/05/16 20:53:18 UTC

[3/3] git commit: [#6192] ticket:336 Split add_artifacts tasks into smaller sizes

 [#6192] ticket:336 Split add_artifacts tasks into smaller sizes


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/fbf88bb0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/fbf88bb0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/fbf88bb0

Branch: refs/heads/master
Commit: fbf88bb0c4be366b2a25191a32999ca15f389d7f
Parents: 74dfc19
Author: Igor Bondarenko <je...@gmail.com>
Authored: Wed May 8 09:58:57 2013 +0000
Committer: Dave Brondsema <db...@slashdotmedia.com>
Committed: Thu May 16 18:44:24 2013 +0000

----------------------------------------------------------------------
 Allura/allura/command/show_models.py |   39 +++++++++++++++++++----
 Allura/allura/tests/test_commands.py |   47 +++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/fbf88bb0/Allura/allura/command/show_models.py
----------------------------------------------------------------------
diff --git a/Allura/allura/command/show_models.py b/Allura/allura/command/show_models.py
index 17d4b7f..c4e49d1 100644
--- a/Allura/allura/command/show_models.py
+++ b/Allura/allura/command/show_models.py
@@ -20,12 +20,12 @@ from collections import defaultdict
 from itertools import groupby
 
 from pylons import tmpl_context as c, app_globals as g
-from pymongo.errors import DuplicateKeyError
+from pymongo.errors import DuplicateKeyError, InvalidDocument
 
 from ming.orm import mapper, session, Mapper
 from ming.orm.declarative import MappedClass
 
-import allura.tasks.index_tasks
+from allura.tasks.index_tasks import add_artifacts
 from allura.lib.exceptions import CompoundError
 from allura.lib import utils
 from . import base
@@ -119,12 +119,12 @@ class ReindexCommand(base.Command):
                     M.main_orm_session.flush()
                     M.artifact_orm_session.clear()
                     try:
-                        add_artifacts = allura.tasks.index_tasks.add_artifacts
                         if self.options.tasks:
-                            add_artifacts = add_artifacts.post
-                        add_artifacts(ref_ids,
-                                       update_solr=self.options.solr,
-                                       update_refs=self.options.refs)
+                            self._chunked_add_artifacts(ref_ids)
+                        else:
+                            add_artifacts(ref_ids,
+                                    update_solr=self.options.solr,
+                                    update_refs=self.options.refs)
                     except CompoundError, err:
                         base.log.exception('Error indexing artifacts:\n%r', err)
                         base.log.error('%s', err.format_error())
@@ -132,6 +132,31 @@ class ReindexCommand(base.Command):
                     M.main_orm_session.clear()
         base.log.info('Reindex %s', 'queued' if self.options.tasks else 'done')
 
+    def _chunked_add_artifacts(self, ref_ids):
+        # ref_ids contains solr index ids which can easily be over
+        # 100 bytes. Here we allow for 160 bytes avg, plus
+        # room for other document overhead.
+        for chunk in utils.chunked_list(ref_ids, 100 * 1000):
+            self._post_add_artifacts(chunk)
+
+    def _post_add_artifacts(self, chunk):
+        """
+        Post task, recursively splitting and re-posting if the resulting
+        mongo document is too large.
+        """
+        try:
+            add_artifacts.post(chunk,
+                    update_solr=self.options.solr,
+                    update_refs=self.options.refs)
+        except InvalidDocument as e:
+            # there are many types of InvalidDocument, only recurse if it's expected to help
+            if str(e).startswith('BSON document too large'):
+                self._post_add_artifacts(chunk[:len(chunk) // 2])
+                self._post_add_artifacts(chunk[len(chunk) // 2:])
+            else:
+                raise
+
+
 class EnsureIndexCommand(base.Command):
     min_args=1
     max_args=1

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/fbf88bb0/Allura/allura/tests/test_commands.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/test_commands.py b/Allura/allura/tests/test_commands.py
index bfaf350..5f4963c 100644
--- a/Allura/allura/tests/test_commands.py
+++ b/Allura/allura/tests/test_commands.py
@@ -19,6 +19,7 @@ from nose.tools import assert_raises, assert_in
 from datadiff.tools import assert_equal
 from ming.orm import ThreadLocalORMSession
 from mock import Mock, call, patch
+import pymongo
 
 from alluratest.controller import setup_basic_test, setup_global_objects
 from allura.command import base, script, set_neighborhood_features, \
@@ -26,6 +27,7 @@ from allura.command import base, script, set_neighborhood_features, \
 from allura import model as M
 from forgeblog import model as BM
 from allura.lib.exceptions import InvalidNBFeatureValueError
+from allura.tests import decorators as td
 
 test_config = 'test.ini#main'
 
@@ -357,3 +359,48 @@ class TestReindexCommand(object):
         cmd.run([test_config, '--project-regex', '^test'])
         utils.chunked_find.assert_called_once_with(
             M.Project, {'shortname': {'$regex': '^test'}})
+
+    @patch('allura.command.show_models.add_artifacts')
+    def test_chunked_add_artifacts(self, add_artifacts):
+        cmd = show_models.ReindexCommand('reindex')
+        cmd.options = Mock()
+        ref_ids = list(range(100 * 1000 * 2 + 20))
+        cmd._chunked_add_artifacts(ref_ids)
+        assert_equal(len(add_artifacts.post.call_args_list), 3)
+        assert_equal(len(add_artifacts.post.call_args_list[0][0][0]), 100 * 1000)
+        assert_equal(len(add_artifacts.post.call_args_list[1][0][0]), 100 * 1000)
+        assert_equal(len(add_artifacts.post.call_args_list[2][0][0]), 20)
+
+    @patch('allura.command.show_models.add_artifacts')
+    def test_post_add_artifacts_too_large(self, add_artifacts):
+        def on_post(chunk, **kw):
+            if len(chunk) > 1:
+                raise pymongo.errors.InvalidDocument(
+                        "BSON document too large (16906035 bytes) - the connected server supports BSON document sizes up to 16777216 bytes.")
+        add_artifacts.post.side_effect = on_post
+        cmd = show_models.ReindexCommand('reindex')
+        cmd.options = Mock()
+        cmd._post_add_artifacts(range(5))
+        kw = {'update_solr': cmd.options.solr, 'update_refs': cmd.options.refs}
+        expected = [
+            call([0, 1, 2, 3, 4], **kw),
+            call([0, 1], **kw),
+            call([0], **kw),
+            call([1], **kw),
+            call([2, 3, 4], **kw),
+            call([2], **kw),
+            call([3, 4], **kw),
+            call([3], **kw),
+            call([4], **kw)
+        ]
+        assert_equal(expected, add_artifacts.post.call_args_list)
+
+    @patch('allura.command.show_models.add_artifacts')
+    def test_post_add_artifacts_other_error(self, add_artifacts):
+        def on_post(chunk, **kw):
+            raise pymongo.errors.InvalidDocument("Cannot encode object...")
+        add_artifacts.post.side_effect = on_post
+        cmd = show_models.ReindexCommand('reindex')
+        cmd.options = Mock()
+        with td.raises(pymongo.errors.InvalidDocument):
+            cmd._post_add_artifacts(range(5))