You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by br...@apache.org on 2017/10/17 20:30:19 UTC

[2/2] allura git commit: Proof-of-concept removing TreesDoc collection

Proof-of-concept removing TreesDoc collection


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/b0f237b2
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/b0f237b2
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/b0f237b2

Branch: refs/heads/db/remove_treestuff
Commit: b0f237b2714e69e5595756ebf92577581f845b3f
Parents: c80c80f
Author: Dave Brondsema <da...@brondsema.net>
Authored: Tue Oct 17 16:30:01 2017 -0400
Committer: Dave Brondsema <da...@brondsema.net>
Committed: Tue Oct 17 16:30:01 2017 -0400

----------------------------------------------------------------------
 Allura/allura/model/repo.py                     |  4 +-
 Allura/allura/model/repo_refresh.py             | 37 ++++++++-------
 Allura/allura/model/repository.py               |  9 +---
 Allura/allura/scripts/refresh_last_commits.py   |  1 +
 Allura/allura/scripts/refreshrepo.py            | 13 ++----
 Allura/allura/tests/model/test_repo.py          |  8 ----
 Allura/test-light.py                            | 49 --------------------
 ForgeSVN/forgesvn/model/svn.py                  |  4 --
 .../tests/model/test_svnimplementation.py       |  4 +-
 9 files changed, 29 insertions(+), 100 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/b0f237b2/Allura/allura/model/repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
index 6ba6633..b01cb2b 100644
--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -21,11 +21,11 @@
 
 from .repository import SUser, SObjType
 from .repository import QSIZE, README_RE, VIEWABLE_EXTENSIONS, PYPELINE_EXTENSIONS, DIFF_SIMILARITY_THRESHOLD
-from .repository import CommitDoc, TreeDoc, LastCommitDoc, TreesDoc, CommitRunDoc
+from .repository import CommitDoc, TreeDoc, LastCommitDoc, CommitRunDoc
 from .repository import RepoObject, Commit, Tree, Blob, LastCommit
 from .repository import ModelCache
 
 __all__ = [
     'SUser', 'SObjType', 'QSIZE', 'README_RE', 'VIEWABLE_EXTENSIONS', 'PYPELINE_EXTENSIONS',
-    'DIFF_SIMILARITY_THRESHOLD', 'CommitDoc', 'TreeDoc', 'LastCommitDoc', 'TreesDoc', 'CommitRunDoc', 'RepoObject',
+    'DIFF_SIMILARITY_THRESHOLD', 'TreeDoc', 'CommitDoc', 'LastCommitDoc', 'CommitRunDoc', 'RepoObject',
     'Commit', 'Tree', 'Blob', 'LastCommit', 'ModelCache']

http://git-wip-us.apache.org/repos/asf/allura/blob/b0f237b2/Allura/allura/model/repo_refresh.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo_refresh.py b/Allura/allura/model/repo_refresh.py
index 99fda0b..6ea81f5 100644
--- a/Allura/allura/model/repo_refresh.py
+++ b/Allura/allura/model/repo_refresh.py
@@ -32,7 +32,7 @@ from ming.orm import mapper, session, ThreadLocalORMSession
 
 from allura.lib import utils
 from allura.lib import helpers as h
-from allura.model.repository import CommitDoc, TreeDoc, TreesDoc
+from allura.model.repository import CommitDoc, TreeDoc
 from allura.model.repository import CommitRunDoc
 from allura.model.repository import Commit, Tree, LastCommit, ModelCache
 from allura.model.index import ArtifactReferenceDoc, ShortlinkDoc
@@ -101,6 +101,7 @@ def refresh_repo(repo, all_commits=False, notify=True, new_clone=False):
     # Refresh trees
     # Like diffs below, pre-computing trees for some SCMs is too expensive,
     # so we skip it here, then do it on-demand later.
+    """
     if repo._refresh_precompute:
         cache = {}
         for i, oid in enumerate(commit_ids):
@@ -108,12 +109,13 @@ def refresh_repo(repo, all_commits=False, notify=True, new_clone=False):
             cache = refresh_commit_trees(ci, cache)
             if (i + 1) % 100 == 0:
                 log.info('Refresh commit trees %d: %s', (i + 1), ci._id)
+    """
 
-    # Compute diffs
     cache = {}
     # For some SCMs, we don't want to pre-compute the LCDs because that
     # would be too expensive, so we skip them here and do them on-demand
     # with caching.
+    """
     if repo._refresh_precompute:
         model_cache = ModelCache()
         lcid_cache = {}
@@ -124,6 +126,7 @@ def refresh_repo(repo, all_commits=False, notify=True, new_clone=False):
             ThreadLocalORMSession.flush_all()
             if (i + 1) % 100 == 0:
                 log.info('Compute last commit info %d: %s', (i + 1), ci._id)
+    """
 
     # Clear any existing caches for branches/tags
     if repo.cached_branches:
@@ -172,18 +175,18 @@ def refresh_repo(repo, all_commits=False, notify=True, new_clone=False):
         send_notifications(repo, reversed(commit_ids))
 
 
-def refresh_commit_trees(ci, cache):
-    '''Refresh the list of trees included withn a commit'''
-    if ci.tree_id is None:
-        return cache
-    trees_doc = TreesDoc(dict(
-        _id=ci._id,
-        tree_ids=list(trees(ci.tree_id, cache))))
-    trees_doc.m.save(safe=False)
-    new_cache = dict(
-        (oid, cache[oid])
-        for oid in trees_doc.tree_ids)
-    return new_cache
+# def refresh_commit_trees(ci, cache):
+#     '''Refresh the list of trees included within a commit'''
+#     if ci.tree_id is None:
+#         return cache
+#     trees_doc = TreesDoc(dict(
+#         _id=ci._id,
+#         tree_ids=list(trees(ci.tree_id, cache))))
+#     trees_doc.m.save(safe=False)
+#     new_cache = dict(
+#         (oid, cache[oid])
+#         for oid in trees_doc.tree_ids)
+#     return new_cache
 
 
 def refresh_commit_repos(all_commit_ids, repo):
@@ -348,7 +351,7 @@ class CommitRunBuilder(object):
                 break
             del self.runs[p_run_id]
 
-
+"""
 def trees(id, cache):
     '''Recursively generate the list of trees contained within a given tree ID'''
     yield id
@@ -360,7 +363,7 @@ def trees(id, cache):
     for i in entries:
         for x in trees(i, cache):
             yield x
-
+"""
 
 def unknown_commit_ids(all_commit_ids):
     '''filter out all commit ids that have already been cached'''
@@ -541,6 +544,7 @@ def last_known_commit_id(all_commit_ids, new_commit_ids):
     return all_commit_ids[all_commit_ids.index(new_commit_ids[0]) - 1]
 
 
+"""
 def compute_lcds(commit, model_cache, lcid_cache):
     '''
     Compute LastCommit data for every Tree node under this tree.
@@ -566,6 +570,7 @@ def _compute_lcds(tree, cache):
     for x in tree.tree_ids:
         sub_tree = _pull_tree(cache, x.id, tree, x.name)
         _compute_lcds(sub_tree, cache)
+"""
 
 
 def _pull_tree(cache, tree_id, *context):

http://git-wip-us.apache.org/repos/asf/allura/blob/b0f237b2/Allura/allura/model/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
index 5386ae7..996b92f 100644
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -981,7 +981,7 @@ CommitDoc = collection(
     Field('child_ids', [str], index=True),
     Field('repo_ids', [S.ObjectId()], index=True))
 
-# Basic tree information (also see TreesDoc)
+# Basic tree information
 TreeDoc = collection(
     'repo_tree', main_doc_session,
     Field('_id', str),
@@ -1000,13 +1000,6 @@ LastCommitDoc = collection(
         name=str,
         commit_id=str)]))
 
-# List of all trees contained within a commit
-# TreesDoc._id = CommitDoc._id
-# TreesDoc.tree_ids = [ TreeDoc._id, ... ]
-TreesDoc = collection(
-    'repo_trees', main_doc_session,
-    Field('_id', str),
-    Field('tree_ids', [str]))
 
 # List of commit runs (a run is a linear series of single-parent commits)
 # CommitRunDoc.commit_ids = [ CommitDoc._id, ... ]

http://git-wip-us.apache.org/repos/asf/allura/blob/b0f237b2/Allura/allura/scripts/refresh_last_commits.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/refresh_last_commits.py b/Allura/allura/scripts/refresh_last_commits.py
index 095fafb..cbaae53 100644
--- a/Allura/allura/scripts/refresh_last_commits.py
+++ b/Allura/allura/scripts/refresh_last_commits.py
@@ -150,6 +150,7 @@ class RefreshLastCommits(ScriptTask):
                 continue
             commit.set_context(c.app.repo)
             with time(timings):
+                # FIXME: repo_refresh.compute_lcds is now disabled (wrapped in a string literal), so this call will fail; call LastCommit._build() instead, or remove this script?
                 M.repo_refresh.compute_lcds(commit, model_cache, lcid_cache)
                 ThreadLocalORMSession.flush_all()
             if i % 100 == 0:

http://git-wip-us.apache.org/repos/asf/allura/blob/b0f237b2/Allura/allura/scripts/refreshrepo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/refreshrepo.py b/Allura/allura/scripts/refreshrepo.py
index 115e5a4..6fa4876 100644
--- a/Allura/allura/scripts/refreshrepo.py
+++ b/Allura/allura/scripts/refreshrepo.py
@@ -71,7 +71,9 @@ class RefreshRepo(ScriptTask):
                                  len(ci_ids))
                         # like the tree_ids themselves below, we need to process these in
                         # chunks to avoid hitting the BSON max size limit
+                        """
                         tree_ids = []
+                        # FIXME: TreesDoc provided the commit_id -> tree_ids mapping that this cleanup relied on to know which TreeDocs to delete
                         for ci_ids_chunk in chunked_list(ci_ids, 3000):
                             tree_ids.extend([
                                 tree_id for doc in
@@ -97,18 +99,9 @@ class RefreshRepo(ScriptTask):
                                 M.repository.TreeDoc.m.remove(
                                     {"_id": {"$in": tree_ids_chunk}})
                         del tree_ids
+                        """
 
-                        # delete these after TreeDoc and LastCommitDoc so that if
-                        # we crash, we don't lose the ability to delete those
                         for ci_ids_chunk in chunked_list(ci_ids, 3000):
-                            # delete TreesDocs
-                            i = M.repository.TreesDoc.m.find(
-                                {"_id": {"$in": ci_ids_chunk}}).count()
-                            if i:
-                                log.info("Deleting %i TreesDoc docs...", i)
-                                M.repository.TreesDoc.m.remove(
-                                    {"_id": {"$in": ci_ids_chunk}})
-
                             # delete LastCommitDocs
                             i = M.repository.LastCommitDoc.m.find(
                                 dict(commit_id={'$in': ci_ids_chunk})).count()

http://git-wip-us.apache.org/repos/asf/allura/blob/b0f237b2/Allura/allura/tests/model/test_repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/model/test_repo.py b/Allura/allura/tests/model/test_repo.py
index 3761821..189fdf4 100644
--- a/Allura/allura/tests/model/test_repo.py
+++ b/Allura/allura/tests/model/test_repo.py
@@ -443,14 +443,6 @@ class TestModelCache(unittest.TestCase):
         tr_get.assert_called_once_with(_id='foo')
         self.assertEqual(val, tree1)
 
-    @mock.patch.object(M.repository.TreesDoc.m, 'get')
-    def test_get_doc(self, tr_get):
-        trees = tr_get.return_value = mock.Mock(
-            spec=['_id', 'val'], _id='foo', val='bar')
-        val = self.cache.get(M.repository.TreesDoc, {'_id': 'foo'})
-        tr_get.assert_called_once_with(_id='foo')
-        self.assertEqual(val, trees)
-
     def test_set(self):
         tree = mock.Mock(spec=['_id', 'test_set'], _id='foo', val='test_set')
         self.cache.set(M.repository.Tree, {'val': 'test_set'}, tree)

http://git-wip-us.apache.org/repos/asf/allura/blob/b0f237b2/Allura/test-light.py
----------------------------------------------------------------------
diff --git a/Allura/test-light.py b/Allura/test-light.py
deleted file mode 100644
index f095214..0000000
--- a/Allura/test-light.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#       Licensed to the Apache Software Foundation (ASF) under one
-#       or more contributor license agreements.  See the NOTICE file
-#       distributed with this work for additional information
-#       regarding copyright ownership.  The ASF licenses this file
-#       to you under the Apache License, Version 2.0 (the
-#       "License"); you may not use this file except in compliance
-#       with the License.  You may obtain a copy of the License at
-#
-#         http://www.apache.org/licenses/LICENSE-2.0
-#
-#       Unless required by applicable law or agreed to in writing,
-#       software distributed under the License is distributed on an
-#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-#       KIND, either express or implied.  See the License for the
-#       specific language governing permissions and limitations
-#       under the License.
-
-import sys
-
-from pylons import tmpl_context as c
-
-from allura.lib import helpers as h
-from allura.model.repository import CommitDoc, TreeDoc, TreesDoc
-from allura.model.repository import LastCommitDoc, CommitRunDoc
-from allura.model.repo_refresh import refresh_repo
-
-
-def main():
-    if len(sys.argv) > 1:
-        h.set_context('test')
-        c.project.install_app('Git', 'code', 'Code',
-                              init_from_url='/home/rick446/src/forge')
-        c.project.install_app('Hg', 'code2', 'Code2',
-                              init_from_url='/home/rick446/src/Kajiki')
-    CommitDoc.m.remove({})
-    TreeDoc.m.remove({})
-    TreesDoc.m.remove({})
-    LastCommitDoc.m.remove({})
-    CommitRunDoc.m.remove({})
-
-    h.set_context('test', 'code')
-    refresh_repo(c.app.repo, notify=False)
-    h.set_context('test', 'code2')
-    refresh_repo(c.app.repo, notify=False)
-
-
-if __name__ == '__main__':
-    main()
-    # dolog()

http://git-wip-us.apache.org/repos/asf/allura/blob/b0f237b2/ForgeSVN/forgesvn/model/svn.py
----------------------------------------------------------------------
diff --git a/ForgeSVN/forgesvn/model/svn.py b/ForgeSVN/forgesvn/model/svn.py
index db52738..2aa2b40 100644
--- a/ForgeSVN/forgesvn/model/svn.py
+++ b/ForgeSVN/forgesvn/model/svn.py
@@ -439,10 +439,6 @@ class SVNImplementation(M.RepositoryImplementation):
         if is_new:
             commit_id = self._oid(infos[0][1].last_changed_rev.number)
             path = tree_path.strip('/')
-            RM.TreesDoc.m.update_partial(
-                {'_id': commit._id},
-                {'$addToSet': {'tree_ids': tree_id}},
-                upsert=True)
             RM.LastCommitDoc.m.update_partial(
                 {'commit_id': commit_id, 'path': path},
                 {'commit_id': commit_id, 'path':

http://git-wip-us.apache.org/repos/asf/allura/blob/b0f237b2/ForgeSVN/forgesvn/tests/model/test_svnimplementation.py
----------------------------------------------------------------------
diff --git a/ForgeSVN/forgesvn/tests/model/test_svnimplementation.py b/ForgeSVN/forgesvn/tests/model/test_svnimplementation.py
index c51cdce..216255d 100644
--- a/ForgeSVN/forgesvn/tests/model/test_svnimplementation.py
+++ b/ForgeSVN/forgesvn/tests/model/test_svnimplementation.py
@@ -36,10 +36,9 @@ class TestSVNImplementation(object):
         self._test_compute_tree_new('trunk/foo')
 
     @patch('allura.model.repository.LastCommitDoc.m.update_partial')
-    @patch('allura.model.repository.TreesDoc.m.update_partial')
     @patch('allura.model.repository.Tree.upsert')
     @patch('allura.model.repository.Tree.query.get')
-    def _test_compute_tree_new(self, path, tree_get, tree_upsert, treesdoc_partial, lcd_partial):
+    def _test_compute_tree_new(self, path, tree_get, tree_upsert, lcd_partial):
         repo = Mock(fs_path=g.tmpdir + '/')
         repo.name = 'code'
         impl = SVNImplementation(repo)
@@ -54,7 +53,6 @@ class TestSVNImplementation(object):
 
         assert_equal(impl._svn.info2.call_args[0]
                      [0], 'file://' + g.tmpdir + '/code/trunk/foo')
-        assert treesdoc_partial.called
         assert lcd_partial.called
 
     def test_last_commit_ids(self):