You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain text version.
Posted to commits@allura.apache.org by br...@apache.org on 2019/03/13 16:25:40 UTC

[allura] 01/01: [#8271] remove unused CommitRunDoc and CommitRunBuilder

This is an automated email from the ASF dual-hosted git repository.

brondsem pushed a commit to branch db/8271
in repository https://gitbox.apache.org/repos/asf/allura.git

commit 4b3cc44ed23e9442f525e63a22d94ae7aaacde67
Author: Dave Brondsema <da...@brondsema.net>
AuthorDate: Wed Mar 13 12:03:08 2019 -0400

    [#8271] remove unused CommitRunDoc and CommitRunBuilder
---
 Allura/allura/model/repo.py                      |   4 +-
 Allura/allura/model/repo_refresh.py              | 136 -----------------------
 Allura/allura/model/repository.py                |  10 --
 Allura/allura/scripts/refreshrepo.py             |   6 -
 Allura/allura/tests/model/test_repo.py           |  34 +-----
 Allura/allura/tests/unit/test_repo.py            |  50 ---------
 ForgeSVN/forgesvn/tests/model/test_repository.py |   3 -
 7 files changed, 3 insertions(+), 240 deletions(-)

diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
index 4cbbea3..6231ba4 100644
--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -21,11 +21,11 @@
 
 from .repository import SUser, SObjType
 from .repository import QSIZE, README_RE, VIEWABLE_EXTENSIONS, PYPELINE_EXTENSIONS, DIFF_SIMILARITY_THRESHOLD
-from .repository import CommitDoc, TreeDoc, LastCommitDoc, CommitRunDoc
+from .repository import CommitDoc, TreeDoc, LastCommitDoc
 from .repository import RepoObject, Commit, Tree, Blob, LastCommit
 from .repository import ModelCache
 
 __all__ = [
     'SUser', 'SObjType', 'QSIZE', 'README_RE', 'VIEWABLE_EXTENSIONS', 'PYPELINE_EXTENSIONS',
-    'DIFF_SIMILARITY_THRESHOLD', 'CommitDoc', 'TreeDoc', 'LastCommitDoc', 'CommitRunDoc', 'RepoObject',
+    'DIFF_SIMILARITY_THRESHOLD', 'CommitDoc', 'TreeDoc', 'LastCommitDoc', 'RepoObject',
     'Commit', 'Tree', 'Blob', 'LastCommit', 'ModelCache']
diff --git a/Allura/allura/model/repo_refresh.py b/Allura/allura/model/repo_refresh.py
index bc24d89..b6677b6 100644
--- a/Allura/allura/model/repo_refresh.py
+++ b/Allura/allura/model/repo_refresh.py
@@ -33,7 +33,6 @@ from ming.orm import mapper, session, ThreadLocalORMSession
 from allura.lib import utils
 from allura.lib import helpers as h
 from allura.model.repository import CommitDoc
-from allura.model.repository import CommitRunDoc
 from allura.model.repository import Commit, Tree, LastCommit, ModelCache
 from allura.model.index import ArtifactReferenceDoc, ShortlinkDoc
 from allura.model.auth import User
@@ -82,25 +81,6 @@ def refresh_repo(repo, all_commits=False, notify=True, new_clone=False, commits_
             log.info('Refresh child info %d for parents of %s',
                      (i + 1), ci._id)
 
-    if repo._refresh_precompute:
-        # Refresh commit runs
-        commit_run_ids = commit_ids
-        # Check if the CommitRuns for the repo are in a good state by checking for
-        # a CommitRunDoc that contains the last known commit. If there isn't one,
-        # the CommitRuns for this repo are in a bad state - rebuild them
-        # entirely.
-        if commit_run_ids != all_commit_ids:
-            last_commit = last_known_commit_id(all_commit_ids, new_commit_ids)
-            log.info('Last known commit id: %s', last_commit)
-            if not CommitRunDoc.m.find(dict(commit_ids=last_commit)).count():
-                log.info('CommitRun incomplete, rebuilding with all commits')
-                commit_run_ids = all_commit_ids
-        log.info('Starting CommitRunBuilder for %s', repo.full_fs_path)
-        rb = CommitRunBuilder(commit_run_ids)
-        rb.run()
-        rb.cleanup()
-        log.info('Finished CommitRunBuilder for %s', repo.full_fs_path)
-
     # Clear any existing caches for branches/tags
     if repo.cached_branches:
         repo.cached_branches = []
@@ -195,122 +175,6 @@ def refresh_children(ci):
         multi=True)
 
 
-class CommitRunBuilder(object):
-
-    '''Class used to build up linear runs of single-parent commits'''
-
-    def __init__(self, commit_ids):
-        self.commit_ids = commit_ids
-        self.run_index = {}  # by commit ID
-        self.runs = {}          # by run ID
-        self.reasons = {}    # reasons to stop merging runs
-
-    def run(self):
-        '''Build up the runs'''
-        for oids in utils.chunked_iter(self.commit_ids, QSIZE):
-            oids = list(oids)
-            for ci in CommitDoc.m.find(dict(_id={'$in': oids})):
-                if ci._id in self.run_index:
-                    continue
-                self.run_index[ci._id] = ci._id
-                self.runs[ci._id] = CommitRunDoc(dict(
-                    _id=ci._id,
-                    parent_commit_ids=ci.parent_ids,
-                    commit_ids=[ci._id],
-                    commit_times=[ci.authored['date']]))
-            self.merge_runs()
-        log.info('%d runs', len(self.runs))
-        for rid, run in sorted(self.runs.items()):
-            log.info('%32s: %r', self.reasons.get(rid, 'none'), run._id)
-        for run in self.runs.itervalues():
-            run.m.save()
-        return self.runs
-
-    def _all_runs(self):
-        '''Find all runs containing this builder's commit IDs'''
-        runs = {}
-        for oids in utils.chunked_iter(self.commit_ids, QSIZE):
-            oids = list(oids)
-            for run in CommitRunDoc.m.find(dict(commit_ids={'$in': oids})):
-                runs[run._id] = run
-            for run in CommitRunDoc.m.find(dict(parent_commit_ids={'$in': oids})):
-                runs[run._id] = run
-        seen_run_ids = set()
-        runs = runs.values()
-        while runs:
-            run = runs.pop()
-            if run._id in seen_run_ids:
-                continue
-            seen_run_ids.add(run._id)
-            yield run
-            for run in CommitRunDoc.m.find(
-                    dict(commit_ids={'$in': run.parent_commit_ids})):
-                runs.append(run)
-
-    def cleanup(self):
-        '''Delete non-maximal runs and merge any new runs with existing runs'''
-        runs = dict(
-            (run['commit_ids'][0], run)
-            for run in self._all_runs())
-        for rid, run in runs.items():
-            p_cis = run['parent_commit_ids']
-            if len(p_cis) != 1:
-                continue
-            parent_run = runs.get(p_cis[0], None)
-            if parent_run is None:
-                continue
-            run['commit_ids'] += parent_run['commit_ids']
-            run['commit_times'] += parent_run['commit_times']
-            run['parent_commit_ids'] = parent_run['parent_commit_ids']
-            run.m.save()
-            parent_run.m.delete()
-            del runs[p_cis[0]]
-        for run1 in runs.values():
-            # if run1 is a subset of another run, delete it
-            if CommitRunDoc.m.find(dict(commit_ids={'$all': run1.commit_ids},
-                                        _id={'$ne': run1._id})).count():
-                log.info('... delete %r (subset of another run)', run1)
-                run1.m.delete()
-                continue
-            for run2 in CommitRunDoc.m.find(dict(
-                    commit_ids=run1.commit_ids[0])):
-                if run1._id == run2._id:
-                    continue
-                log.info('... delete %r (part of %r)', run2, run1)
-                run2.m.delete()
-
-    def merge_runs(self):
-        '''Find partial runs that may be merged and merge them'''
-        while True:
-            for run_id, run in self.runs.iteritems():
-                if len(run.parent_commit_ids) != 1:
-                    self.reasons[run_id] = '%d parents' % len(
-                        run.parent_commit_ids)
-                    continue
-                p_oid = run.parent_commit_ids[0]
-                p_run_id = self.run_index.get(p_oid)
-                if p_run_id is None:
-                    self.reasons[run_id] = 'parent commit not found'
-                    continue
-                p_run = self.runs.get(p_run_id)
-                if p_run is None:
-                    self.reasons[run_id] = 'parent run not found'
-                    continue
-                if p_run.commit_ids[0] != p_oid:
-                    self.reasons[
-                        run_id] = 'parent does not start with parent commit'
-                    continue
-                run.commit_ids += p_run.commit_ids
-                run.commit_times += p_run.commit_times
-                run.parent_commit_ids = p_run.parent_commit_ids
-                for oid in p_run.commit_ids:
-                    self.run_index[oid] = run_id
-                break
-            else:
-                break
-            del self.runs[p_run_id]
-
-
 def unknown_commit_ids(all_commit_ids):
     '''filter out all commit ids that have already been cached'''
     result = []
diff --git a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
index f68580e..dc97527 100644
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -1018,16 +1018,6 @@ LastCommitDoc = collection(
         commit_id=str)]))
 
 
-# List of commit runs (a run is a linear series of single-parent commits)
-# CommitRunDoc.commit_ids = [ CommitDoc._id, ... ]
-CommitRunDoc = collection(
-    'repo_commitrun', main_doc_session,
-    Field('_id', str),
-    Field('parent_commit_ids', [str], index=True),
-    Field('commit_ids', [str], index=True),
-    Field('commit_times', [datetime]))
-
-
 class RepoObject(object):
 
     def __repr__(self):  # pragma no cover
diff --git a/Allura/allura/scripts/refreshrepo.py b/Allura/allura/scripts/refreshrepo.py
index 7410b4c..9050400 100644
--- a/Allura/allura/scripts/refreshrepo.py
+++ b/Allura/allura/scripts/refreshrepo.py
@@ -102,12 +102,6 @@ class RefreshRepo(ScriptTask):
                                 M.repository.LastCommitDoc.m.remove(
                                     dict(commit_id={'$in': ci_ids_chunk}))
 
-                            i = M.repository.CommitRunDoc.m.find(
-                                {"commit_ids": {"$in": ci_ids_chunk}}).count()
-                            if i:
-                                log.info("Deleting %i CommitRunDoc docs...", i)
-                                M.repository.CommitRunDoc.m.remove(
-                                    {"commit_ids": {"$in": ci_ids_chunk}})
                         del ci_ids
 
                     try:
diff --git a/Allura/allura/tests/model/test_repo.py b/Allura/allura/tests/model/test_repo.py
index efbbb4d..1020031 100644
--- a/Allura/allura/tests/model/test_repo.py
+++ b/Allura/allura/tests/model/test_repo.py
@@ -62,39 +62,7 @@ class TestGitLikeTree(object):
 
 
 class RepoImplTestBase(object):
-    def test_commit_run(self):
-        M.repository.CommitRunDoc.m.remove()
-        commit_ids = list(self.repo.all_commit_ids())
-        # simulate building up a commit run from multiple pushes
-        for c_id in commit_ids:
-            crb = M.repo_refresh.CommitRunBuilder([c_id])
-            crb.run()
-            crb.cleanup()
-        runs = M.repository.CommitRunDoc.m.find().all()
-        self.assertEqual(len(runs), 1)
-        run = runs[0]
-        self.assertEqual(run.commit_ids, commit_ids)
-        self.assertEqual(len(run.commit_ids), len(run.commit_times))
-        self.assertEqual(run.parent_commit_ids, [])
-
-    def test_repair_commit_run(self):
-        commit_ids = list(self.repo.all_commit_ids())
-        # simulate building up a commit run from multiple pushes, but skip the
-        # last commit to simulate a broken commit run
-        for c_id in commit_ids[:-1]:
-            crb = M.repo_refresh.CommitRunBuilder([c_id])
-            crb.run()
-            crb.cleanup()
-        # now repair the commitrun by rebuilding with all commit ids
-        crb = M.repo_refresh.CommitRunBuilder(commit_ids)
-        crb.run()
-        crb.cleanup()
-        runs = M.repository.CommitRunDoc.m.find().all()
-        self.assertEqual(len(runs), 1)
-        run = runs[0]
-        self.assertEqual(run.commit_ids, commit_ids)
-        self.assertEqual(len(run.commit_ids), len(run.commit_times))
-        self.assertEqual(run.parent_commit_ids, [])
+    pass
 
 
 class RepoTestBase(unittest.TestCase):
diff --git a/Allura/allura/tests/unit/test_repo.py b/Allura/allura/tests/unit/test_repo.py
index eb78df4..33c10d3 100644
--- a/Allura/allura/tests/unit/test_repo.py
+++ b/Allura/allura/tests/unit/test_repo.py
@@ -27,58 +27,8 @@ from allura import model as M
 from allura.controllers.repository import topo_sort
 from allura.model.repository import zipdir, prefix_paths_union
 from allura.model.repo_refresh import (
-    CommitRunDoc,
-    CommitRunBuilder,
     _group_commits,
 )
-from alluratest.controller import setup_unit_test
-
-
-class TestCommitRunBuilder(unittest.TestCase):
-
-    def setUp(self):
-        setup_unit_test()
-        commits = [
-            M.repository.CommitDoc.make(dict(
-                _id=str(i)))
-            for i in range(10)]
-        for p, com in zip(commits, commits[1:]):
-            p.child_ids = [com._id]
-            com.parent_ids = [p._id]
-        for ci in commits:
-            ci.m.save()
-        self.commits = commits
-
-    def test_single_pass(self):
-        crb = CommitRunBuilder(
-            [ci._id for ci in self.commits])
-        crb.run()
-        self.assertEqual(CommitRunDoc.m.count(), 1)
-
-    def test_two_pass(self):
-        crb = CommitRunBuilder(
-            [ci._id for ci in self.commits[:5]])
-        crb.run()
-        crb = CommitRunBuilder(
-            [ci._id for ci in self.commits[5:]])
-        crb.run()
-        self.assertEqual(CommitRunDoc.m.count(), 2)
-        crb.cleanup()
-        self.assertEqual(CommitRunDoc.m.count(), 1)
-
-    def test_svn_like(self):
-        for ci in self.commits:
-            crb = CommitRunBuilder([ci._id])
-            crb.run()
-            crb.cleanup()
-        self.assertEqual(CommitRunDoc.m.count(), 1)
-
-    def test_reversed(self):
-        for ci in reversed(self.commits):
-            crb = CommitRunBuilder([ci._id])
-            crb.run()
-            crb.cleanup()
-        self.assertEqual(CommitRunDoc.m.count(), 1)
 
 
 class TestTopoSort(unittest.TestCase):
diff --git a/ForgeSVN/forgesvn/tests/model/test_repository.py b/ForgeSVN/forgesvn/tests/model/test_repository.py
index a5634ea..f4c49e8 100644
--- a/ForgeSVN/forgesvn/tests/model/test_repository.py
+++ b/ForgeSVN/forgesvn/tests/model/test_repository.py
@@ -681,9 +681,6 @@ class _Test(unittest.TestCase):
 
     def _make_log(self, ci):
         session(ci).flush(ci)
-        rb = M.repo_refresh.CommitRunBuilder([ci._id])
-        rb.run()
-        rb.cleanup()
 
     def setUp(self):
         setup_basic_test()