You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by he...@apache.org on 2015/04/01 23:10:48 UTC

[31/45] allura git commit: [#7837] ticket:736 Wipe out all the traces of DiffInfoDoc

[#7837] ticket:736 Wipe out all the traces of DiffInfoDoc


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/d0dd4b75
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/d0dd4b75
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/d0dd4b75

Branch: refs/heads/hss/7072
Commit: d0dd4b757f7c33fc7a11a180e7cbcce8858edf42
Parents: d50d350
Author: Igor Bondarenko <je...@gmail.com>
Authored: Mon Mar 2 16:18:10 2015 +0000
Committer: Dave Brondsema <db...@slashdotmedia.com>
Committed: Mon Mar 30 19:20:40 2015 +0000

----------------------------------------------------------------------
 Allura/allura/model/repo.py                     |  2 +-
 Allura/allura/model/repo_refresh.py             | 75 +-------------------
 Allura/allura/model/repository.py               |  9 ---
 Allura/allura/scripts/refresh_last_commits.py   | 24 +------
 Allura/allura/scripts/refreshrepo.py            |  7 --
 Allura/test-light.py                            |  3 +-
 ForgeSVN/forgesvn/model/svn.py                  | 47 +-----------
 .../forgesvn/tests/model/test_repository.py     |  7 --
 8 files changed, 7 insertions(+), 167 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/d0dd4b75/Allura/allura/model/repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
index 645fdc5..f041116 100644
--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -21,6 +21,6 @@
 
 from .repository import SUser, SObjType
 from .repository import QSIZE, README_RE, VIEWABLE_EXTENSIONS, PYPELINE_EXTENSIONS, DIFF_SIMILARITY_THRESHOLD
-from .repository import CommitDoc, TreeDoc, LastCommitDoc, TreesDoc, DiffInfoDoc, CommitRunDoc
+from .repository import CommitDoc, TreeDoc, LastCommitDoc, TreesDoc, CommitRunDoc
 from .repository import RepoObject, Commit, Tree, Blob, LastCommit
 from .repository import ModelCache

http://git-wip-us.apache.org/repos/asf/allura/blob/d0dd4b75/Allura/allura/model/repo_refresh.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo_refresh.py b/Allura/allura/model/repo_refresh.py
index d8ef050..1711bc3 100644
--- a/Allura/allura/model/repo_refresh.py
+++ b/Allura/allura/model/repo_refresh.py
@@ -31,7 +31,7 @@ from ming.orm import mapper, session, ThreadLocalORMSession
 
 from allura.lib import utils
 from allura.lib import helpers as h
-from allura.model.repository import CommitDoc, TreeDoc, TreesDoc, DiffInfoDoc
+from allura.model.repository import CommitDoc, TreeDoc, TreesDoc
 from allura.model.repository import CommitRunDoc
 from allura.model.repository import Commit, Tree, LastCommit, ModelCache
 from allura.model.index import ArtifactReferenceDoc, ShortlinkDoc
@@ -110,19 +110,10 @@ def refresh_repo(repo, all_commits=False, notify=True, new_clone=False):
 
     # Compute diffs
     cache = {}
-    # For some SCMs, we don't want to pre-compute the diffs because that
+    # For some SCMs, we don't want to pre-compute the LCDs because that
     # would be too expensive, so we skip them here and do them on-demand
     # with caching.
     if repo._refresh_precompute:
-        for i, oid in enumerate(commit_ids):
-            cid = CommitDoc.m.find(dict(_id=oid), validate=False).next()
-            ci = mapper(Commit).create(cid, dict(instrument=False))
-            ci.set_context(repo)
-            compute_diffs(repo._id, cache, ci)
-            if (i + 1) % 100 == 0:
-                log.info('Compute diffs %d: %s', (i + 1), ci._id)
-
-    if repo._refresh_precompute:
         model_cache = ModelCache()
         lcid_cache = {}
         for i, oid in enumerate(reversed(commit_ids)):
@@ -368,68 +359,6 @@ def unknown_commit_ids(all_commit_ids):
     return result
 
 
-def compute_diffs(repo_id, tree_cache, rhs_ci):
-    '''compute simple differences between a commit and its first parent'''
-    if rhs_ci.tree_id is None:
-        return tree_cache
-
-    def _update_cache(lhs_tree_ids, rhs_tree_ids):
-        # crazy cache logic that I'm not certain I understand
-        new_tree_ids = [
-            tid for tid in chain(lhs_tree_ids, rhs_tree_ids)
-            if tid not in tree_cache]
-        tree_index = dict(
-            (t._id, t) for t in TreeDoc.m.find(dict(_id={'$in': new_tree_ids}), validate=False))
-        tree_index.update(tree_cache)
-        rhs_tree_ids_set = set(rhs_tree_ids)
-        tree_cache.clear()
-        tree_cache.update(
-            (id, t) for id, t in tree_index.iteritems() if id in rhs_tree_ids_set)
-        return tree_index
-
-    empty_tree = Object(_id=None, tree_ids=[], blob_ids=[], other_ids=[])
-    commit_info = get_commit_info(rhs_ci)
-    differences = []
-    rhs_treesdoc = TreesDoc.m.get(_id=rhs_ci._id)
-    if not rhs_treesdoc:
-        # FIXME: These sometimes don't exist for unknown reasons; they should
-        # be auto-gen'ed
-        log.error('Missing TreesDoc: %s', rhs_ci)
-        return tree_cache
-    for lhs_cid in rhs_ci.parent_ids:
-        lhs_ci = CommitDoc.m.get(_id=lhs_cid)
-        if lhs_ci is None:
-            log.error(
-                'Commit ID referenced as parent but not found: %s parent of %s', lhs_cid, rhs_ci)
-            continue
-        lhs_treesdoc = TreesDoc.m.get(_id=lhs_cid)
-        if not lhs_treesdoc:
-            # FIXME: These sometimes don't exist for unknown reasons; they
-            # should be auto-gen'ed
-            log.error('Missing TreesDoc: %s', rhs_ci)
-            continue
-        tree_index = _update_cache(
-            lhs_treesdoc.tree_ids, rhs_treesdoc.tree_ids)
-        rhs_tree = tree_index[rhs_ci.tree_id]
-        lhs_tree = tree_index.get(lhs_ci.tree_id, empty_tree)
-        for name, lhs_id, rhs_id in _diff_trees(lhs_tree, rhs_tree, tree_index):
-            differences.append(
-                dict(name=name, lhs_id=lhs_id, rhs_id=rhs_id))
-    if not rhs_ci.parent_ids:
-        # no parents, so everything in rhs is new
-        tree_index = _update_cache([], rhs_treesdoc.tree_ids)
-        rhs_tree = tree_index[rhs_ci.tree_id]
-        for name, lhs_id, rhs_id in _diff_trees(empty_tree, rhs_tree, tree_index):
-            differences.append(
-                dict(name=name, lhs_id=lhs_id, rhs_id=rhs_id))
-    # Build the diffinfo
-    di = DiffInfoDoc(dict(
-        _id=rhs_ci._id,
-        differences=differences))
-    di.m.save()
-    return tree_cache
-
-
 def send_notifications(repo, commit_ids):
     '''Create appropriate notification and feed objects for a refresh'''
     from allura.model import Feed, Notification

http://git-wip-us.apache.org/repos/asf/allura/blob/d0dd4b75/Allura/allura/model/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
index d15394f..fc50860 100644
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -893,15 +893,6 @@ TreesDoc = collection(
     Field('_id', str),
     Field('tree_ids', [str]))
 
-# Information about which things were added/removed in  commit
-# DiffInfoDoc._id = CommitDoc._id
-DiffInfoDoc = collection(
-    'repo_diffinfo', main_doc_session,
-    Field('_id', str),
-    Field(
-        'differences',
-        [dict(name=str, lhs_id=str, rhs_id=str)]))
-
 # List of commit runs (a run is a linear series of single-parent commits)
 # CommitRunDoc.commit_ids = [ CommitDoc._id, ... ]
 CommitRunDoc = collection(

http://git-wip-us.apache.org/repos/asf/allura/blob/d0dd4b75/Allura/allura/scripts/refresh_last_commits.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/refresh_last_commits.py b/Allura/allura/scripts/refresh_last_commits.py
index 4673a2a..095fafb 100644
--- a/Allura/allura/scripts/refresh_last_commits.py
+++ b/Allura/allura/scripts/refresh_last_commits.py
@@ -70,8 +70,6 @@ class RefreshLastCommits(ScriptTask):
                             'project(s) being refreshed before doing the refresh.')
         parser.add_argument('--dry-run', action='store_true', dest='dry_run',
                             default=False, help='Log names of projects that would have their ')
-        parser.add_argument('--diffs', action='store_true', dest='diffs',
-                            default=False, help='Refresh / clean diffs as well as LCDs')
         parser.add_argument('--limit', action='store', type=int, dest='limit',
                             default=False, help='Limit of how many commits to process')
         return parser
@@ -118,7 +116,7 @@ class RefreshLastCommits(ScriptTask):
                         ci_ids = list(
                             reversed(list(c.app.repo.all_commit_ids())))
                         if options.clean:
-                            cls._clean(ci_ids, options.diffs)
+                            cls._clean(ci_ids)
 
                         log.info('Refreshing all last commits in %r',
                                  c.app.repo)
@@ -138,16 +136,6 @@ class RefreshLastCommits(ScriptTask):
     def refresh_repo_lcds(cls, commit_ids, options):
         tree_cache = {}
         timings = []
-        if options.diffs:
-            print 'Processing diffs'
-            for i, commit_id in enumerate(commit_ids):
-                commit = M.repository.Commit.query.get(_id=commit_id)
-                with time(timings):
-                    M.repo_refresh.compute_diffs(
-                        c.app.repo._id, tree_cache, commit)
-                if i % 1000 == 0:
-                    cls._print_stats(i, timings, 1000)
-
         model_cache = M.repository.ModelCache(
             max_instances={M.repository.LastCommit: 4000},
             max_queries={M.repository.LastCommit: 4000},
@@ -171,15 +159,7 @@ class RefreshLastCommits(ScriptTask):
         ThreadLocalORMSession.flush_all()
 
     @classmethod
-    def _clean(cls, commit_ids, clean_diffs):
-        if clean_diffs:
-            # delete DiffInfoDocs
-            i = M.repository.DiffInfoDoc.m.find(
-                dict(_id={'$in': commit_ids})).count()
-            log.info("Deleting %i DiffInfoDoc docs for %i commits...",
-                     i, len(commit_ids))
-            M.repository.DiffInfoDoc.m.remove(dict(_id={'$in': commit_ids}))
-
+    def _clean(cls, commit_ids):
         # delete LastCommitDocs
         i = M.repository.LastCommitDoc.m.find(
             dict(commit_id={'$in': commit_ids})).count()

http://git-wip-us.apache.org/repos/asf/allura/blob/d0dd4b75/Allura/allura/scripts/refreshrepo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/refreshrepo.py b/Allura/allura/scripts/refreshrepo.py
index 08f3fcc..a336b0d 100644
--- a/Allura/allura/scripts/refreshrepo.py
+++ b/Allura/allura/scripts/refreshrepo.py
@@ -118,13 +118,6 @@ class RefreshRepo(ScriptTask):
                                 M.repository.LastCommitDoc.m.remove(
                                     dict(commit_ids={'$in': ci_ids_chunk}))
 
-                            i = M.repository.DiffInfoDoc.m.find(
-                                {"_id": {"$in": ci_ids_chunk}}).count()
-                            if i:
-                                log.info("Deleting %i DiffInfoDoc docs...", i)
-                                M.repository.DiffInfoDoc.m.remove(
-                                    {"_id": {"$in": ci_ids_chunk}})
-
                             i = M.repository.CommitRunDoc.m.find(
                                 {"commit_ids": {"$in": ci_ids_chunk}}).count()
                             if i:

http://git-wip-us.apache.org/repos/asf/allura/blob/d0dd4b75/Allura/test-light.py
----------------------------------------------------------------------
diff --git a/Allura/test-light.py b/Allura/test-light.py
index be218b9..f095214 100644
--- a/Allura/test-light.py
+++ b/Allura/test-light.py
@@ -20,7 +20,7 @@ import sys
 from pylons import tmpl_context as c
 
 from allura.lib import helpers as h
-from allura.model.repository import CommitDoc, TreeDoc, TreesDoc, DiffInfoDoc
+from allura.model.repository import CommitDoc, TreeDoc, TreesDoc
 from allura.model.repository import LastCommitDoc, CommitRunDoc
 from allura.model.repo_refresh import refresh_repo
 
@@ -35,7 +35,6 @@ def main():
     CommitDoc.m.remove({})
     TreeDoc.m.remove({})
     TreesDoc.m.remove({})
-    DiffInfoDoc.m.remove({})
     LastCommitDoc.m.remove({})
     CommitRunDoc.m.remove({})
 

http://git-wip-us.apache.org/repos/asf/allura/blob/d0dd4b75/ForgeSVN/forgesvn/model/svn.py
----------------------------------------------------------------------
diff --git a/ForgeSVN/forgesvn/model/svn.py b/ForgeSVN/forgesvn/model/svn.py
index 44d625f..142ca6a 100644
--- a/ForgeSVN/forgesvn/model/svn.py
+++ b/ForgeSVN/forgesvn/model/svn.py
@@ -76,9 +76,6 @@ class Repository(M.Repository):
                                        'checkout_url'),
                                    dest_path=self.suggested_clone_dest_path()))
 
-    def compute_diffs(self):
-        return
-
     def latest(self, branch=None):
         if self._impl is None:
             return None
@@ -366,7 +363,7 @@ class SVNImplementation(M.RepositoryImplementation):
             oid for oid in oids if oid not in seen_oids]
 
     def refresh_commit_info(self, oid, seen_object_ids, lazy=True):
-        from allura.model.repository import CommitDoc, DiffInfoDoc
+        from allura.model.repository import CommitDoc
         ci_doc = CommitDoc.m.get(_id=oid)
         if ci_doc and lazy:
             return False
@@ -408,48 +405,6 @@ class SVNImplementation(M.RepositoryImplementation):
             except DuplicateKeyError:
                 if lazy:
                     return False
-        # Save diff info
-        di = DiffInfoDoc.make(dict(_id=ci_doc._id, differences=[]))
-        for path in log_entry.changed_paths:
-            if path.action in ('A', 'M', 'R'):
-                try:
-                    rhs_info = self._svn.info2(
-                        self._url + h.really_unicode(path.path),
-                        revision=self._revision(ci_doc._id),
-                        recurse=False)[0][1]
-                    rhs_id = self._obj_oid(ci_doc._id, rhs_info)
-                except pysvn.ClientError, e:
-                    # pysvn will sometimes misreport deleted files (D) as
-                    # something else (like A), causing info2() to raise a
-                    # ClientError since the file doesn't exist in this
-                    # revision. Set lrhs_id = None to treat like a deleted file
-                    log.info('This error was handled gracefully and logged '
-                             'for informational purposes only:\n' + str(e))
-                    rhs_id = None
-            else:
-                rhs_id = None
-            if ci_doc.parent_ids and path.action in ('D', 'M', 'R'):
-                try:
-                    lhs_info = self._svn.info2(
-                        self._url + h.really_unicode(path.path),
-                        revision=self._revision(ci_doc.parent_ids[0]),
-                        recurse=False)[0][1]
-                    lhs_id = self._obj_oid(ci_doc._id, lhs_info)
-                except pysvn.ClientError, e:
-                    # pysvn will sometimes report new files as 'M'odified,
-                    # causing info2() to raise ClientError since the file
-                    # doesn't exist in the parent revision. Set lhs_id = None
-                    # to treat like a newly added file.
-                    log.info('This error was handled gracefully and logged '
-                             'for informational purposes only:\n' + str(e))
-                    lhs_id = None
-            else:
-                lhs_id = None
-            di.differences.append(dict(
-                name=h.really_unicode(path.path),
-                lhs_id=lhs_id,
-                rhs_id=rhs_id))
-        di.m.save()
         return True
 
     def compute_tree_new(self, commit, tree_path='/'):

http://git-wip-us.apache.org/repos/asf/allura/blob/d0dd4b75/ForgeSVN/forgesvn/tests/model/test_repository.py
----------------------------------------------------------------------
diff --git a/ForgeSVN/forgesvn/tests/model/test_repository.py b/ForgeSVN/forgesvn/tests/model/test_repository.py
index 7f34f45..2c86309 100644
--- a/ForgeSVN/forgesvn/tests/model/test_repository.py
+++ b/ForgeSVN/forgesvn/tests/model/test_repository.py
@@ -1052,8 +1052,6 @@ class TestCommit(_TestWithRepo):
         self.repo._impl.commit = mock.Mock(return_value=self.ci)
         self.repo._impl.open_blob = self._unique_blobs()
         M.repo_refresh.refresh_commit_trees(self.ci, {})
-        M.repo_refresh.compute_diffs(self.repo._id, {}, self.ci)
-        # self.ci.compute_diffs()
         assert_equal(self.ci.diffs.added,
                      ['a', 'a/a', 'a/a/a', 'a/a/b', 'a/b'])
         assert (self.ci.diffs.copied
@@ -1064,7 +1062,6 @@ class TestCommit(_TestWithRepo):
         ci.parent_ids = ['foo']
         self._make_log(ci)
         M.repo_refresh.refresh_commit_trees(ci, {})
-        M.repo_refresh.compute_diffs(self.repo._id, {}, ci)
         assert_equal(ci.diffs.removed, ['a', 'a/a', 'a/a/a', 'a/a/b', 'a/b'])
         assert (ci.diffs.copied
                 == ci.diffs.changed
@@ -1080,7 +1077,6 @@ class TestCommit(_TestWithRepo):
         ci.parent_ids = ['foo']
         self._make_log(ci)
         M.repo_refresh.refresh_commit_trees(ci, {})
-        M.repo_refresh.compute_diffs(self.repo._id, {}, ci)
         assert_equal(ci.diffs.added, ['b', 'b/a', 'b/a/a', 'b/a/b', 'b/b'])
         assert_equal(ci.diffs.removed, ['a', 'a/a', 'a/a/a', 'a/a/b', 'a/b'])
         assert (ci.diffs.copied
@@ -1104,7 +1100,6 @@ class TestCommit(_TestWithRepo):
 
         self.repo._impl.commit = mock.Mock(return_value=self.ci)
         M.repo_refresh.refresh_commit_trees(self.ci, {})
-        M.repo_refresh.compute_diffs(self.repo._id, {}, self.ci)
         assert_equal(self.ci.diffs.added,
                      ['a', 'a/a', 'a/a/a', 'a/a/b', 'a/b'])
         assert (self.ci.diffs.copied
@@ -1122,7 +1117,6 @@ class TestCommit(_TestWithRepo):
         ci.parent_ids = ['foo']
         self._make_log(ci)
         M.repo_refresh.refresh_commit_trees(ci, {})
-        M.repo_refresh.compute_diffs(self.repo._id, {}, ci)
         assert_equal(ci.diffs.added, ['b', 'b/a', 'b/a/a', 'b/a/b', 'b/b'])
         assert_equal(ci.diffs.removed, ['a', 'a/a', 'a/a/a', 'a/a/b', 'a/b'])
         assert (ci.diffs.copied
@@ -1138,7 +1132,6 @@ class TestCommit(_TestWithRepo):
         ci.parent_ids = ['bar']
         self._make_log(ci)
         M.repo_refresh.refresh_commit_trees(ci, {})
-        M.repo_refresh.compute_diffs(self.repo._id, {}, ci)
         assert_equal(ci.diffs.added, [])
         assert_equal(ci.diffs.changed, [])
         assert_equal(ci.diffs.removed, ['b/a/a'])