You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2013/03/01 21:08:00 UTC

[5/26] git commit: [#5788] Added lcid caching and checks for added paths to improve performance of refreshing LastCommit docs

[#5788] Added lcid caching and checks for added paths to improve performance of refreshing LastCommit docs

Signed-off-by: Cory Johns <jo...@geek.net>


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/69f224f0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/69f224f0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/69f224f0

Branch: refs/heads/cj/5854
Commit: 69f224f0c74b0cadd5a755909141b01b21392a2d
Parents: 4c623e6
Author: Cory Johns <jo...@geek.net>
Authored: Tue Feb 26 00:04:54 2013 +0000
Committer: Tim Van Steenburgh <tv...@gmail.com>
Committed: Thu Feb 28 21:48:32 2013 +0000

----------------------------------------------------------------------
 Allura/allura/model/repo.py                   |   40 +++++++++++++++++--
 Allura/allura/model/repo_refresh.py           |   21 ++++++----
 Allura/allura/scripts/refresh_last_commits.py |    6 ++-
 3 files changed, 51 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/69f224f0/Allura/allura/model/repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
index 4f50d5b..1510de6 100644
--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -392,6 +392,31 @@ class Commit(RepoObject):
         return diffs
 
     @LazyProperty
+    def added_paths(self):
+        '''
+        Returns a set of paths added in this commit.
+        Leading and trailing slashes are removed, and
+        the set is complete, meaning that if a directory
+        with subdirectories is added, all of the child
+        paths are included (this relies on the DiffInfoDoc
+        being complete).
+
+        Example:
+
+            If the directory /foo/bar/ is added in the commit
+            which contains a subdirectory /foo/bar/baz/ with
+            the file /foo/bar/baz/qux.txt, this would return:
+            set(['foo/bar', 'foo/bar/baz', 'foo/bar/baz/qux.txt'])
+        '''
+        diff_info = DiffInfoDoc.m.get(_id=self._id)
+        diffs = set()
+        if diff_info:
+            for d in diff_info.differences:
+                if d.lhs_id is None:
+                    diffs.add(d.name.strip('/'))
+        return diffs
+
+    @LazyProperty
     def info(self):
         return dict(
             id=self._id,
@@ -749,6 +774,11 @@ class LastCommit(RepoObject):
 
     @classmethod
     def _prev_commit_id(cls, commit, path):
+        if not commit.parent_ids or path in commit.added_paths:
+            return None  # new paths by definition have no previous LCD
+        lcid_cache = getattr(c, 'lcid_cache', '')
+        if lcid_cache != '' and path in lcid_cache:
+            return lcid_cache[path]
         commit_id = list(commit.repo.commits(path, commit._id, skip=1, limit=1))
         if not commit_id:
             return None
@@ -773,23 +803,23 @@ class LastCommit(RepoObject):
           Build the LCD record, presuming that this tree is where it was most
           recently changed.
         '''
-        cache = getattr(c, 'model_cache', '') or ModelCache()
+        model_cache = getattr(c, 'model_cache', '') or ModelCache()
         path = tree.path().strip('/')
         entries = []
         prev_lcd = None
         prev_lcd_cid = cls._prev_commit_id(tree.commit, path)
         if prev_lcd_cid:
-            prev_lcd = cache.get(cls, {'path': path, 'commit_id': prev_lcd_cid})
+            prev_lcd = model_cache.get(cls, {'path': path, 'commit_id': prev_lcd_cid})
         entries = {}
         nodes = set([node.name for node in chain(tree.tree_ids, tree.blob_ids, tree.other_ids)])
         changed = set([node for node in nodes if os.path.join(path, node) in tree.commit.changed_paths])
+        unchanged = [os.path.join(path, node) for node in nodes - changed]
         if prev_lcd:
             # get unchanged entries from previously computed LCD
             entries = prev_lcd.by_name
-        else:
+        elif unchanged:
             # no previously computed LCD, so get unchanged entries from SCM
             # (but only ask for the ones that we know we need)
-            unchanged = [os.path.join(path, node) for node in nodes - changed]
             entries = tree.commit.repo.last_commit_ids(tree.commit, unchanged)
             if entries is None:
                 # something strange went wrong; bail out and possibly try again later
@@ -805,7 +835,7 @@ class LastCommit(RepoObject):
                 path=path,
                 entries=entries,
             )
-        cache.set(cls, {'path': path, 'commit_id': tree.commit._id}, lcd)
+        model_cache.set(cls, {'path': path, 'commit_id': tree.commit._id}, lcd)
         return lcd
 
     @LazyProperty

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/69f224f0/Allura/allura/model/repo_refresh.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo_refresh.py b/Allura/allura/model/repo_refresh.py
index c50cffc..539df2a 100644
--- a/Allura/allura/model/repo_refresh.py
+++ b/Allura/allura/model/repo_refresh.py
@@ -100,11 +100,12 @@ def refresh_repo(repo, all_commits=False, notify=True):
                 log.info('Compute diffs %d: %s', (i+1), ci._id)
 
     if repo._refresh_precompute:
-        cache = ModelCache()
+        model_cache = ModelCache()
+        lcid_cache = {}
         for i, oid in enumerate(reversed(commit_ids)):
-            ci = cache.get(Commit, dict(_id=oid))
+            ci = model_cache.get(Commit, dict(_id=oid))
             ci.set_context(repo)
-            compute_lcds(ci, cache)
+            compute_lcds(ci, model_cache, lcid_cache)
             ThreadLocalORMSession.flush_all()
             if (i+1) % 100 == 0:
                 log.info('Compute last commit info %d: %s', (i+1), ci._id)
@@ -485,18 +486,20 @@ def last_known_commit_id(all_commit_ids, new_commit_ids):
     return all_commit_ids[all_commit_ids.index(new_commit_ids[0]) - 1]
 
 
-def compute_lcds(commit, cache):
+def compute_lcds(commit, model_cache, lcid_cache):
     '''
     Compute LastCommit data for every Tree node under this tree.
     '''
-    trees = cache.get(TreesDoc, dict(_id=commit._id))
+    trees = model_cache.get(TreesDoc, dict(_id=commit._id))
     if not trees:
         log.error('Missing TreesDoc for %s; skipping compute_lcd' % commit)
         return
-    with h.push_config(c, model_cache=cache):
-        _update_tree_cache(trees.tree_ids, cache)
-        tree = _pull_tree(cache, commit.tree_id, commit)
-        _compute_lcds(tree, cache)
+    with h.push_config(c, model_cache=model_cache, lcid_cache=lcid_cache):
+        _update_tree_cache(trees.tree_ids, model_cache)
+        tree = _pull_tree(model_cache, commit.tree_id, commit)
+        _compute_lcds(tree, model_cache)
+        for changed_path in tree.commit.changed_paths:
+            lcid_cache[changed_path] = tree.commit._id
 
 def _compute_lcds(tree, cache):
     path = tree.path().strip('/')

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/69f224f0/Allura/allura/scripts/refresh_last_commits.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/refresh_last_commits.py b/Allura/allura/scripts/refresh_last_commits.py
index 4508039..882076c 100644
--- a/Allura/allura/scripts/refresh_last_commits.py
+++ b/Allura/allura/scripts/refresh_last_commits.py
@@ -11,6 +11,7 @@ from pylons import tmpl_context as c
 from ming.orm import ThreadLocalORMSession, session
 
 from allura import model as M
+from allura.lib import helpers as h
 from allura.lib.utils import chunked_find
 from allura.tasks.repo_tasks import refresh
 from allura.scripts import ScriptTask
@@ -127,10 +128,11 @@ class RefreshLastCommits(ScriptTask):
                 if i % 1000 == 0:
                     cls._print_stats(i, timings, 1000)
 
-        lcd_cache = M.repo.ModelCache(
+        model_cache = M.repo.ModelCache(
                 max_instances={M.repo.LastCommit: 4000},
                 max_queries={M.repo.LastCommit: 4000},
             )
+        lcid_cache = {}
         timings = []
         print 'Processing last commits'
         for i, commit_id in enumerate(commit_ids):
@@ -140,7 +142,7 @@ class RefreshLastCommits(ScriptTask):
                 continue
             commit.set_context(c.app.repo)
             with time(timings):
-                M.repo_refresh.compute_lcds(commit, lcd_cache)
+                M.repo_refresh.compute_lcds(commit, model_cache, lcid_cache)
                 ThreadLocalORMSession.flush_all()
             if i % 100 == 0:
                 cls._print_stats(i, timings, 100)