You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2013/02/28 23:38:16 UTC
[14/20] git commit: [#5788] Added lcid caching and checks for added
paths to improve performance of refreshing LastCommit docs
[#5788] Added lcid caching and checks for added paths to improve performance of refreshing LastCommit docs
Signed-off-by: Cory Johns <jo...@geek.net>
Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/69f224f0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/69f224f0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/69f224f0
Branch: refs/heads/cj/5788
Commit: 69f224f0c74b0cadd5a755909141b01b21392a2d
Parents: 4c623e6
Author: Cory Johns <jo...@geek.net>
Authored: Tue Feb 26 00:04:54 2013 +0000
Committer: Tim Van Steenburgh <tv...@gmail.com>
Committed: Thu Feb 28 21:48:32 2013 +0000
----------------------------------------------------------------------
Allura/allura/model/repo.py | 40 +++++++++++++++++--
Allura/allura/model/repo_refresh.py | 21 ++++++----
Allura/allura/scripts/refresh_last_commits.py | 6 ++-
3 files changed, 51 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/69f224f0/Allura/allura/model/repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
index 4f50d5b..1510de6 100644
--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -392,6 +392,31 @@ class Commit(RepoObject):
return diffs
@LazyProperty
+ def added_paths(self):
+ '''
+ Returns a set of paths added in this commit.
+ Leading and trailing slashes are removed, and
+ the list is complete, meaning that if a directory
+ with subdirectories is added, all of the child
+ paths are included (this relies on the DiffInfoDoc
+ being complete).
+
+ Example:
+
+ If the directory /foo/bar/ is added in the commit
+ which contains a subdirectory /foo/bar/baz/ with
+ the file /foo/bar/baz/qux.txt, this would return (unordered):
+ set(['foo/bar', 'foo/bar/baz', 'foo/bar/baz/qux.txt'])
+ '''
+ diff_info = DiffInfoDoc.m.get(_id=self._id)
+ diffs = set()
+ if diff_info:
+ for d in diff_info.differences:
+ if d.lhs_id is None:
+ diffs.add(d.name.strip('/'))
+ return diffs
+
+ @LazyProperty
def info(self):
return dict(
id=self._id,
@@ -749,6 +774,11 @@ class LastCommit(RepoObject):
@classmethod
def _prev_commit_id(cls, commit, path):
+ if not commit.parent_ids or path in commit.added_paths:
+ return None # new paths by definition have no previous LCD
+ lcid_cache = getattr(c, 'lcid_cache', '')
+ if lcid_cache != '' and path in lcid_cache:
+ return lcid_cache[path]
commit_id = list(commit.repo.commits(path, commit._id, skip=1, limit=1))
if not commit_id:
return None
@@ -773,23 +803,23 @@ class LastCommit(RepoObject):
Build the LCD record, presuming that this tree is where it was most
recently changed.
'''
- cache = getattr(c, 'model_cache', '') or ModelCache()
+ model_cache = getattr(c, 'model_cache', '') or ModelCache()
path = tree.path().strip('/')
entries = []
prev_lcd = None
prev_lcd_cid = cls._prev_commit_id(tree.commit, path)
if prev_lcd_cid:
- prev_lcd = cache.get(cls, {'path': path, 'commit_id': prev_lcd_cid})
+ prev_lcd = model_cache.get(cls, {'path': path, 'commit_id': prev_lcd_cid})
entries = {}
nodes = set([node.name for node in chain(tree.tree_ids, tree.blob_ids, tree.other_ids)])
changed = set([node for node in nodes if os.path.join(path, node) in tree.commit.changed_paths])
+ unchanged = [os.path.join(path, node) for node in nodes - changed]
if prev_lcd:
# get unchanged entries from previously computed LCD
entries = prev_lcd.by_name
- else:
+ elif unchanged:
# no previously computed LCD, so get unchanged entries from SCM
# (but only ask for the ones that we know we need)
- unchanged = [os.path.join(path, node) for node in nodes - changed]
entries = tree.commit.repo.last_commit_ids(tree.commit, unchanged)
if entries is None:
# something strange went wrong; bail out and possibly try again later
@@ -805,7 +835,7 @@ class LastCommit(RepoObject):
path=path,
entries=entries,
)
- cache.set(cls, {'path': path, 'commit_id': tree.commit._id}, lcd)
+ model_cache.set(cls, {'path': path, 'commit_id': tree.commit._id}, lcd)
return lcd
@LazyProperty
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/69f224f0/Allura/allura/model/repo_refresh.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo_refresh.py b/Allura/allura/model/repo_refresh.py
index c50cffc..539df2a 100644
--- a/Allura/allura/model/repo_refresh.py
+++ b/Allura/allura/model/repo_refresh.py
@@ -100,11 +100,12 @@ def refresh_repo(repo, all_commits=False, notify=True):
log.info('Compute diffs %d: %s', (i+1), ci._id)
if repo._refresh_precompute:
- cache = ModelCache()
+ model_cache = ModelCache()
+ lcid_cache = {}
for i, oid in enumerate(reversed(commit_ids)):
- ci = cache.get(Commit, dict(_id=oid))
+ ci = model_cache.get(Commit, dict(_id=oid))
ci.set_context(repo)
- compute_lcds(ci, cache)
+ compute_lcds(ci, model_cache, lcid_cache)
ThreadLocalORMSession.flush_all()
if (i+1) % 100 == 0:
log.info('Compute last commit info %d: %s', (i+1), ci._id)
@@ -485,18 +486,20 @@ def last_known_commit_id(all_commit_ids, new_commit_ids):
return all_commit_ids[all_commit_ids.index(new_commit_ids[0]) - 1]
-def compute_lcds(commit, cache):
+def compute_lcds(commit, model_cache, lcid_cache):
'''
Compute LastCommit data for every Tree node under this tree.
'''
- trees = cache.get(TreesDoc, dict(_id=commit._id))
+ trees = model_cache.get(TreesDoc, dict(_id=commit._id))
if not trees:
log.error('Missing TreesDoc for %s; skipping compute_lcd' % commit)
return
- with h.push_config(c, model_cache=cache):
- _update_tree_cache(trees.tree_ids, cache)
- tree = _pull_tree(cache, commit.tree_id, commit)
- _compute_lcds(tree, cache)
+ with h.push_config(c, model_cache=model_cache, lcid_cache=lcid_cache):
+ _update_tree_cache(trees.tree_ids, model_cache)
+ tree = _pull_tree(model_cache, commit.tree_id, commit)
+ _compute_lcds(tree, model_cache)
+ for changed_path in tree.commit.changed_paths:
+ lcid_cache[changed_path] = tree.commit._id
def _compute_lcds(tree, cache):
path = tree.path().strip('/')
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/69f224f0/Allura/allura/scripts/refresh_last_commits.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/refresh_last_commits.py b/Allura/allura/scripts/refresh_last_commits.py
index 4508039..882076c 100644
--- a/Allura/allura/scripts/refresh_last_commits.py
+++ b/Allura/allura/scripts/refresh_last_commits.py
@@ -11,6 +11,7 @@ from pylons import tmpl_context as c
from ming.orm import ThreadLocalORMSession, session
from allura import model as M
+from allura.lib import helpers as h
from allura.lib.utils import chunked_find
from allura.tasks.repo_tasks import refresh
from allura.scripts import ScriptTask
@@ -127,10 +128,11 @@ class RefreshLastCommits(ScriptTask):
if i % 1000 == 0:
cls._print_stats(i, timings, 1000)
- lcd_cache = M.repo.ModelCache(
+ model_cache = M.repo.ModelCache(
max_instances={M.repo.LastCommit: 4000},
max_queries={M.repo.LastCommit: 4000},
)
+ lcid_cache = {}
timings = []
print 'Processing last commits'
for i, commit_id in enumerate(commit_ids):
@@ -140,7 +142,7 @@ class RefreshLastCommits(ScriptTask):
continue
commit.set_context(c.app.repo)
with time(timings):
- M.repo_refresh.compute_lcds(commit, lcd_cache)
+ M.repo_refresh.compute_lcds(commit, model_cache, lcid_cache)
ThreadLocalORMSession.flush_all()
if i % 100 == 0:
cls._print_stats(i, timings, 100)