You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2012/12/05 18:27:47 UTC
[2/34] git commit: [#4691] ModelCache improvements and more debugging
for refresh-last-commits.py
[#4691] ModelCache improvements and more debugging for refresh-last-commits.py
Signed-off-by: Cory Johns <jo...@geek.net>
Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/b6aefcfd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/b6aefcfd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/b6aefcfd
Branch: refs/heads/cj/4691
Commit: b6aefcfd9f1d25d452ceeb6cf63db5223edf0514
Parents: affecde
Author: Cory Johns <jo...@geek.net>
Authored: Fri Nov 30 19:35:32 2012 +0000
Committer: Cory Johns <jo...@geek.net>
Committed: Wed Dec 5 14:59:38 2012 +0000
----------------------------------------------------------------------
Allura/allura/model/repo.py | 34 ++++++++++++++------------
Allura/allura/model/repo_refresh.py | 2 +-
Allura/allura/tests/model/test_repo.py | 16 ++++++++++--
scripts/refresh-last-commits.py | 24 +++++++++++++-----
4 files changed, 49 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/b6aefcfd/Allura/allura/model/repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
index 2bc22a9..5a7f002 100644
--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -5,7 +5,7 @@ import logging
from hashlib import sha1
from itertools import chain
from datetime import datetime
-from collections import defaultdict
+from collections import defaultdict, OrderedDict
from difflib import SequenceMatcher, unified_diff
from pylons import c
@@ -824,12 +824,11 @@ class ModelCache(object):
Commit instances and 2000 Tree instances in the cache
at once with the default value.
'''
- self._cache = defaultdict(dict)
+ self._cache = defaultdict(OrderedDict)
self.max_size = max_size
- self._insertion_order = defaultdict(list)
# temporary, for performance testing
- self._hits = 0
- self._misses = 0
+ self._hits = defaultdict(int)
+ self._accesses = defaultdict(int)
self._get_calls = 0
self._get_walks = 0
self._get_walks_max = 0
@@ -847,12 +846,13 @@ class ModelCache(object):
def get(self, cls, key):
_key = self._normalize_key(key)
+ self._manage_cache(cls, _key)
+ self._accesses[cls] += 1
if _key not in self._cache[cls]:
- self._misses += 1
query = getattr(cls, 'query', getattr(cls, 'm', None))
self.set(cls, _key, query.get(**key))
else:
- self._hits += 1
+ self._hits[cls] += 1
return self._cache[cls][_key]
def set(self, cls, key, val):
@@ -866,23 +866,25 @@ class ModelCache(object):
and expire from the cache in a FIFO manner.
'''
if key in self._cache[cls]:
- return
- self._insertion_order[cls].append(key)
- if len(self._insertion_order[cls]) > self.max_size:
- _key = self._insertion_order[cls].pop(0)
- self._cache[cls].pop(_key)
+ # refresh access time in cache
+ val = self._cache[cls].pop(key)
+ self._cache[cls][key] = val
+ elif len(self._cache[cls]) >= self.max_size:
+ # remove the least-recently-used cache item
+ self._cache[cls].popitem(last=False)
def size(self):
- return sum([len(c) for c in self._insertion_order.values()])
+ return sum([len(c) for c in self._cache.values()])
- def keys(self, cls):
+ def keys(self, cls, as_dict=True):
'''
Returns all the cache keys for a given class. Each
cache key will be a dict.
'''
- if self._cache[cls]:
+ if as_dict:
return [dict(k) for k in self._cache[cls].keys()]
- return []
+ else:
+ return self._cache[cls].keys()
def batch_load(self, cls, query, attrs=None):
'''
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/b6aefcfd/Allura/allura/model/repo_refresh.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo_refresh.py b/Allura/allura/model/repo_refresh.py
index 149fcae..4796daa 100644
--- a/Allura/allura/model/repo_refresh.py
+++ b/Allura/allura/model/repo_refresh.py
@@ -515,6 +515,6 @@ def _walk_commit_tree(commit, cache):
def _update_tree_cache(tree_ids, cache):
current_ids = set(tree_ids)
- cached_ids = set([k['_id'] for k in cache.keys(Tree)])
+ cached_ids = set([k[0][1] for k in cache.keys(Tree, as_dict=False)])
new_ids = current_ids - cached_ids
cache.batch_load(Tree, {'_id': {'$in': list(new_ids)}})
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/b6aefcfd/Allura/allura/tests/model/test_repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/model/test_repo.py b/Allura/allura/tests/model/test_repo.py
index 0dbff66..040c750 100644
--- a/Allura/allura/tests/model/test_repo.py
+++ b/Allura/allura/tests/model/test_repo.py
@@ -571,6 +571,12 @@ class TestModelCache(unittest.TestCase):
self.assertEqual(self.cache.keys(M.repo.Tree), [{'_id': 'test_keys', 'text': 'tko'}, {'fubar': 'scm'}])
self.assertEqual(self.cache.keys(M.repo.LastCommit), [])
+ def test_keys_not_as_dict(self):
+ self.cache._cache[M.repo.Tree][(('_id', 'test_keys'), ('text', 'tko'))] = 'foo'
+ self.cache._cache[M.repo.Tree][(('fubar', 'scm'),)] = 'bar'
+ self.assertEqual(self.cache.keys(M.repo.Tree, as_dict=False), [(('_id', 'test_keys'), ('text', 'tko')), (('fubar', 'scm'),)])
+ self.assertEqual(self.cache.keys(M.repo.LastCommit), [])
+
@mock.patch.object(M.repo.Tree.query, 'find')
def test_batch_load(self, tr_find):
# cls, query, attrs
@@ -600,14 +606,18 @@ class TestModelCache(unittest.TestCase):
})
def test_pruning(self):
- self.cache.max_size = 2
+ self.cache.max_size = 3
+ # ensure cache expires as LRU
self.cache.set(M.repo.Tree, {'_id': 'foo'}, 'bar')
self.cache.set(M.repo.Tree, {'_id': 'qux'}, 'zaz')
self.cache.set(M.repo.Tree, {'_id': 'f00'}, 'b4r')
- self.cache.set(M.repo.Tree, {'_id': 'qux'}, 'zaz')
+ self.cache.set(M.repo.Tree, {'_id': 'foo'}, 'zaz')
+ self.cache.get(M.repo.Tree, {'_id': 'f00'})
+ self.cache.set(M.repo.Tree, {'_id': 'mee'}, 'you')
self.assertEqual(self.cache._cache, {
M.repo.Tree: {
- (('_id', 'qux'),): 'zaz',
+ (('_id', 'foo'),): 'zaz',
(('_id', 'f00'),): 'b4r',
+ (('_id', 'mee'),): 'you',
},
})
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/b6aefcfd/scripts/refresh-last-commits.py
----------------------------------------------------------------------
diff --git a/scripts/refresh-last-commits.py b/scripts/refresh-last-commits.py
index 0a59b31..ed753ab 100644
--- a/scripts/refresh-last-commits.py
+++ b/scripts/refresh-last-commits.py
@@ -2,6 +2,7 @@ import sys
import argparse
import logging
import re
+from math import pow, log10
from datetime import datetime
from contextlib import contextmanager
@@ -97,29 +98,38 @@ def refresh_repo_lcds(commit_ids, options):
at = tt / len(timings)
print ' Processed %d commits (max: %f, avg: %f, tot: %f, cl: %d)' % (
len(timings), mt, at, tt, len(tree_cache))
- lcd_cache = M.repo.ModelCache(80000)
+ lcd_cache = M.repo.ModelCache(20000)
timings = []
print 'Processing last commits'
+ debug_step = int(pow(10, max(0, int(log10(len(commit_ids)) - log10(options.step) - 1))))
for i, commit_id in enum_step(commit_ids, options.step):
- print ' Processing commit %s...' % commit_id,
- sys.stdout.flush()
+ #print ' Processing commit %s...' % commit_id,
+ #sys.stdout.flush()
commit = M.repo.Commit.query.get(_id=commit_id)
with time(timings):
M.repo_refresh.compute_lcds(commit, lcd_cache)
- print 'done in %fs' % timings[-1]
- if len(timings) % 10 == 0:
+ #print 'done in %fs [%d%% in %d]' % (
+ # timings[-1],
+ # lcd_cache._hits[M.repo.LastCommit] * 100 / lcd_cache._accesses[M.repo.LastCommit],
+ # len(lcd_cache._cache[M.repo.LastCommit]),
+ # )
+ if len(timings) % debug_step == 0:
mt = max(timings)
tt = sum(timings)
at = tt / len(timings)
- mat = sum(timings[-10:]) / 10
+ mat = sum(timings[-debug_step:]) / debug_step
+ hits = sum(lcd_cache._hits.values())
+ accs = sum(lcd_cache._accesses.values())
print ' Processed %d commits (max: %f, avg: %f, mavg: %f, tot: %f, lc: %d, lcl: %d, hits: %d, agw: %d, mgw: %d, gh: %d, abw: %d, mbw: %d, ts: %d)' % (
len(timings), mt, at, mat, tt, lcd_cache.size(), len(lcd_cache._cache[M.repo.LastCommit]),
- lcd_cache._hits * 100 / (lcd_cache._hits + lcd_cache._misses),
+ hits * 100 / accs,
lcd_cache._get_walks / lcd_cache._get_calls, lcd_cache._get_walks_max, lcd_cache._get_hits * 100 / lcd_cache._get_calls,
lcd_cache._build_walks / lcd_cache._build_calls, lcd_cache._build_walks_max,
len(lcd_cache.get(M.repo.TreesDoc, dict(_id=commit._id)).tree_ids))
ThreadLocalORMSession.flush_all()
ThreadLocalORMSession.close_all()
+ ThreadLocalORMSession.flush_all()
+ ThreadLocalORMSession.close_all()
@contextmanager