You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by tv...@apache.org on 2013/02/05 21:23:34 UTC
[29/42] git commit: [#4691] Fixed ModelCache issue with new instances
not being considered equal to themselves
[#4691] Fixed ModelCache issue with new instances not being considered equal to themselves
Signed-off-by: Cory Johns <jo...@geek.net>
Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/31b47857
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/31b47857
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/31b47857
Branch: refs/heads/master
Commit: 31b478572052a7a2191e39708402d745fd80518a
Parents: d8e44d1
Author: Cory Johns <jo...@geek.net>
Authored: Thu Dec 13 16:19:12 2012 +0000
Committer: Tim Van Steenburgh <tv...@gmail.com>
Committed: Tue Feb 5 20:22:51 2013 +0000
----------------------------------------------------------------------
Allura/allura/model/repo.py | 79 ++++++++++---
Allura/allura/tests/model/test_repo.py | 160 ++++++++++++++++++++++-----
scripts/refresh-last-commits.py | 14 ++-
3 files changed, 204 insertions(+), 49 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/31b47857/Allura/allura/model/repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
index c92d403..5766bae 100644
--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -858,6 +858,8 @@ class ModelCache(object):
self._query_cache = defaultdict(OrderedDict) # keyed by query, holds _id
self._instance_cache = defaultdict(OrderedDict) # keyed by _id
+ self._synthetic_ids = defaultdict(set)
+ self._synthetic_id_queries = defaultdict(set)
# temporary, for performance testing
self._query_hits = defaultdict(int)
@@ -891,24 +893,37 @@ class ModelCache(object):
_query = self._normalize_query(query)
self._touch(cls, _query)
if _query not in self._query_cache[cls]:
- self.set(cls, _query, self._model_query(cls).get(**query))
- else:
- self._query_hits[cls] += 1
+ val = self._model_query(cls).get(**query)
+ self.set(cls, _query, val)
+ return val
+ self._query_hits[cls] += 1
_id = self._query_cache[cls][_query]
- if _id not in self._instance_cache[cls]:
- model_query = getattr(cls, 'query', getattr(cls, 'm', None))
- self.set(cls, _query, self._model_query(cls).get(**query))
- else:
+ if _id is None:
self._instance_hits[cls] += 1
+ return None
+ if _id not in self._instance_cache[cls]:
+ val = self._model_query(cls).get(**query)
+ self.set(cls, _query, val)
+ return val
+ self._instance_hits[cls] += 1
return self._instance_cache[cls][_id]
def set(self, cls, query, val):
_query = self._normalize_query(query)
- _id = self._query_cache[cls].get(_query, getattr(val, '_id', None))
- if _id is None:
- _id = 'None_%s' % bson.ObjectId()
- self._query_cache[cls][_query] = _id
- self._instance_cache[cls][_id] = val
+ if val is not None:
+ _id = getattr(val, '_model_cache_id',
+ getattr(val, '_id',
+ self._query_cache[cls].get(_query,
+ None)))
+ if _id is None:
+ _id = val._model_cache_id = bson.ObjectId()
+ self._synthetic_ids[cls].add(_id)
+ if _id in self._synthetic_ids:
+ self._synthetic_id_queries[cls].add(_query)
+ self._query_cache[cls][_query] = _id
+ self._instance_cache[cls][_id] = val
+ else:
+ self._query_cache[cls][_query] = None
self._touch(cls, _query)
self._check_sizes(cls)
@@ -930,15 +945,22 @@ class ModelCache(object):
def _check_sizes(self, cls):
if self.num_queries(cls) > self._max_queries[cls]:
- self._remove_least_recently_used(self._query_cache[cls])
+ _id = self._remove_least_recently_used(self._query_cache[cls])
+ if _id in self._instance_cache[cls]:
+ instance = self._instance_cache[cls][_id]
+ self._try_flush(instance, expunge=False)
if self.num_instances(cls) > self._max_instances[cls]:
instance = self._remove_least_recently_used(self._instance_cache[cls])
- try:
- inst_session = session(instance)
- except AttributeError:
- inst_session = None
- if inst_session:
- inst_session.flush(instance)
+ self._try_flush(instance, expunge=True)
+
+ def _try_flush(self, instance, expunge=False):
+ try:
+ inst_session = session(instance)
+ except AttributeError:
+ inst_session = None
+ if inst_session:
+ inst_session.flush(instance)
+ if expunge:
inst_session.expunge(instance)
def _remove_least_recently_used(self, cache):
@@ -946,6 +968,25 @@ class ModelCache(object):
key, val = cache.popitem(last=False)
return val
+ def expire_new_instances(self, cls):
+ '''
+ Expire any instances that were "new" or had no _id value.
+
+ If a lot of new instances of a class are being created, it's possible
+ for a query to pull a copy from mongo when a copy keyed by the synthetic
+ ID is still in the cache, potentially causing de-sync between the copies
+ leading to one with missing data overwriting the other. Clear new
+ instances out of the cache relatively frequently (depending on the query
+ and instance cache sizes) to avoid this.
+ '''
+ for _query in self._synthetic_id_queries[cls]:
+ self._query_cache[cls].pop(_query)
+ self._synthetic_id_queries[cls] = set()
+ for _id in self._synthetic_ids[cls]:
+ instance = self._instance_cache[cls].pop(_id)
+ self._try_flush(instance, expunge=True)
+ self._synthetic_ids[cls] = set()
+
def num_queries(self, cls=None):
if cls is None:
return sum([len(c) for c in self._query_cache.values()])
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/31b47857/Allura/allura/tests/model/test_repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/model/test_repo.py b/Allura/allura/tests/model/test_repo.py
index 365251a..83c4cb4 100644
--- a/Allura/allura/tests/model/test_repo.py
+++ b/Allura/allura/tests/model/test_repo.py
@@ -539,8 +539,8 @@ class TestModelCache(unittest.TestCase):
@mock.patch.object(M.repo.Tree.query, 'get')
@mock.patch.object(M.repo.LastCommit.query, 'get')
def test_get(self, lc_get, tr_get):
- tree = tr_get.return_value = mock.Mock(_id='foo', val='bar')
- lcd = lc_get.return_value = mock.Mock(_id='foo', val='qux')
+ tree = tr_get.return_value = mock.Mock(spec=['_id', 'val'], _id='foo', val='bar')
+ lcd = lc_get.return_value = mock.Mock(spec=['_id', 'val'], _id='foo', val='qux')
val = self.cache.get(M.repo.Tree, {'_id': 'foo'})
tr_get.assert_called_with(_id='foo')
@@ -552,7 +552,7 @@ class TestModelCache(unittest.TestCase):
@mock.patch.object(M.repo.Tree.query, 'get')
def test_get_no_query(self, tr_get):
- tree1 = tr_get.return_value = mock.Mock(_id='foo', val='bar')
+ tree1 = tr_get.return_value = mock.Mock(spec=['_id', 'val'], _id='foo', val='bar')
val = self.cache.get(M.repo.Tree, {'_id': 'foo'})
tr_get.assert_called_once_with(_id='foo')
self.assertEqual(val, tree1)
@@ -564,13 +564,13 @@ class TestModelCache(unittest.TestCase):
@mock.patch.object(M.repo.TreesDoc.m, 'get')
def test_get_doc(self, tr_get):
- trees = tr_get.return_value = mock.Mock(_id='foo', val='bar')
+ trees = tr_get.return_value = mock.Mock(spec=['_id', 'val'], _id='foo', val='bar')
val = self.cache.get(M.repo.TreesDoc, {'_id': 'foo'})
tr_get.assert_called_once_with(_id='foo')
self.assertEqual(val, trees)
def test_set(self):
- tree = mock.Mock(_id='foo', val='test_set')
+ tree = mock.Mock(spec=['_id', 'test_set'], _id='foo', val='test_set')
self.cache.set(M.repo.Tree, {'val': 'test_set'}, tree)
self.assertEqual(self.cache._query_cache, {M.repo.Tree: {(('val', 'test_set'),): 'foo'}})
self.assertEqual(self.cache._instance_cache, {M.repo.Tree: {'foo': tree}})
@@ -578,21 +578,47 @@ class TestModelCache(unittest.TestCase):
@mock.patch('bson.ObjectId')
def test_set_none_id(self, obj_id):
obj_id.return_value = 'OBJID'
- tree = mock.Mock(_id=None, val='test_set')
- self.cache.set(M.repo.Tree, {'val': 'test_set'}, tree)
- self.assertEqual(self.cache._query_cache, {M.repo.Tree: {(('val', 'test_set'),): 'None_OBJID'}})
- self.assertEqual(self.cache._instance_cache, {M.repo.Tree: {'None_OBJID': tree}})
+ tree = mock.Mock(spec=['_id', 'test_set'], _id=None, val='test_set')
+ self.cache.set(M.repo.Tree, {'val1': 'test_set1'}, tree)
+ self.cache.set(M.repo.Tree, {'val2': 'test_set2'}, tree)
+ self.assertEqual(dict(self.cache._query_cache[M.repo.Tree]), {
+ (('val1', 'test_set1'),): 'OBJID',
+ (('val2', 'test_set2'),): 'OBJID',
+ })
+ self.assertEqual(self.cache._instance_cache, {M.repo.Tree: {'OBJID': tree}})
+ tree._id = '_id'
+ self.assertEqual(self.cache.get(M.repo.Tree, {'val1': 'test_set1'}), tree)
+ self.assertEqual(self.cache.get(M.repo.Tree, {'val2': 'test_set2'}), tree)
+ self.cache.set(M.repo.Tree, {'val1': 'test_set2'}, tree)
+ self.assertEqual(self.cache.get(M.repo.Tree, {'val1': 'test_set1'}), tree)
+ self.assertEqual(self.cache.get(M.repo.Tree, {'val2': 'test_set2'}), tree)
@mock.patch('bson.ObjectId')
def test_set_none_val(self, obj_id):
obj_id.return_value = 'OBJID'
- self.cache.set(M.repo.Tree, {'val': 'test_set'}, None)
- self.assertEqual(self.cache._query_cache, {M.repo.Tree: {(('val', 'test_set'),): 'None_OBJID'}})
- self.assertEqual(self.cache._instance_cache, {M.repo.Tree: {'None_OBJID': None}})
+ self.cache.set(M.repo.Tree, {'val1': 'test_set1'}, None)
+ self.cache.set(M.repo.Tree, {'val2': 'test_set2'}, None)
+ self.assertEqual(dict(self.cache._query_cache[M.repo.Tree]), {
+ (('val1', 'test_set1'),): None,
+ (('val2', 'test_set2'),): None,
+ })
+ self.assertEqual(dict(self.cache._instance_cache[M.repo.Tree]), {})
+ tree1 = mock.Mock(spec=['_id', 'val'], _id='tree1', val='test_set')
+ tree2 = mock.Mock(spec=['_model_cache_id', '_id', 'val'], _model_cache_id='tree2', _id='tree1', val='test_set2')
+ self.cache.set(M.repo.Tree, {'val1': 'test_set1'}, tree1)
+ self.cache.set(M.repo.Tree, {'val2': 'test_set2'}, tree2)
+ self.assertEqual(dict(self.cache._query_cache[M.repo.Tree]), {
+ (('val1', 'test_set1'),): 'tree1',
+ (('val2', 'test_set2'),): 'tree2',
+ })
+ self.assertEqual(dict(self.cache._instance_cache[M.repo.Tree]), {
+ 'tree1': tree1,
+ 'tree2': tree2,
+ })
def test_instance_ids(self):
- tree1 = mock.Mock(_id='id1', val='tree1')
- tree2 = mock.Mock(_id='id2', val='tree2')
+ tree1 = mock.Mock(spec=['_id', 'val'], _id='id1', val='tree1')
+ tree2 = mock.Mock(spec=['_id', 'val'], _id='id2', val='tree2')
self.cache.set(M.repo.Tree, {'val': 'tree1'}, tree1)
self.cache.set(M.repo.Tree, {'val': 'tree2'}, tree2)
self.assertEqual(set(self.cache.instance_ids(M.repo.Tree)), set(['id1', 'id2']))
@@ -601,8 +627,8 @@ class TestModelCache(unittest.TestCase):
@mock.patch.object(M.repo.Tree.query, 'find')
def test_batch_load(self, tr_find):
# cls, query, attrs
- m1 = mock.Mock(_id='id1', foo=1, qux=3)
- m2 = mock.Mock(_id='id2', foo=2, qux=5)
+ m1 = mock.Mock(spec=['_id', 'foo', 'qux'], _id='id1', foo=1, qux=3)
+ m2 = mock.Mock(spec=['_id', 'foo', 'qux'], _id='id2', foo=2, qux=5)
tr_find.return_value = [m1, m2]
self.cache.batch_load(M.repo.Tree, {'foo': {'$in': 'bar'}})
@@ -619,8 +645,8 @@ class TestModelCache(unittest.TestCase):
@mock.patch.object(M.repo.Tree.query, 'find')
def test_batch_load_attrs(self, tr_find):
# cls, query, attrs
- m1 = mock.Mock(_id='id1', foo=1, qux=3)
- m2 = mock.Mock(_id='id2', foo=2, qux=5)
+ m1 = mock.Mock(spec=['_id', 'foo', 'qux'], _id='id1', foo=1, qux=3)
+ m2 = mock.Mock(spec=['_id', 'foo', 'qux'], _id='id2', foo=2, qux=5)
tr_find.return_value = [m1, m2]
self.cache.batch_load(M.repo.Tree, {'foo': {'$in': 'bar'}}, ['qux'])
@@ -637,10 +663,10 @@ class TestModelCache(unittest.TestCase):
def test_pruning(self):
cache = M.repo.ModelCache(max_queries=3, max_instances=2)
# ensure cache expires as LRU
- tree1 = mock.Mock(_id='foo', val='bar')
- tree2 = mock.Mock(_id='qux', val='fuz')
- tree3 = mock.Mock(_id='f00', val='b4r')
- tree4 = mock.Mock(_id='foo', val='zaz')
+ tree1 = mock.Mock(spec=['_id', '_val'], _id='foo', val='bar')
+ tree2 = mock.Mock(spec=['_id', '_val'], _id='qux', val='fuz')
+ tree3 = mock.Mock(spec=['_id', '_val'], _id='f00', val='b4r')
+ tree4 = mock.Mock(spec=['_id', '_val'], _id='foo', val='zaz')
cache.set(M.repo.Tree, {'_id': 'foo'}, tree1)
cache.set(M.repo.Tree, {'_id': 'qux'}, tree2)
cache.set(M.repo.Tree, {'_id': 'f00'}, tree3)
@@ -664,10 +690,10 @@ class TestModelCache(unittest.TestCase):
def test_pruning_query_vs_instance(self):
cache = M.repo.ModelCache(max_queries=3, max_instances=2)
# ensure cache expires as LRU
- tree1 = mock.Mock(_id='keep', val='bar')
- tree2 = mock.Mock(_id='tree2', val='fuz')
- tree3 = mock.Mock(_id='tree3', val='b4r')
- tree4 = mock.Mock(_id='tree4', val='zaz')
+ tree1 = mock.Mock(spec=['_id', '_val'], _id='keep', val='bar')
+ tree2 = mock.Mock(spec=['_id', '_val'], _id='tree2', val='fuz')
+ tree3 = mock.Mock(spec=['_id', '_val'], _id='tree3', val='b4r')
+ tree4 = mock.Mock(spec=['_id', '_val'], _id='tree4', val='zaz')
cache.set(M.repo.Tree, {'keep_query_1': 'bar'}, tree1)
cache.set(M.repo.Tree, {'drop_query_1': 'bar'}, tree2)
cache.set(M.repo.Tree, {'keep_query_2': 'bar'}, tree1) # should refresh tree1 in _instance_cache
@@ -681,3 +707,85 @@ class TestModelCache(unittest.TestCase):
'keep': tree1,
'tree3': tree3,
})
+
+ @mock.patch('bson.ObjectId')
+ def test_pruning_no_id(self, obj_id):
+ obj_id.side_effect = ['id1', 'id2', 'id3']
+ cache = M.repo.ModelCache(max_queries=3, max_instances=2)
+ # ensure cache considers same instance equal to itself, even if no _id
+ tree1 = mock.Mock(spec=['val'], val='bar')
+ cache.set(M.repo.Tree, {'query_1': 'bar'}, tree1)
+ cache.set(M.repo.Tree, {'query_2': 'bar'}, tree1)
+ cache.set(M.repo.Tree, {'query_3': 'bar'}, tree1)
+ self.assertEqual(cache._instance_cache[M.repo.Tree], {
+ 'id1': tree1,
+ })
+ self.assertEqual(cache._query_cache[M.repo.Tree], {
+ (('query_1', 'bar'),): 'id1',
+ (('query_2', 'bar'),): 'id1',
+ (('query_3', 'bar'),): 'id1',
+ })
+
+ @mock.patch('bson.ObjectId')
+ def test_pruning_none(self, obj_id):
+ obj_id.side_effect = ['id1', 'id2', 'id3']
+ cache = M.repo.ModelCache(max_queries=3, max_instances=2)
+ # ensure cache doesn't store None instances
+ cache.set(M.repo.Tree, {'query_1': 'bar'}, None)
+ cache.set(M.repo.Tree, {'query_2': 'bar'}, None)
+ cache.set(M.repo.Tree, {'query_3': 'bar'}, None)
+ self.assertEqual(cache._instance_cache[M.repo.Tree], {})
+ self.assertEqual(cache._query_cache[M.repo.Tree], {
+ (('query_1', 'bar'),): None,
+ (('query_2', 'bar'),): None,
+ (('query_3', 'bar'),): None,
+ })
+
+ @mock.patch('allura.model.repo.session')
+ @mock.patch.object(M.repo.Tree.query, 'get')
+ def test_pruning_query_flush(self, tr_get, session):
+ cache = M.repo.ModelCache(max_queries=3, max_instances=2)
+ # ensure evicted queries flush (but don't expunge) their instances
+ tree1 = mock.Mock(name='tree1', spec=['_id', '_val'], _id='tree1', val='bar')
+ tree2 = mock.Mock(name='tree2', spec=['_id', '_val'], _id='tree2', val='fuz')
+ tr_get.return_value = tree2
+ cache.set(M.repo.Tree, {'_id': 'tree1'}, tree1)
+ cache.set(M.repo.Tree, {'_id': 'tree2'}, tree2)
+ cache.get(M.repo.Tree, {'query_1': 'tree2'})
+ cache.get(M.repo.Tree, {'query_2': 'tree2'})
+ cache.get(M.repo.Tree, {'query_3': 'tree2'})
+ self.assertEqual(cache._query_cache[M.repo.Tree], {
+ (('query_1', 'tree2'),): 'tree2',
+ (('query_2', 'tree2'),): 'tree2',
+ (('query_3', 'tree2'),): 'tree2',
+ })
+ self.assertEqual(cache._instance_cache[M.repo.Tree], {
+ 'tree1': tree1,
+ 'tree2': tree2,
+ })
+ self.assertEqual(session.call_args_list, [mock.call(tree1), mock.call(tree2)])
+ self.assertEqual(session.return_value.flush.call_args_list, [mock.call(tree1), mock.call(tree2)])
+ assert not session.return_value.expunge.called
+
+ @mock.patch('allura.model.repo.session')
+ def test_pruning_instance_flush(self, session):
+ cache = M.repo.ModelCache(max_queries=3, max_instances=2)
+ # ensure evicted instances are flushed and expunged from their session
+ tree1 = mock.Mock(spec=['_id', '_val'], _id='tree1', val='bar')
+ tree2 = mock.Mock(spec=['_id', '_val'], _id='tree2', val='fuz')
+ tree3 = mock.Mock(spec=['_id', '_val'], _id='tree3', val='qux')
+ cache.set(M.repo.Tree, {'_id': 'tree1'}, tree1)
+ cache.set(M.repo.Tree, {'_id': 'tree2'}, tree2)
+ cache.set(M.repo.Tree, {'_id': 'tree3'}, tree3)
+ self.assertEqual(cache._query_cache[M.repo.Tree], {
+ (('_id', 'tree1'),): 'tree1',
+ (('_id', 'tree2'),): 'tree2',
+ (('_id', 'tree3'),): 'tree3',
+ })
+ self.assertEqual(cache._instance_cache[M.repo.Tree], {
+ 'tree2': tree2,
+ 'tree3': tree3,
+ })
+ session.assert_called_once_with(tree1)
+ session.return_value.flush.assert_called_once_with(tree1)
+ session.return_value.expunge.assert_called_once_with(tree1)
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/31b47857/scripts/refresh-last-commits.py
----------------------------------------------------------------------
diff --git a/scripts/refresh-last-commits.py b/scripts/refresh-last-commits.py
index a3e9e10..aa234e9 100644
--- a/scripts/refresh-last-commits.py
+++ b/scripts/refresh-last-commits.py
@@ -99,17 +99,21 @@ def refresh_repo_lcds(commit_ids, options):
print ' Processed %d commits (max: %f, avg: %f, tot: %f, cl: %d)' % (
len(timings), mt, at, tt, len(tree_cache))
lcd_cache = M.repo.ModelCache(
- max_instances={M.repo.LastCommit: 10000},
- max_queries={M.repo.LastCommit: 20000},
+ max_instances={M.repo.LastCommit: 4000},
+ max_queries={M.repo.LastCommit: 10000},
)
timings = []
print 'Processing last commits'
debug_step = int(pow(10, max(0, int(log10(len(commit_ids)) - log10(options.step) - 1))))
- for i, commit_id in enum_step(commit_ids, options.step):
+ _cids = commit_ids[options.skip:]
+ for i, commit_id in enum_step(_cids, options.step):
commit = M.repo.Commit.query.get(_id=commit_id)
with time(timings):
M.repo_refresh.compute_lcds(commit, lcd_cache)
- ThreadLocalORMSession.flush_all()
+ ThreadLocalORMSession.flush_all()
+ # ensure new LCDs get fully refreshed in the cache
+ # so that every commit sees the same copy
+ lcd_cache.expire_new_instances(M.repo.LastCommit)
if len(timings) % debug_step == 0:
_print_stats(lcd_cache, timings, debug_step, commit)
lcd_cache._get_walks_max = 0
@@ -197,6 +201,8 @@ def parse_options():
const=1, default=100, help='Refresh the LCD for every commit instead of every 100th')
parser.add_argument('--step', action='store', dest='step',
type=int, default=100, help='Refresh the LCD for every Nth commit instead of every 100th')
+ parser.add_argument('--skip', action='store', dest='skip',
+ type=int, default=0, help='Skip a number of commits')
return parser.parse_args()
if __name__ == '__main__':