You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by tv...@apache.org on 2013/02/05 21:23:35 UTC
[2/42] git commit: [#4691] New implementation of LastCommit info
[#4691] New implementation of LastCommit info
Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/320025c4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/320025c4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/320025c4
Branch: refs/heads/master
Commit: 320025c41f9c47a2597229328d56fabe0a9fee60
Parents: ab5af7e
Author: Cory Johns <jo...@geek.net>
Authored: Wed Oct 10 02:28:07 2012 +0000
Committer: Tim Van Steenburgh <tv...@gmail.com>
Committed: Tue Feb 5 20:22:49 2013 +0000
----------------------------------------------------------------------
Allura/allura/model/repo.py | 358 +++++++++++++++++-
Allura/allura/model/repo_refresh.py | 126 +++----
Allura/allura/model/repository.py | 15 -
Allura/allura/tests/model/test_repo.py | 544 +++++++++++++++++++++++++++
ForgeSVN/forgesvn/model/svn.py | 33 ++-
scripts/refresh-all-repos.py | 11 +-
scripts/refresh-last-commits.py | 172 +++++++++
7 files changed, 1149 insertions(+), 110 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/320025c4/Allura/allura/model/repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
index 10a6521..f71142f 100644
--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -11,7 +11,7 @@ from difflib import SequenceMatcher, unified_diff
from pylons import c
import pymongo.errors
-from ming import Field, collection
+from ming import Field, collection, Index
from ming import schema as S
from ming.base import Object
from ming.utils import LazyProperty
@@ -61,9 +61,7 @@ TreeDoc = collection(
Field('blob_ids', [dict(name=str, id=str)]),
Field('other_ids', [dict(name=str, id=str, type=SObjType)]))
-# Information about the last commit to touch a tree/blob
-# LastCommitDoc.object_id = TreeDoc._id
-LastCommitDoc = collection(
+LastCommitDoc_old = collection(
'repo_last_commit', project_doc_session,
Field('_id', str),
Field('object_id', str, index=True),
@@ -77,6 +75,25 @@ LastCommitDoc = collection(
shortlink=str,
summary=str)))
+# Information about the last commit to touch a tree
+LastCommitDoc = collection(
+ 'repo_last_commit', main_doc_session,
+ Field('_id', S.ObjectId()),
+ Field('commit_ids', [str]),
+ Field('path', str),
+ Index('commit_ids', 'path'),
+ Field('entries', [dict(
+ type=str,
+ name=str,
+ commit_info=dict(
+ id=str,
+ date=datetime,
+ author=str,
+ author_email=str,
+ author_url=str,
+ shortlink=str,
+ summary=str))]))
+
# List of all trees contained within a commit
# TreesDoc._id = CommitDoc._id
# TreesDoc.tree_ids = [ TreeDoc._id, ... ]
@@ -160,7 +177,8 @@ class Commit(RepoObject):
self.tree_id = self.repo.compute_tree_new(self)
if self.tree_id is None:
return None
- t = Tree.query.get(_id=self.tree_id)
+ cache = getattr(c, 'model_cache', '') or ModelCache()
+ t = cache.get(Tree, dict(_id=self.tree_id))
if t is None:
self.tree_id = self.repo.compute_tree_new(self)
t = Tree.query.get(_id=self.tree_id)
@@ -182,13 +200,29 @@ class Commit(RepoObject):
def symbolic_ids(self):
return self.repo.symbolics_for_commit(self)
- def parent(self, index=0):
- ci = None
- if self.parent_ids:
- ci = self.query.get(_id=self.parent_ids[index])
- if ci:
+ def get_parent(self, index=0):
+ '''Get the parent of this commit.
+
+ If there is no parent commit, or if an invalid index is given,
+ returns None.
+ '''
+ try:
+ cache = getattr(c, 'model_cache', '') or ModelCache()
+ ci = cache.get(Commit, dict(_id=self.parent_ids[index]))
ci.set_context(self.repo)
- return ci
+ return ci
+ except IndexError as e:
+ return None
+
+ def climb_commit_tree(self):
+ '''
+ Returns a generator that walks up the commit tree along
+ the first-parent ancestry, starting with this commit.'''
+ yield self
+ ancestor = self.get_parent()
+ while ancestor:
+ yield ancestor
+ ancestor = ancestor.get_parent()
def url(self):
if self.repo is None: self.repo = self.guess_repo()
@@ -293,7 +327,7 @@ class Commit(RepoObject):
if not removed:
return []
copied = []
- prev_commit = self.parent()
+ prev_commit = self.get_parent()
for removed_name in removed[:]:
removed_blob = prev_commit.tree.get_obj_by_path(removed_name)
rename_info = None
@@ -316,6 +350,43 @@ class Commit(RepoObject):
cur = cur[part]
return cur
+ @LazyProperty
+ def changed_paths(self):
+ '''
+ Returns a list of paths changed in this commit.
+ Leading and trailing slashes are removed, and
+ the list is complete, meaning that if a sub-path
+ is changed, all of the parent paths are included
+ (including '' to represent the root path).
+
+ Example:
+
+ If the file /foo/bar is changed in the commit,
+ this would return ['', 'foo', 'foo/bar']
+ '''
+ diff_info = DiffInfoDoc.m.get(_id=self._id)
+ diffs = set()
+ for d in diff_info.differences:
+ diffs.add(d.name.strip('/'))
+ node_path = os.path.dirname(d.name)
+ while node_path:
+ diffs.add(node_path)
+ node_path = os.path.dirname(node_path)
+ diffs.add('') # include '/' if there are any changes
+ return diffs
+
+ @LazyProperty
+ def info(self):
+ return dict(
+ id=self._id,
+ author=self.authored.name,
+ author_email=self.authored.email,
+ date=self.authored.date,
+ author_url=self.author_url,
+ shortlink=self.shorthand_id(),
+ summary=self.summary
+ )
+
class Tree(RepoObject):
# Ephemeral attrs
repo=None
@@ -337,13 +408,14 @@ class Tree(RepoObject):
return sha_obj.hexdigest()
def __getitem__(self, name):
+ cache = getattr(c, 'model_cache', '') or ModelCache()
obj = self.by_name[name]
if obj['type'] == 'blob':
return Blob(self, name, obj['id'])
- obj = self.query.get(_id=obj['id'])
+ obj = cache.get(Tree, dict(_id=obj['id']))
if obj is None:
oid = self.repo.compute_tree_new(self.commit, self.path() + name + '/')
- obj = self.query.get(_id=oid)
+ obj = cache.get(Tree, dict(_id=oid))
if obj is None: raise KeyError, name
obj.set_context(self, name)
return obj
@@ -386,22 +458,71 @@ class Tree(RepoObject):
return None, None
def ls(self):
+ '''
+ List the entries in this tree, with historical commit info for
+ each node. Eventually, ls_old can be removed and this can be
+ replaced with the following:
+
+ last_commit = LastCommit.get(self)
+ return sorted(last_commit.entries, cmp=lambda a,b: cmp(b.type,a.type) or cmp(a.name,b.name))
+ '''
+ # look for existing new format first
+ last_commit = LastCommit.query.get(
+ commit_ids=self.commit._id,
+ path=self.path().strip('/'),
+ )
+ if last_commit:
+ sorted_entries = sorted(last_commit.entries, cmp=lambda a,b: cmp(b.type,a.type) or cmp(a.name,b.name))
+ mapped_entries = [self._dirent_map(e) for e in sorted_entries]
+ return mapped_entries
+ # otherwise, try old format
+ old_style_results = self.ls_old()
+ if old_style_results:
+ return old_style_results
+ # finally, use the new implementation that auto-vivifies
+ last_commit = LastCommit.get(self)
+ sorted_entries = sorted(last_commit.entries, cmp=lambda a,b: cmp(b.type,a.type) or cmp(a.name,b.name))
+ mapped_entries = [self._dirent_map(e) for e in sorted_entries]
+ return mapped_entries
+
+ def _dirent_map(self, dirent):
+ return dict(
+ kind=dirent.type,
+ name=dirent.name,
+ href=dirent.name + '/',
+ last_commit=dict(
+ author=dirent.commit_info.author,
+ author_email=dirent.commit_info.author_email,
+ author_url=dirent.commit_info.author_url,
+ date=dirent.commit_info.date,
+ href=self.repo.url_for_commit(dirent.commit_info['id']),
+ shortlink=dirent.commit_info.shortlink,
+ summary=dirent.commit_info.summary,
+ ),
+ )
+
+ def ls_old(self):
# Load last commit info
id_re = re.compile("^{0}:{1}:".format(
self.repo._id,
re.escape(h.really_unicode(self.path()).encode('utf-8'))))
lc_index = dict(
(lc.name, lc.commit_info)
- for lc in LastCommitDoc.m.find(dict(_id=id_re)))
+ for lc in LastCommitDoc_old.m.find(dict(_id=id_re)))
# FIXME: Temporarily fall back to old, semi-broken lookup behavior until refresh is done
oids = [ x.id for x in chain(self.tree_ids, self.blob_ids, self.other_ids) ]
id_re = re.compile("^{0}:".format(self.repo._id))
lc_index.update(dict(
(lc.object_id, lc.commit_info)
- for lc in LastCommitDoc.m.find(dict(_id=id_re, object_id={'$in': oids}))))
+ for lc in LastCommitDoc_old.m.find(dict(_id=id_re, object_id={'$in': oids}))))
# /FIXME
+ if not lc_index:
+ # allow fallback to new method instead
+ # of showing a bunch of Nones
+ return []
+
results = []
def _get_last_commit(name, oid):
lc = lc_index.get(name, lc_index.get(oid, None))
@@ -569,5 +690,210 @@ class Blob(object):
differ = SequenceMatcher(v0, v1)
return differ.get_opcodes()
+class LastCommit(RepoObject):
+ def __repr__(self):
+ return '<LastCommit /%s [%s]>' % (self.path, ',\n '.join(self.commit_ids))
+
+ @classmethod
+ def get(cls, tree):
+ '''Find the LastCommitDoc for the given tree.
+
+ Climbs the commit tree until either:
+
+ 1) An LCD is found for the given tree. (If the LCD was not found for the
+ tree's commit, the commits traversed while searching for it are
+ added to the LCD for faster retrieval in the future.)
+
+ 2) The commit in which the tree was most recently modified is found.
+ In this case, we know that the LCD hasn't been constructed for this
+ (chain of) commit(s), and it will have to be built.
+ '''
+ cache = getattr(c, 'model_cache', '') or ModelCache()
+ path = tree.path().strip('/')
+ commit_ids = []
+ cache._get_calls += 1
+ gw = 0
+ for commit in tree.commit.climb_commit_tree():
+ last_commit = cache.get(LastCommit, dict(
+ commit_ids=commit._id,
+ path=path,
+ ))
+ if last_commit:
+ cache._get_hits += 1
+ # found our LCD; add any traversed commits to it
+ if commit_ids:
+ last_commit.commit_ids.extend(commit_ids)
+ for commit_id in commit_ids:
+ cache.set(LastCommit, dict(commit_ids=commit_id, path=path), last_commit)
+ return last_commit
+ commit_ids.append(commit._id)
+ if path in commit.changed_paths:
+ cache._get_misses += 1
+ # tree was changed but no LCD found; have to build
+ tree = commit.tree
+ if path != '':
+ tree = tree.get_obj_by_path(path)
+ return cls.build(tree, commit_ids)
+ cache._get_walks += 1
+ gw += 1
+ cache._get_walks_max = max(cache._get_walks_max, gw)
+
+ @classmethod
+ def build(cls, tree, commit_ids=[]):
+ '''
+ Build the LCD record, presuming that this tree is where it was most
+ recently changed.
+
+ To build the LCD, we climb the commit tree, keeping track of which
+ entries we still need info about. (For multi-parent commits, it
+ doesn't matter which parent we follow because those would be merge
+ commits and ought to have the diff info populated for any file
+ touched by the merge.) At each step of the walk, we check the following:
+
+ 1) If the current tree has an LCD record, we can pull all the remaining
+ info we need from it, and we're done.
+
+ 2) If the tree was modified in this commit, then we pull the info for
+ all changed entries, then continue up the tree. Once we have data
+ for all entries, we're done.
+
+ (It may be possible to optimize this for SVN, if SVN can return all of
+ the LCD info from a single call and if that turns out to be more efficient
+ than walking up the tree. It is unclear if those hold without testing.)
+ '''
+ cache = getattr(c, 'model_cache', '') or ModelCache()
+ unfilled = set([n.name for n in chain(tree.tree_ids, tree.blob_ids, tree.other_ids)])
+ tree_nodes = set([n.name for n in tree.tree_ids])
+ path = tree.path().strip('/')
+ lcd = cls(
+ commit_ids=commit_ids,
+ path=path,
+ entries=[],
+ )
+ cache._build_calls += 1
+ bw = 0
+ for commit in tree.commit.climb_commit_tree():
+ partial_lcd = cache.get(LastCommit, dict(
+ commit_ids=commit._id,
+ path=path,
+ ))
+ for name in list(unfilled):
+ if os.path.join(path, name) in commit.changed_paths:
+ # changed in this commit, so gather the data
+ lcd.entries.append(dict(
+ type=name in tree_nodes and 'DIR' or 'BLOB',
+ name=name,
+ commit_info=commit.info,
+ ))
+ unfilled.remove(name)
+ elif partial_lcd:
+ # the partial LCD should contain anything we're missing
+ entry = partial_lcd.entry_by_name(name)
+ assert entry
+ lcd.entries.append(entry)
+ unfilled.remove(name)
+
+ if not unfilled:
+ break
+ cache._build_walks += 1
+ bw += 1
+ cache._build_walks_max = max(cache._build_walks_max, bw)
+ for commit_id in commit_ids:
+ cache.set(LastCommit, dict(commit_ids=commit_id, path=path), lcd)
+ return lcd
+
+ def entry_by_name(self, name):
+ for entry in self.entries:
+ if entry.name == name:
+ return entry
+ return None
+
mapper(Commit, CommitDoc, repository_orm_session)
mapper(Tree, TreeDoc, repository_orm_session)
+mapper(LastCommit, LastCommitDoc, repository_orm_session)
+
+
+class ModelCache(object):
+ '''
+ Cache model instances based on query params passed to get.
+ '''
+ def __init__(self, max_size=2000):
+ '''
+ The max_size of the cache is tracked separately for
+ each model class stored. I.e., you can have 2000
+ Commit instances and 2000 Tree instances in the cache
+ at once with the default value.
+ '''
+ self._cache = defaultdict(dict)
+ self.max_size = max_size
+ self._insertion_order = defaultdict(list)
+ # temporary, for performance testing
+ self._hits = 0
+ self._misses = 0
+ self._get_calls = 0
+ self._get_walks = 0
+ self._get_walks_max = 0
+ self._get_hits = 0
+ self._get_misses = 0
+ self._build_calls = 0
+ self._build_walks = 0
+ self._build_walks_max = 0
+
+ def _normalize_key(self, key):
+ _key = key
+ if not isinstance(_key, tuple):
+ _key = tuple(sorted(_key.items(), key=lambda k: k[0]))
+ return _key
+
+ def get(self, cls, key):
+ _key = self._normalize_key(key)
+ if _key not in self._cache[cls]:
+ self._misses += 1
+ query = getattr(cls, 'query', getattr(cls, 'm', None))
+ self.set(cls, _key, query.get(**key))
+ else:
+ self._hits += 1
+ return self._cache[cls][_key]
+
+ def set(self, cls, key, val):
+ _key = self._normalize_key(key)
+ self._manage_cache(cls, _key)
+ self._cache[cls][_key] = val
+
+ def _manage_cache(self, cls, key):
+ '''
+ Keep track of insertion order, prevent duplicates,
+ and expire from the cache in a FIFO manner.
+ '''
+ if key in self._cache[cls]:
+ return
+ self._insertion_order[cls].append(key)
+ if len(self._insertion_order[cls]) > self.max_size:
+ _key = self._insertion_order[cls].pop(0)
+ self._cache[cls].pop(_key)
+
+ def size(self):
+ return sum([len(c) for c in self._insertion_order.values()])
+
+ def keys(self, cls):
+ '''
+ Returns all the cache keys for a given class. Each
+ cache key will be a dict.
+ '''
+ if self._cache[cls]:
+ return [dict(k) for k in self._cache[cls].keys()]
+ return []
+
+ def batch_load(self, cls, query, attrs=None):
+ '''
+ Load multiple results given a query.
+
+ Optionally takes a list of attribute names to use
+ as the cache key. If not given, uses the keys of
+ the given query.
+ '''
+ if attrs is None:
+ attrs = query.keys()
+ for result in cls.query.find(query):
+ keys = {a: getattr(result, a) for a in attrs}
+ self.set(cls, keys, result)
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/320025c4/Allura/allura/model/repo_refresh.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo_refresh.py b/Allura/allura/model/repo_refresh.py
index 6e0db59..149fcae 100644
--- a/Allura/allura/model/repo_refresh.py
+++ b/Allura/allura/model/repo_refresh.py
@@ -2,12 +2,13 @@ import logging
from itertools import chain
from cPickle import dumps
import re
+import os
import bson
import tg
-from pylons import g
+from pylons import g,c
from ming.base import Object
from ming.orm import mapper, session
@@ -16,7 +17,7 @@ from allura.lib import utils
from allura.lib import helpers as h
from allura.model.repo import CommitDoc, TreeDoc, TreesDoc, DiffInfoDoc
from allura.model.repo import LastCommitDoc, CommitRunDoc
-from allura.model.repo import Commit
+from allura.model.repo import Commit, Tree, LastCommit, ModelCache
from allura.model.index import ArtifactReferenceDoc, ShortlinkDoc
log = logging.getLogger(__name__)
@@ -88,17 +89,25 @@ def refresh_repo(repo, all_commits=False, notify=True):
# Compute diffs
cache = {}
- # Have to compute_diffs() for all commits to ensure that LastCommitDocs
- # are set properly for forked repos. For some SCMs, compute_diffs()
- # we don't want to pre-compute the diffs because that would be too
- # expensive, so we skip them here and do them on-demand with caching.
+ # For some SCMs, we don't want to pre-compute the diffs because that
+ # would be too expensive, so we skip them here and do them on-demand
+ # with caching.
if repo._refresh_precompute:
- for i, oid in enumerate(reversed(all_commit_ids)):
+ for i, oid in enumerate(commit_ids):
ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
compute_diffs(repo._id, cache, ci)
if (i+1) % 100 == 0:
log.info('Compute diffs %d: %s', (i+1), ci._id)
+ if repo._refresh_precompute:
+ cache = ModelCache()
+ for i, oid in enumerate(reversed(commit_ids)):
+ ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
+ compute_lcds(ci, cache)
+ if (i+1) % 100 == 0:
+ log.info('Compute last commit info %d: %s', (i+1), ci._id)
+
+
log.info('Refresh complete for %s', repo.full_fs_path)
g.post_event(
'repo_refreshed',
@@ -348,7 +357,6 @@ def compute_diffs(repo_id, tree_cache, rhs_ci):
dict(name=name, lhs_id=lhs_id, rhs_id=rhs_id))
# Set last commit data
rhs_tree = tree_index[rhs_ci.tree_id]
- refresh_last_commit(repo_id, '/', rhs_tree, lhs_tree, None, commit_info)
# Build the diffinfo
di = DiffInfoDoc(dict(
_id=rhs_ci._id,
@@ -420,18 +428,21 @@ def _diff_trees(lhs, rhs, index, *path):
(o.name, o.id)
for o in rhs.tree_ids)
for o in lhs.tree_ids:
- rhs_id = rhs_tree_ids.pop(o.name, None)
- if rhs_id == o.id:
- continue # no change
- elif rhs_id is None:
+ rhs_id = rhs_tree_ids.pop(o.name, None) # remove so won't be picked up as added, below
+ if rhs_id == o.id: # no change
+ continue
+ elif rhs_id is None: # removed
yield (_fq(o.name), o.id, None)
- else:
- for difference in _diff_trees(
- index[o.id], index[rhs_id], index,
- o.name, *path):
- yield difference
- for name, id in rhs_tree_ids.items():
+ rhs_tree = Object(_id=None, tree_ids=[], blob_ids=[], other_ids=[])
+ else: # changed
+ rhs_tree = index[rhs_id]
+ for difference in _diff_trees(index[o.id], rhs_tree, index, o.name, *path):
+ yield difference
+ for name, id in rhs_tree_ids.items(): # added
yield (_fq(name), None, id)
+ lhs_tree = Object(_id=None, tree_ids=[], blob_ids=[], other_ids=[])
+ for difference in _diff_trees(lhs_tree, index[id], index, name, *path):
+ yield difference
# DIff the blobs
rhs_blob_ids = dict(
(o.name, o.id)
@@ -462,53 +473,6 @@ def get_commit_info(commit):
summary=commit.summary
)
-def refresh_last_commit(repo_id, path, tree, lhs_tree, parent_tree, commit_info):
- '''Build the LastCommit info.
-
- We only need to create LastCommit info for objects that are in the
- RHS but not in the LHS, because only those objects are only ones
- who have had anything changed in them. (If file x/y/z.txt changes,
- then it's hash will change, which also forces the hash for tree x/y
- to change, as well as the hash for tree x. So as long as an object's
- hash isn't in the LHS, it means it's new or modified in this commit.)
-
- In order to uniquely identify the tree or blob that a LastCommitDoc is
- for, the tree or blob hash is not sufficient; we also need to know
- either it's full path name, or it's parent tree and name. Because of
- this, we have to walk down the commit tree.'''
- if lhs_tree is not None and tree._id == lhs_tree._id:
- # tree was not changed in this commit (nor was anything under it)
- return
-
- # map LHS entries for easy lookup
- lhs_map = {}
- if lhs_tree:
- for lhs_child in chain(lhs_tree.tree_ids, lhs_tree.blob_ids, lhs_tree.other_ids):
- lhs_map[lhs_child.name] = lhs_child.id
-
- # update our children
- for child in chain(tree.tree_ids, tree.blob_ids, tree.other_ids):
- if child.id != lhs_map.get(child.name, None): # check if changed in this commit
- lc = set_last_commit(repo_id, path, child.name, child.id, commit_info)
-
- # (re)curse at our child trees
- for child_tree in tree.tree_ids:
- child_name = child_tree.name
- child_tree = TreeDoc.m.get(_id=child_tree.id)
- lhs_child = None
- if child_name in lhs_map:
- lhs_child = TreeDoc.m.get(_id=lhs_map[child_name])
- refresh_last_commit(repo_id, path + child_name + '/', child_tree, lhs_child, tree, commit_info)
-
-def set_last_commit(repo_id, path, name, oid, commit_info):
- lc = LastCommitDoc(dict(
- _id='%s:%s:%s' % (repo_id, path, name),
- object_id=oid,
- name=name,
- commit_info=commit_info))
- lc.m.save(safe=False, upsert=True)
- return lc
-
def last_known_commit_id(all_commit_ids, new_commit_ids):
"""
Return the newest "known" (cached in mongo) commit id.
@@ -522,3 +486,35 @@ def last_known_commit_id(all_commit_ids, new_commit_ids):
if not all_commit_ids: return None
if not new_commit_ids: return all_commit_ids[-1]
return all_commit_ids[all_commit_ids.index(new_commit_ids[0]) - 1]
+
+
+def compute_lcds(commit, cache):
+ '''
+ Compute LastCommit data for every Tree node under this tree.
+ '''
+ trees = cache.get(TreesDoc, dict(_id=commit._id))
+ if not trees:
+ log.error('Missing TreesDoc for %s; skipping compute_lcd' % commit)
+ return
+ _update_tree_cache(trees.tree_ids, cache)
+ c.model_cache = cache
+ for tree in _walk_commit_tree(commit, cache):
+ lcd = LastCommit.get(tree) # auto-vivify LCD
+
+def _walk_commit_tree(commit, cache):
+ def _walk_tree(tree):
+ yield tree
+ for x in tree.tree_ids:
+ sub_tree = cache.get(Tree, dict(_id=x.id))
+ sub_tree.set_context(tree, x.name)
+ for xx in _walk_tree(sub_tree):
+ yield xx
+ top_tree = cache.get(Tree, dict(_id=commit.tree_id))
+ top_tree.set_context(commit)
+ return _walk_tree(top_tree)
+
+def _update_tree_cache(tree_ids, cache):
+ current_ids = set(tree_ids)
+ cached_ids = set([k['_id'] for k in cache.keys(Tree)])
+ new_ids = current_ids - cached_ids
+ cache.batch_load(Tree, {'_id': {'$in': list(new_ids)}})
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/320025c4/Allura/allura/model/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
index 8aa91e8..c322658 100644
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -445,21 +445,6 @@ class Repository(Artifact, ActivityObject):
with self.push_upstream_context():
return MergeRequest.query.find(q).count()
- def get_last_commit(self, obj):
- from .repo import LastCommitDoc
- lc = LastCommitDoc.m.get(
- repo_id=self._id, object_id=obj._id)
- if lc is None:
- return dict(
- author=None,
- author_email=None,
- author_url=None,
- date=None,
- id=None,
- shortlink=None,
- summary=None)
- return lc.commit_info
-
@property
def forks(self):
return self.query.find({'upstream_repo.name': self.url()}).all()
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/320025c4/Allura/allura/tests/model/test_repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/model/test_repo.py b/Allura/allura/tests/model/test_repo.py
index 90eaac1..0dbff66 100644
--- a/Allura/allura/tests/model/test_repo.py
+++ b/Allura/allura/tests/model/test_repo.py
@@ -1,5 +1,11 @@
+from datetime import datetime
+from collections import defaultdict
+import unittest
+import mock
from nose.tools import assert_equal
from pylons import c
+from bson import ObjectId
+from ming.orm import session
from alluratest.controller import setup_basic_test, setup_global_objects
from allura import model as M
@@ -67,3 +73,541 @@ class RepoImplTestBase(object):
self.assertEqual(run.commit_ids, commit_ids)
self.assertEqual(len(run.commit_ids), len(run.commit_times))
self.assertEqual(run.parent_commit_ids, [])
+
+
+class TestLastCommit(unittest.TestCase):
+ def setUp(self):
+ setup_basic_test()
+ setup_global_objects()
+ c.model_cache = M.repo.ModelCache()
+ self.repo = mock.Mock('repo', _commits={}, _last_commit=None)
+ self.repo.shorthand_for_commit = lambda _id: _id[:6]
+
+ def _build_tree(self, commit, path, tree_paths):
+ tree_nodes = []
+ blob_nodes = []
+ sub_paths = defaultdict(list)
+ def n(p):
+ m = mock.Mock()
+ m.name = p
+ return m
+ for p in tree_paths:
+ if '/' in p:
+ node, sub = p.split('/',1)
+ tree_nodes.append(n(node))
+ sub_paths[node].append(sub)
+ else:
+ blob_nodes.append(n(p))
+ tree = mock.Mock(
+ commit=commit,
+ path=mock.Mock(return_value=path),
+ tree_ids=tree_nodes,
+ blob_ids=blob_nodes,
+ other_ids=[],
+ )
+ tree.get_obj_by_path = lambda p: self._build_tree(commit, p, sub_paths[p])
+ return tree
+
+ def _add_commit(self, msg, tree_paths, diff_paths=None, parents=[]):
+ suser = dict(
+ name='test',
+ email='test@example.com',
+ date=datetime(2013, 1, 1 + len(self.repo._commits)),
+ )
+ commit = M.repo.Commit(
+ _id=str(ObjectId()),
+ message=msg,
+ parent_ids=[parent._id for parent in parents],
+ commited=suser,
+ authored=suser,
+ repo=self.repo,
+ )
+ commit.tree = self._build_tree(commit, '/', tree_paths)
+ diffinfo = M.repo.DiffInfoDoc(dict(
+ _id=commit._id,
+ differences=[{'name': p} for p in diff_paths or tree_paths],
+ ))
+ diffinfo.m.save()
+ self.repo._commits[commit._id] = commit
+ return commit
+
+ def test_single_commit(self):
+ commit1 = self._add_commit('Commit 1', [
+ 'file1',
+ 'dir1/file2',
+ ])
+ lcd = M.repo.LastCommit.get(commit1.tree)
+ self.assertEqual([self.repo._commits[c].message for c in lcd.commit_ids], [commit1.message])
+ self.assertEqual(lcd.path, '')
+ self.assertEqual(len(lcd.entries), 2)
+ self.assertEqual(lcd.entry_by_name('file1'), dict(
+ type='BLOB',
+ name='file1',
+ commit_info=dict(
+ summary='Commit 1',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 1),
+ author_url=None,
+ id=commit1._id,
+ shortlink=self.repo.shorthand_for_commit(commit1._id),
+ )))
+ self.assertEqual(lcd.entry_by_name('dir1'), dict(
+ type='DIR',
+ name='dir1',
+ commit_info=dict(
+ summary='Commit 1',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 1),
+ author_url=None,
+ id=commit1._id,
+ shortlink=self.repo.shorthand_for_commit(commit1._id),
+ )))
+
+ def test_multiple_commits_no_overlap(self):
+ commit1 = self._add_commit('Commit 1', ['file1'])
+ commit2 = self._add_commit('Commit 2', ['file1', 'dir1/file1'], ['dir1/file1'], [commit1])
+ commit3 = self._add_commit('Commit 3', ['file1', 'dir1/file1', 'file2'], ['file2'], [commit2])
+ lcd = M.repo.LastCommit.get(commit3.tree)
+ self.assertEqual([self.repo._commits[c].message for c in lcd.commit_ids], [commit3.message])
+ self.assertEqual(lcd.commit_ids, [commit3._id])
+ self.assertEqual(lcd.path, '')
+ self.assertEqual(len(lcd.entries), 3)
+ self.assertEqual(lcd.entry_by_name('file1'), dict(
+ type='BLOB',
+ name='file1',
+ commit_info=dict(
+ summary='Commit 1',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 1),
+ author_url=None,
+ id=commit1._id,
+ shortlink=self.repo.shorthand_for_commit(commit1._id),
+ )))
+ self.assertEqual(lcd.entry_by_name('dir1'), dict(
+ type='DIR',
+ name='dir1',
+ commit_info=dict(
+ summary='Commit 2',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 2),
+ author_url=None,
+ id=commit2._id,
+ shortlink=self.repo.shorthand_for_commit(commit2._id),
+ )))
+ self.assertEqual(lcd.entry_by_name('file2'), dict(
+ type='BLOB',
+ name='file2',
+ commit_info=dict(
+ summary='Commit 3',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 3),
+ author_url=None,
+ id=commit3._id,
+ shortlink=self.repo.shorthand_for_commit(commit3._id),
+ )))
+
+ def test_multiple_commits_with_overlap(self):
+ commit1 = self._add_commit('Commit 1', ['file1'])
+ commit2 = self._add_commit('Commit 2', ['file1', 'dir1/file1'], ['dir1/file1'], [commit1])
+ commit3 = self._add_commit('Commit 3', ['file1', 'dir1/file1', 'file2'], ['file1', 'file2'], [commit2])
+ lcd = M.repo.LastCommit.get(commit3.tree)
+ self.assertEqual([self.repo._commits[c].message for c in lcd.commit_ids], [commit3.message])
+ self.assertEqual(lcd.path, '')
+ self.assertEqual(len(lcd.entries), 3)
+ self.assertEqual(lcd.entry_by_name('file1'), dict(
+ type='BLOB',
+ name='file1',
+ commit_info=dict(
+ summary='Commit 3',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 3),
+ author_url=None,
+ id=commit3._id,
+ shortlink=self.repo.shorthand_for_commit(commit3._id),
+ )))
+ self.assertEqual(lcd.entry_by_name('dir1'), dict(
+ type='DIR',
+ name='dir1',
+ commit_info=dict(
+ summary='Commit 2',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 2),
+ author_url=None,
+ id=commit2._id,
+ shortlink=self.repo.shorthand_for_commit(commit2._id),
+ )))
+ self.assertEqual(lcd.entry_by_name('file2'), dict(
+ type='BLOB',
+ name='file2',
+ commit_info=dict(
+ summary='Commit 3',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 3),
+ author_url=None,
+ id=commit3._id,
+ shortlink=self.repo.shorthand_for_commit(commit3._id),
+ )))
+
+ def test_multiple_commits_subdir_change(self):
+ commit1 = self._add_commit('Commit 1', ['file1', 'dir1/file1'])
+ commit2 = self._add_commit('Commit 2', ['file1', 'dir1/file1', 'dir1/file2'], ['dir1/file2'], [commit1])
+ commit3 = self._add_commit('Commit 3', ['file1', 'dir1/file1', 'dir1/file2'], ['dir1/file1'], [commit2])
+ lcd = M.repo.LastCommit.get(commit3.tree)
+ self.assertEqual([self.repo._commits[c].message for c in lcd.commit_ids], [commit3.message])
+ self.assertEqual(lcd.path, '')
+ self.assertEqual(len(lcd.entries), 2)
+ self.assertEqual(lcd.entry_by_name('file1'), dict(
+ type='BLOB',
+ name='file1',
+ commit_info=dict(
+ summary='Commit 1',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 1),
+ author_url=None,
+ id=commit1._id,
+ shortlink=self.repo.shorthand_for_commit(commit1._id),
+ )))
+ self.assertEqual(lcd.entry_by_name('dir1'), dict(
+ type='DIR',
+ name='dir1',
+ commit_info=dict(
+ summary='Commit 3',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 3),
+ author_url=None,
+ id=commit3._id,
+ shortlink=self.repo.shorthand_for_commit(commit3._id),
+ )))
+
+ def test_subdir_lcd(self):
+ commit1 = self._add_commit('Commit 1', ['file1', 'dir1/file1'])
+ commit2 = self._add_commit('Commit 2', ['file1', 'dir1/file1', 'dir1/file2'], ['dir1/file2'], [commit1])
+ commit3 = self._add_commit('Commit 3', ['file1', 'dir1/file1', 'dir1/file2'], ['dir1/file1'], [commit2])
+ tree = self._build_tree(commit3, '/dir1', ['file1', 'file2'])
+ lcd = M.repo.LastCommit.get(tree)
+ self.assertEqual([self.repo._commits[c].message for c in lcd.commit_ids], [commit3.message])
+ self.assertEqual(lcd.path, 'dir1')
+ self.assertEqual(len(lcd.entries), 2)
+ self.assertEqual(lcd.entry_by_name('file1'), dict(
+ type='BLOB',
+ name='file1',
+ commit_info=dict(
+ summary='Commit 3',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 3),
+ author_url=None,
+ id=commit3._id,
+ shortlink=self.repo.shorthand_for_commit(commit3._id),
+ )))
+ self.assertEqual(lcd.entry_by_name('file2'), dict(
+ type='BLOB',
+ name='file2',
+ commit_info=dict(
+ summary='Commit 2',
+ author='test',
+ author_email='test@example.com',
+ date=datetime(2013, 1, 2),
+ author_url=None,
+ id=commit2._id,
+ shortlink=self.repo.shorthand_for_commit(commit2._id),
+ )))
+
+    def test_subdir_lcd_prev_commit(self):
+        # The fourth commit only touches 'file2' at the root; the LastCommit
+        # doc for '/dir1' is expected to list both commit 4 and commit 3 and
+        # to keep the per-file info from commits 3 and 2.
+        def blob_entry(name, summary, day, commit):
+            # expected shape of one BLOB entry on the LastCommit doc
+            return dict(
+                type='BLOB',
+                name=name,
+                commit_info=dict(
+                    summary=summary,
+                    author='test',
+                    author_email='test@example.com',
+                    date=datetime(2013, 1, day),
+                    author_url=None,
+                    id=commit._id,
+                    shortlink=self.repo.shorthand_for_commit(commit._id),
+                ))
+
+        first = self._add_commit('Commit 1', ['file1', 'dir1/file1'])
+        second = self._add_commit('Commit 2', ['file1', 'dir1/file1', 'dir1/file2'], ['dir1/file2'], [first])
+        third = self._add_commit('Commit 3', ['file1', 'dir1/file1', 'dir1/file2'], ['dir1/file1'], [second])
+        fourth = self._add_commit('Commit 4', ['file1', 'dir1/file1', 'dir1/file2', 'file2'], ['file2'], [third])
+        dir1_tree = self._build_tree(fourth, '/dir1', ['file1', 'file2'])
+        lcd = M.repo.LastCommit.get(dir1_tree)
+
+        chain = [self.repo._commits[ci_id].message for ci_id in lcd.commit_ids]
+        self.assertEqual(chain, [fourth.message, third.message])
+        self.assertEqual(lcd.path, 'dir1')
+        self.assertEqual(len(lcd.entries), 2)
+        self.assertEqual(lcd.entry_by_name('file1'), blob_entry('file1', 'Commit 3', 3, third))
+        self.assertEqual(lcd.entry_by_name('file2'), blob_entry('file2', 'Commit 2', 2, second))
+
+    def test_subdir_lcd_always_empty(self):
+        # The '/dir1' tree is built with no children; the resulting
+        # LastCommit doc should chain both commits and have no entries.
+        first = self._add_commit('Commit 1', ['file1', 'dir1'])
+        second = self._add_commit('Commit 2', ['file1', 'file2'], ['file2'], [first])
+        empty_tree = self._build_tree(second, '/dir1', [])
+        lcd = M.repo.LastCommit.get(empty_tree)
+
+        chain = [self.repo._commits[ci_id].message for ci_id in lcd.commit_ids]
+        self.assertEqual(chain, [second.message, first.message])
+        self.assertEqual(lcd.path, 'dir1')
+        self.assertEqual(lcd.entries, [])
+
+    def test_subdir_lcd_emptied(self):
+        # The second commit changes 'dir1/file1' and no longer lists it; the
+        # LastCommit doc for the (now childless) '/dir1' tree should contain
+        # only that commit and no entries.
+        first = self._add_commit('Commit 1', ['file1', 'dir1/file1'])
+        second = self._add_commit('Commit 2', ['file1'], ['dir1/file1'], [first])
+        emptied_tree = self._build_tree(second, '/dir1', [])
+        lcd = M.repo.LastCommit.get(emptied_tree)
+
+        chain = [self.repo._commits[ci_id].message for ci_id in lcd.commit_ids]
+        self.assertEqual(chain, [second.message])
+        self.assertEqual(lcd.path, 'dir1')
+        self.assertEqual(lcd.entries, [])
+
+    def test_existing_lcd_unchained(self):
+        # A LastCommit doc for 'dir1' already exists for commit 2.  Commit 3
+        # changes only the root 'file1', so the same doc is expected to be
+        # returned with commit 3 added to its commit_ids and its entries
+        # left as they were.
+        def blob_entry(name, summary, day, commit):
+            # one BLOB entry in the shape stored on the LastCommit doc
+            return dict(
+                type='BLOB',
+                name=name,
+                commit_info=dict(
+                    summary=summary,
+                    author='test',
+                    author_email='test@example.com',
+                    date=datetime(2013, 1, day),
+                    author_url=None,
+                    id=commit._id,
+                    shortlink=self.repo.shorthand_for_commit(commit._id),
+                ))
+
+        first = self._add_commit('Commit 1', ['file1', 'dir1/file1'])
+        second = self._add_commit('Commit 2', ['file1', 'dir1/file1', 'dir1/file2'], ['dir1/file2'], [first])
+        third = self._add_commit('Commit 3', ['file1', 'dir1/file1', 'dir1/file2'], ['file1'], [second])
+        prev_lcd = M.repo.LastCommit(
+            path='dir1',
+            commit_ids=[second._id],
+            entries=[
+                blob_entry('file1', 'Commit 1', 1, first),
+                blob_entry('file2', 'Commit 2', 2, second),
+            ],
+        )
+        session(prev_lcd).flush()
+
+        dir1_tree = self._build_tree(third, '/dir1', ['file1', 'file2'])
+        lcd = M.repo.LastCommit.get(dir1_tree)
+
+        self.assertEqual(lcd._id, prev_lcd._id)
+        chain = [self.repo._commits[ci_id].message for ci_id in lcd.commit_ids]
+        self.assertEqual(chain, [second.message, third.message])
+        self.assertEqual(lcd.path, 'dir1')
+        self.assertEqual(lcd.entries, prev_lcd.entries)
+
+    def test_existing_lcd_partial(self):
+        # A root LastCommit doc exists for commit 3.  Commit 4 changes file2
+        # and adds file4, so the new doc should reuse the stored info for
+        # file1 and file3 and point file2 and file4 at commit 4.
+        def blob_entry(name, summary, day, commit):
+            # one BLOB entry in the shape stored on the LastCommit doc
+            return dict(
+                type='BLOB',
+                name=name,
+                commit_info=dict(
+                    summary=summary,
+                    author='test',
+                    author_email='test@example.com',
+                    date=datetime(2013, 1, day),
+                    author_url=None,
+                    id=commit._id,
+                    shortlink=self.repo.shorthand_for_commit(commit._id),
+                ))
+
+        first = self._add_commit('Commit 1', ['file1'])
+        second = self._add_commit('Commit 2', ['file1', 'file2'], ['file2'], [first])
+        third = self._add_commit('Commit 3', ['file1', 'file2', 'file3'], ['file3'], [second])
+        fourth = self._add_commit('Commit 4', ['file1', 'file2', 'file3', 'file4'], ['file2', 'file4'], [third])
+        prev_lcd = M.repo.LastCommit(
+            path='',
+            commit_ids=[third._id],
+            entries=[
+                # lying about the summary here to test that it uses this
+                # data instead of walking up the tree
+                blob_entry('file1', 'Existing LCD', 1, first),
+                blob_entry('file2', 'Commit 2', 2, second),
+                blob_entry('file3', 'Commit 3', 3, third),
+            ],
+        )
+        session(prev_lcd).flush()
+
+        lcd = M.repo.LastCommit.get(fourth.tree)
+
+        chain = [self.repo._commits[ci_id].message for ci_id in lcd.commit_ids]
+        self.assertEqual(chain, [fourth.message])
+        self.assertEqual(lcd.path, '')
+        self.assertEqual(lcd.entry_by_name('file1')['commit_info']['summary'], 'Existing LCD')
+        self.assertEqual(len(lcd.entries), 4)
+        self.assertEqual(lcd.entry_by_name('file1'), blob_entry('file1', 'Existing LCD', 1, first))
+        self.assertEqual(lcd.entry_by_name('file2'), blob_entry('file2', 'Commit 4', 4, fourth))
+        self.assertEqual(lcd.entry_by_name('file3'), blob_entry('file3', 'Commit 3', 3, third))
+        self.assertEqual(lcd.entry_by_name('file4'), blob_entry('file4', 'Commit 4', 4, fourth))
+
+
+class TestModelCache(unittest.TestCase):
+    # Exercises M.repo.ModelCache: key normalization, get/set, key listing,
+    # batch loading and size-based pruning.
+
+    def setUp(self):
+        self.cache = M.repo.ModelCache()
+
+    def test_normalize_key(self):
+        # a dict key comes back as a tuple of (name, value) pairs
+        normalized = self.cache._normalize_key({'foo': 1, 'bar': 2})
+        self.assertEqual(normalized, (('bar', 2), ('foo', 1)))
+
+    @mock.patch.object(M.repo.Tree.query, 'get')
+    @mock.patch.object(M.repo.LastCommit.query, 'get')
+    def test_get(self, lc_get, tr_get):
+        # each model class is looked up through its own query.get
+        tr_get.return_value = 'bar'
+        lc_get.return_value = 'qux'
+
+        tree_val = self.cache.get(M.repo.Tree, {'_id': 'foo'})
+        tr_get.assert_called_with(_id='foo')
+        self.assertEqual(tree_val, 'bar')
+
+        lc_val = self.cache.get(M.repo.LastCommit, {'_id': 'foo'})
+        lc_get.assert_called_with(_id='foo')
+        self.assertEqual(lc_val, 'qux')
+
+    @mock.patch.object(M.repo.Tree.query, 'get')
+    def test_get_no_dup(self, tr_get):
+        tr_get.return_value = 'bar'
+        first_val = self.cache.get(M.repo.Tree, {'_id': 'foo'})
+        tr_get.assert_called_once_with(_id='foo')
+        self.assertEqual(first_val, 'bar')
+
+        # a repeated lookup must not hit query.get again, so the changed
+        # return value is never observed
+        tr_get.return_value = 'qux'
+        second_val = self.cache.get(M.repo.Tree, {'_id': 'foo'})
+        tr_get.assert_called_once_with(_id='foo')
+        self.assertEqual(second_val, 'bar')
+
+    @mock.patch.object(M.repo.TreesDoc.m, 'get')
+    def test_get_doc(self, tr_get):
+        # TreesDoc is fetched through its `.m` manager rather than `.query`
+        tr_get.return_value = 'bar'
+        doc_val = self.cache.get(M.repo.TreesDoc, {'_id': 'foo'})
+        tr_get.assert_called_once_with(_id='foo')
+        self.assertEqual(doc_val, 'bar')
+
+    def test_set(self):
+        self.cache.set(M.repo.Tree, {'_id': 'foo'}, 'test_set')
+        expected = {M.repo.Tree: {(('_id', 'foo'),): 'test_set'}}
+        self.assertEqual(self.cache._cache, expected)
+
+    def test_keys(self):
+        self.cache._cache[M.repo.Tree][(('_id', 'test_keys'), ('text', 'tko'))] = 'foo'
+        self.cache._cache[M.repo.Tree][(('fubar', 'scm'),)] = 'bar'
+        tree_keys = self.cache.keys(M.repo.Tree)
+        self.assertEqual(tree_keys, [{'_id': 'test_keys', 'text': 'tko'}, {'fubar': 'scm'}])
+        lc_keys = self.cache.keys(M.repo.LastCommit)
+        self.assertEqual(lc_keys, [])
+
+    @mock.patch.object(M.repo.Tree.query, 'find')
+    def test_batch_load(self, tr_find):
+        # batch_load(cls, query): without explicit attrs the results end up
+        # keyed by the field named in the query
+        found_a = mock.Mock(foo=1, qux=3)
+        found_b = mock.Mock(foo=2, qux=5)
+        tr_find.return_value = [found_a, found_b]
+
+        self.cache.batch_load(M.repo.Tree, {'foo': {'$in': 'bar'}})
+        tr_find.assert_called_with({'foo': {'$in': 'bar'}})
+        expected = {
+            (('foo', 1),): found_a,
+            (('foo', 2),): found_b,
+        }
+        self.assertEqual(self.cache._cache[M.repo.Tree], expected)
+
+    @mock.patch.object(M.repo.Tree.query, 'find')
+    def test_batch_load_attrs(self, tr_find):
+        # batch_load(cls, query, attrs): results end up keyed by the
+        # requested attrs instead of the query field
+        found_a = mock.Mock(foo=1, qux=3)
+        found_b = mock.Mock(foo=2, qux=5)
+        tr_find.return_value = [found_a, found_b]
+
+        self.cache.batch_load(M.repo.Tree, {'foo': {'$in': 'bar'}}, ['qux'])
+        tr_find.assert_called_with({'foo': {'$in': 'bar'}})
+        expected = {
+            (('qux', 3),): found_a,
+            (('qux', 5),): found_b,
+        }
+        self.assertEqual(self.cache._cache[M.repo.Tree], expected)
+
+    def test_pruning(self):
+        # with room for two entries, storing a third is expected to leave
+        # only 'qux' and 'f00' behind
+        self.cache.max_size = 2
+        for key_id, value in (('foo', 'bar'), ('qux', 'zaz'), ('f00', 'b4r'), ('qux', 'zaz')):
+            self.cache.set(M.repo.Tree, {'_id': key_id}, value)
+        expected = {
+            M.repo.Tree: {
+                (('_id', 'qux'),): 'zaz',
+                (('_id', 'f00'),): 'b4r',
+            },
+        }
+        self.assertEqual(self.cache._cache, expected)
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/320025c4/ForgeSVN/forgesvn/model/svn.py
----------------------------------------------------------------------
diff --git a/ForgeSVN/forgesvn/model/svn.py b/ForgeSVN/forgesvn/model/svn.py
index f781d6c..0a77d89 100644
--- a/ForgeSVN/forgesvn/model/svn.py
+++ b/ForgeSVN/forgesvn/model/svn.py
@@ -76,7 +76,7 @@ class Repository(M.Repository):
while ci is not None and limit > 0:
yield ci._id
limit -= 1
- ci = ci.parent()
+ ci = ci.get_parent()
def latest(self, branch=None):
if self._impl is None: return None
@@ -416,15 +416,27 @@ class SVNImplementation(M.RepositoryImplementation):
log.debug('Compute tree for %d paths', len(infos))
tree_ids = []
blob_ids = []
+ chg_revno = infos[0][1]['last_changed_rev'].number
+ cur_revno = self._revno(commit._id)
+ commit_ids = [self._oid(revno) for revno in range(chg_revno, cur_revno+1)]
+ lcd = M.repo.LastCommit.query.get(
+ commit_ids=self._oid(chg_revno),
+ path=tree_path.strip('/'),
+ )
+ if lcd:
+ lcd.commit_ids = list(set(lcd.commit_ids + commit_ids))
+ lcd_is_new = False
+ else:
+ # we can't use the normal auto-vivification, because
+ # SVN repos don't have their diff infos filled out :(
+ lcd = M.repo.LastCommit(
+ commit_ids=commit_ids,
+ path=tree_path.strip('/'),
+ )
+ lcd_is_new = True
for path, info in infos[1:]:
last_commit_id = self._oid(info['last_changed_rev'].number)
last_commit = M.repo.Commit.query.get(_id=last_commit_id)
- M.repo_refresh.set_last_commit(
- self._repo._id,
- re.sub(r'/?$', '/', tree_path), # force it to end with /
- path,
- self._tree_oid(commit._id, path),
- M.repo_refresh.get_commit_info(last_commit))
if info.kind == pysvn.node_kind.dir:
tree_ids.append(Object(
id=self._tree_oid(commit._id, path),
@@ -435,6 +447,13 @@ class SVNImplementation(M.RepositoryImplementation):
name=path))
else:
assert False
+ if lcd_is_new:
+ lcd.entries.append(dict(
+ name=path,
+ type='DIR' if info.kind == pysvn.node_kind.dir else 'BLOB',
+ commit_info=last_commit.info,
+ ))
+ session(lcd).flush(lcd)
tree, is_new = RM.Tree.upsert(tree_id,
tree_ids=tree_ids,
blob_ids=blob_ids,
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/320025c4/scripts/refresh-all-repos.py
----------------------------------------------------------------------
diff --git a/scripts/refresh-all-repos.py b/scripts/refresh-all-repos.py
index 822148f..1cf7e3d 100644
--- a/scripts/refresh-all-repos.py
+++ b/scripts/refresh-all-repos.py
@@ -32,6 +32,7 @@ def main(options):
M.repo.TreesDoc.m.remove({})
M.repo.DiffInfoDoc.m.remove({})
M.repo.CommitRunDoc.m.remove({})
+ M.repo.LastCommitDoc.m.remove({})
for chunk in chunked_find(M.Project, q_project):
for p in chunk:
@@ -72,9 +73,6 @@ def main(options):
i = M.repo.TreeDoc.m.find({"_id": {"$in": tree_ids_chunk}}).count()
log.info("Deleting %i TreeDoc docs...", i)
M.repo.TreeDoc.m.remove({"_id": {"$in": tree_ids_chunk}})
- i = M.repo.LastCommitDoc.m.find({"object_id": {"$in": tree_ids_chunk}}).count()
- log.info("Deleting %i LastCommitDoc docs...", i)
- M.repo.LastCommitDoc.m.remove({"object_id": {"$in": tree_ids_chunk}})
del tree_ids
# delete these after TreeDoc and LastCommitDoc so that if
@@ -83,11 +81,10 @@ def main(options):
log.info("Deleting %i TreesDoc docs...", i)
M.repo.TreesDoc.m.remove({"_id": {"$in": ci_ids}})
- # delete LastCommitDocs for non-trees
- repo_lastcommit_re = re.compile("^{}:".format(c.app.repo._id))
- i = M.repo.LastCommitDoc.m.find(dict(_id=repo_lastcommit_re)).count()
+ # delete LastCommitDocs
+ i = M.repo.LastCommitDoc.m.find(dict(commit_ids={'$in': ci_ids})).count()
log.info("Deleting %i remaining LastCommitDoc docs, by repo id...", i)
- M.repo.LastCommitDoc.m.remove(dict(_id=repo_lastcommit_re))
+ M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': ci_ids}))
i = M.repo.DiffInfoDoc.m.find({"_id": {"$in": ci_ids}}).count()
log.info("Deleting %i DiffInfoDoc docs...", i)
http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/320025c4/scripts/refresh-last-commits.py
----------------------------------------------------------------------
diff --git a/scripts/refresh-last-commits.py b/scripts/refresh-last-commits.py
new file mode 100644
index 0000000..8776010
--- /dev/null
+++ b/scripts/refresh-last-commits.py
@@ -0,0 +1,172 @@
+import argparse
+import logging
+import re
+from datetime import datetime
+from contextlib import contextmanager
+
+import faulthandler
+from pylons import c
+from ming.orm import ThreadLocalORMSession
+
+from allura import model as M
+from allura.lib.utils import chunked_find, chunked_list
+
+log = logging.getLogger(__name__)
+
+
+def main(options):
+    """Refresh last-commit data for the repos selected by ``options``.
+
+    Projects are filtered by neighborhood, shortname or shortname regex, and
+    each project's apps by mount point and repo type.  With ``--clean`` the
+    LastCommitDoc records for the repo's commits (and, with ``--diffs``,
+    its DiffInfoDoc records too) are removed before being recomputed.
+
+    Returns an error string when ``options.nbhd`` matches no neighborhood,
+    otherwise ``None``.
+    """
+    q_project = {}
+    if options.nbhd:
+        nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
+        if not nbhd:
+            return "Invalid neighborhood url prefix."
+        q_project['neighborhood_id'] = nbhd._id
+    if options.project:
+        q_project['shortname'] = options.project
+    elif options.project_regex:
+        q_project['shortname'] = {'$regex': options.project_regex}
+
+    log.info('Refreshing last commit data')
+
+    for chunk in chunked_find(M.Project, q_project):
+        for p in chunk:
+            log.info("Refreshing last commit data for project '%s'.", p.shortname)
+            if options.dry_run:
+                continue
+            c.project = p
+            if options.mount_point:
+                mount_points = [options.mount_point]
+            else:
+                mount_points = [ac.options.mount_point for ac in
+                                M.AppConfig.query.find(dict(project_id=p._id))]
+            for app in (p.app_instance(mp) for mp in mount_points):
+                c.app = app
+                if not hasattr(app, 'repo'):
+                    continue
+                if c.app.repo.tool.lower() not in options.repo_types:
+                    log.info("Skipping %r: wrong type (%s)", c.app.repo,
+                             c.app.repo.tool.lower())
+                    continue
+
+                # commits are processed in the reverse of the order in
+                # which all_commit_ids() yields them
+                ci_ids = list(reversed(list(c.app.repo.all_commit_ids())))
+                if options.clean:
+                    if options.diffs:
+                        # delete DiffInfoDocs; they are keyed by commit id
+                        # in _id (not commit_ids), and it is the
+                        # DiffInfoDoc collection that has to be emptied
+                        i = M.repo.DiffInfoDoc.m.find(dict(_id={'$in': ci_ids})).count()
+                        log.info("Deleting %i DiffInfoDoc docs, by repo id...", i)
+                        M.repo.DiffInfoDoc.m.remove(dict(_id={'$in': ci_ids}))
+
+                    # delete LastCommitDocs
+                    i = M.repo.LastCommitDoc.m.find(dict(commit_ids={'$in': ci_ids})).count()
+                    log.info("Deleting %i LastCommitDoc docs, by repo id...", i)
+                    M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': ci_ids}))
+
+                try:
+                    log.info('Refreshing all last commits in %r', c.app.repo)
+                    if options.profile:
+                        import cProfile
+                        # runctx evaluates the call string against
+                        # locals(), which supplies ci_ids and options
+                        cProfile.runctx('refresh_repo_lcds(ci_ids, options)',
+                                        globals(), locals(), '/tmp/refresh_lcds.profile')
+                    else:
+                        refresh_repo_lcds(ci_ids, options)
+                except:
+                    # log which repo failed, then let the error propagate
+                    log.exception('Error refreshing %r', c.app.repo)
+                    raise
+        # NOTE(review): the archived patch lost its indentation; these two
+        # calls are placed once per chunk of projects -- confirm against
+        # the repository.
+        ThreadLocalORMSession.flush_all()
+        ThreadLocalORMSession.close_all()
+
+
+def refresh_repo_lcds(commit_ids, options):
+ # Recompute data for every commit id in commit_ids, in the order given:
+ # first the diffs (only when options.diffs is set), then the last-commit
+ # docs.  Progress and timing statistics are printed along the way.
+ # NOTE(review): the archived patch lost its indentation, so the nesting
+ # of the statements below (in particular of the trailing flush_all /
+ # close_all calls) cannot be read off this text -- confirm against the
+ # repository before relying on it.
+ tree_cache = {}
+ timings = []
+ if options.diffs:
+ print 'Processing diffs'
+ for commit_id in commit_ids:
+ commit = M.repo.Commit.query.get(_id=commit_id)
+ # time() appends the seconds spent in its body to timings
+ with time(timings):
+ M.repo_refresh.compute_diffs(c.app.repo._id, tree_cache, commit)
+ # report when the number of timed commits is a multiple of 1000
+ if len(timings) % 1000 == 0:
+ mt = max(timings)
+ tt = sum(timings)
+ at = tt / len(timings)
+ # max / avg / tot are seconds; cl is the number of entries in tree_cache
+ print ' Processed %d commits (max: %f, avg: %f, tot: %f, cl: %d)' % (
+ len(timings), mt, at, tt, len(tree_cache))
+ lcd_cache = M.repo.ModelCache(80000)
+ # start a fresh set of timings for the last-commit pass
+ timings = []
+ print 'Processing last commits'
+ for commit_id in commit_ids:
+ commit = M.repo.Commit.query.get(_id=commit_id)
+ with time(timings):
+ M.repo_refresh.compute_lcds(commit, lcd_cache)
+ # report when the number of timed commits is a multiple of 100
+ if len(timings) % 100 == 0:
+ mt = max(timings)
+ tt = sum(timings)
+ at = tt / len(timings)
+ # mavg: sum of the most recent (up to) 100 timings divided by 100
+ mat = sum(timings[-100:]) / 100
+ # the remaining figures are read from lcd_cache (its size(), the number
+ # of cached LastCommit entries, and its private hit/miss and walk
+ # counters); ts is the length of tree_ids on the current commit's
+ # TreesDoc
+ print ' Processed %d commits (max: %f, avg: %f, mavg: %f, tot: %f, lc: %d, lcl: %d, hits: %d, agw: %d, mgw: %d, gh: %d, abw: %d, mbw: %d, ts: %d)' % (
+ len(timings), mt, at, mat, tt, lcd_cache.size(), len(lcd_cache._cache[M.repo.LastCommit]),
+ lcd_cache._hits * 100 / (lcd_cache._hits + lcd_cache._misses),
+ lcd_cache._get_walks / lcd_cache._get_calls, lcd_cache._get_walks_max, lcd_cache._get_hits * 100 / lcd_cache._get_calls,
+ lcd_cache._build_walks / lcd_cache._build_calls, lcd_cache._build_walks_max,
+ len(lcd_cache.get(M.repo.TreesDoc, dict(_id=commit._id)).tree_ids))
+ ThreadLocalORMSession.flush_all()
+ ThreadLocalORMSession.close_all()
+ #if len(timings) == 300:
+ # break
+
+
+@contextmanager
+def time(timings):
+    """Append the wall-clock seconds spent in the ``with`` body to ``timings``.
+
+    Nothing is appended when the body raises, because the measurement is
+    taken only after the body returns normally.
+    """
+    started = datetime.now()
+    yield
+    elapsed = datetime.now() - started
+    timings.append(elapsed.total_seconds())
+
+
+def repo_type_list(s):
+    """Parse a comma-separated string of repo types into a list.
+
+    Used as an argparse ``type=`` callback.  Whitespace around each name is
+    stripped.  Raises ``argparse.ArgumentTypeError`` for any name other
+    than ``svn``, ``git`` or ``hg``.
+    """
+    valid_types = ['svn', 'git', 'hg']
+    parsed = []
+    for raw_name in s.split(','):
+        name = raw_name.strip()
+        if name in valid_types:
+            parsed.append(name)
+            continue
+        raise argparse.ArgumentTypeError(
+            '{} is not a valid repo type.'.format(name))
+    return parsed
+
+
+def parse_options():
+    """Build this script's command-line parser and parse the arguments.
+
+    Returns the ``argparse.Namespace`` that ``main`` consumes.
+    """
+    parser = argparse.ArgumentParser(description='Using existing commit data, '
+        'refresh the last commit metadata in MongoDB. Run for all repos (no args), '
+        'or restrict by neighborhood, project, or code tool mount point.')
+    parser.add_argument('--nbhd', action='store', default='', dest='nbhd',
+        help='Restrict update to a particular neighborhood, e.g. /p/.')
+    parser.add_argument('--project', action='store', default='', dest='project',
+        help='Restrict update to a particular project. To specify a '
+        'subproject, use a slash: project/subproject.')
+    parser.add_argument('--project-regex', action='store', default='',
+        dest='project_regex',
+        help='Restrict update to projects for which the shortname matches '
+        'the provided regex.')
+    parser.add_argument('--repo-types', action='store', type=repo_type_list,
+        default=['svn', 'git', 'hg'], dest='repo_types',
+        help='Only refresh last commits for repos of the given type(s). Defaults to: '
+        'svn,git,hg. Example: --repo-types=git,hg')
+    parser.add_argument('--mount_point', default='', dest='mount_point',
+        help='Restrict update to repos at the given tool mount point. ')
+    parser.add_argument('--clean', action='store_true', dest='clean',
+        default=False, help='Remove last commit mongo docs for '
+        'project(s) being refreshed before doing the refresh.')
+    parser.add_argument('--dry-run', action='store_true', dest='dry_run',
+        default=False, help='Log names of projects that would have their '
+        'last commits refreshed, but do not perform the actual refresh.')
+    # the path named here must match the file main() passes to cProfile.runctx
+    parser.add_argument('--profile', action='store_true', dest='profile',
+        default=False, help='Enable the profiler (slow). Will log '
+        'profiling output to /tmp/refresh_lcds.profile')
+    parser.add_argument('--diffs', action='store_true', dest='diffs',
+        default=False, help='Refresh diffs as well as LCDs')
+    return parser.parse_args()
+
+# Script entry point: enable faulthandler, then exit with whatever main()
+# returns for the parsed command-line options.
+if __name__ == '__main__':
+ import sys
+ faulthandler.enable()
+ sys.exit(main(parse_options()))