You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2014/04/03 23:30:00 UTC
[2/3] git commit: [#7305] Consolidate allura.model.repo into allura.model.repository
[#7305] Consolidate allura.model.repo into allura.model.repository
Signed-off-by: Cory Johns <cj...@slashdotmedia.com>
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/839d9cfb
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/839d9cfb
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/839d9cfb
Branch: refs/heads/cj/7305
Commit: 839d9cfb992d3401c687621fec2dc6c1eda40584
Parents: f72b7fd
Author: Cory Johns <cj...@slashdotmedia.com>
Authored: Wed Apr 2 20:26:03 2014 +0000
Committer: Cory Johns <cj...@slashdotmedia.com>
Committed: Wed Apr 2 20:26:03 2014 +0000
----------------------------------------------------------------------
Allura/allura/controllers/repository.py | 4 +-
Allura/allura/lib/custom_middleware.py | 16 +-
Allura/allura/model/__init__.py | 3 +-
Allura/allura/model/repo.py | 983 ------------------
Allura/allura/model/repo_refresh.py | 8 +-
Allura/allura/model/repository.py | 994 ++++++++++++++++++-
Allura/allura/scripts/refresh_last_commits.py | 18 +-
Allura/allura/scripts/refreshrepo.py | 26 +-
Allura/allura/tests/model/test_repo.py | 210 ++--
Allura/allura/tests/unit/test_repo.py | 51 +-
Allura/test-light.py | 4 +-
ForgeGit/forgegit/model/git_repo.py | 13 +-
.../forgegit/tests/model/test_repository.py | 10 +-
ForgeSVN/forgesvn/model/svn.py | 8 +-
.../forgesvn/tests/model/test_repository.py | 22 +-
.../tests/model/test_svnimplementation.py | 10 +-
scripts/migrations/028-remove-svn-trees.py | 8 +-
17 files changed, 1175 insertions(+), 1213 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/839d9cfb/Allura/allura/controllers/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/controllers/repository.py b/Allura/allura/controllers/repository.py
index aabc467..1751839 100644
--- a/Allura/allura/controllers/repository.py
+++ b/Allura/allura/controllers/repository.py
@@ -223,7 +223,7 @@ class RepoRootController(BaseController, FeedController):
log.info('Grab %d commit objects by ID', len(commit_ids))
commits_by_id = {
c_obj._id: c_obj
- for c_obj in M.repo.CommitDoc.m.find(dict(_id={'$in': commit_ids}))}
+ for c_obj in M.repository.CommitDoc.m.find(dict(_id={'$in': commit_ids}))}
log.info('... build graph')
parents = {}
children = defaultdict(list)
@@ -629,7 +629,7 @@ class TreeBrowser(BaseController, DispatchIndex):
obj = self._tree[filename]
except KeyError:
raise exc.HTTPNotFound()
- if isinstance(obj, M.repo.Blob):
+ if isinstance(obj, M.repository.Blob):
return self.FileBrowserClass(
self._commit,
self._tree,
http://git-wip-us.apache.org/repos/asf/allura/blob/839d9cfb/Allura/allura/lib/custom_middleware.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/custom_middleware.py b/Allura/allura/lib/custom_middleware.py
index ad1f276..c5737f0 100644
--- a/Allura/allura/lib/custom_middleware.py
+++ b/Allura/allura/lib/custom_middleware.py
@@ -30,7 +30,7 @@ from webob import exc, Request
import pysolr
from allura.lib import helpers as h
-import allura.model.repo
+import allura.model.repository
log = logging.getLogger(__name__)
@@ -235,11 +235,11 @@ class AlluraTimerMiddleware(TimerMiddleware):
'_refresh'),
# urlopen and socket io may or may not overlap partially
Timer('render', genshi.Stream, 'render'),
- Timer('repo.Blob.{method_name}', allura.model.repo.Blob, '*'),
- Timer('repo.Commit.{method_name}', allura.model.repo.Commit, '*'),
+ Timer('repo.Blob.{method_name}', allura.model.repository.Blob, '*'),
+ Timer('repo.Commit.{method_name}', allura.model.repository.Commit, '*'),
Timer('repo.LastCommit.{method_name}',
- allura.model.repo.LastCommit, '*'),
- Timer('repo.Tree.{method_name}', allura.model.repo.Tree, '*'),
+ allura.model.repository.LastCommit, '*'),
+ Timer('repo.Tree.{method_name}', allura.model.repository.Tree, '*'),
Timer('socket_read', socket._fileobject, 'read', 'readline',
'readlines', debug_each_call=False),
Timer('socket_write', socket._fileobject, 'write', 'writelines',
@@ -250,11 +250,11 @@ class AlluraTimerMiddleware(TimerMiddleware):
Timer('urlopen', urllib2, 'urlopen'),
Timer('base_repo_tool.{method_name}',
allura.model.repository.RepositoryImplementation, 'last_commit_ids'),
- Timer('_diffs_copied', allura.model.repo.Commit, '_diffs_copied'),
+ Timer('_diffs_copied', allura.model.repository.Commit, '_diffs_copied'),
Timer(
- 'sequencematcher.{method_name}', allura.model.repo.SequenceMatcher,
+ 'sequencematcher.{method_name}', allura.model.repository.SequenceMatcher,
'ratio', 'quick_ratio', 'real_quick_ratio'),
- Timer('unified_diff', allura.model.repo, 'unified_diff'),
+ Timer('unified_diff', allura.model.repository, 'unified_diff'),
] + [Timer('sidebar', ep.load(), 'sidebar_menu') for ep in tool_entry_points]
def before_logging(self, stat_record):
http://git-wip-us.apache.org/repos/asf/allura/blob/839d9cfb/Allura/allura/model/__init__.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/__init__.py b/Allura/allura/model/__init__.py
index 65a4561..a785773 100644
--- a/Allura/allura/model/__init__.py
+++ b/Allura/allura/model/__init__.py
@@ -42,7 +42,8 @@ from .session import artifact_orm_session, repository_orm_session
from .session import task_orm_session
from .session import ArtifactSessionExtension
-import repo
+from . import repository
+from . import repo_refresh
from ming.orm import Mapper
Mapper.compile_all()
http://git-wip-us.apache.org/repos/asf/allura/blob/839d9cfb/Allura/allura/model/repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
deleted file mode 100644
index 81d843d..0000000
--- a/Allura/allura/model/repo.py
+++ /dev/null
@@ -1,983 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-import re
-import logging
-from hashlib import sha1
-from itertools import chain
-from datetime import datetime
-from collections import defaultdict, OrderedDict
-from difflib import SequenceMatcher, unified_diff
-import bson
-
-from pylons import tmpl_context as c
-import pymongo.errors
-
-from ming import Field, collection, Index
-from ming import schema as S
-from ming.base import Object
-from ming.utils import LazyProperty
-from ming.orm import mapper, session
-
-from allura.lib import utils
-from allura.lib import helpers as h
-from allura.lib.security import has_access
-
-from .auth import User
-from .project import AppConfig
-from .session import main_doc_session
-from .session import repository_orm_session
-from .timeline import ActivityObject
-
-log = logging.getLogger(__name__)
-
-# Some schema types
-SUser = dict(name=str, email=str, date=datetime)
-SObjType = S.OneOf('blob', 'tree', 'submodule')
-
-# Used for when we're going to batch queries using $in
-QSIZE = 100
-README_RE = re.compile('^README(\.[^.]*)?$', re.IGNORECASE)
-VIEWABLE_EXTENSIONS = [
- '.php', '.py', '.js', '.java', '.html', '.htm', '.yaml', '.sh',
- '.rb', '.phtml', '.txt', '.bat', '.ps1', '.xhtml', '.css', '.cfm', '.jsp', '.jspx',
- '.pl', '.php4', '.php3', '.rhtml', '.svg', '.markdown', '.json', '.ini', '.tcl', '.vbs', '.xsl']
-PYPELINE_EXTENSIONS = utils.MARKDOWN_EXTENSIONS + ['.rst']
-
-DIFF_SIMILARITY_THRESHOLD = .5 # used for determining file renames
-
-# Basic commit information
-# One of these for each commit in the physical repo on disk. The _id is the
-# hexsha of the commit (for Git and Hg).
-CommitDoc = collection(
- 'repo_ci', main_doc_session,
- Field('_id', str),
- Field('tree_id', str),
- Field('committed', SUser),
- Field('authored', SUser),
- Field('message', str),
- Field('parent_ids', [str], index=True),
- Field('child_ids', [str], index=True),
- Field('repo_ids', [S.ObjectId()], index=True))
-
-# Basic tree information (also see TreesDoc)
-TreeDoc = collection(
- 'repo_tree', main_doc_session,
- Field('_id', str),
- Field('tree_ids', [dict(name=str, id=str)]),
- Field('blob_ids', [dict(name=str, id=str)]),
- Field('other_ids', [dict(name=str, id=str, type=SObjType)]))
-
-# Information about the last commit to touch a tree
-LastCommitDoc = collection(
- 'repo_last_commit', main_doc_session,
- Field('_id', S.ObjectId()),
- Field('commit_id', str),
- Field('path', str),
- Index('commit_id', 'path'),
- Field('entries', [dict(
- name=str,
- commit_id=str)]))
-
-# List of all trees contained within a commit
-# TreesDoc._id = CommitDoc._id
-# TreesDoc.tree_ids = [ TreeDoc._id, ... ]
-TreesDoc = collection(
- 'repo_trees', main_doc_session,
- Field('_id', str),
- Field('tree_ids', [str]))
-
-# Information about which things were added/removed in commit
-# DiffInfoDoc._id = CommitDoc._id
-DiffInfoDoc = collection(
- 'repo_diffinfo', main_doc_session,
- Field('_id', str),
- Field(
- 'differences',
- [dict(name=str, lhs_id=str, rhs_id=str)]))
-
-# List of commit runs (a run is a linear series of single-parent commits)
-# CommitRunDoc.commit_ids = [ CommitDoc._id, ... ]
-CommitRunDoc = collection(
- 'repo_commitrun', main_doc_session,
- Field('_id', str),
- Field('parent_commit_ids', [str], index=True),
- Field('commit_ids', [str], index=True),
- Field('commit_times', [datetime]))
-
-
-class RepoObject(object):
-
- def __repr__(self): # pragma no cover
- return '<%s %s>' % (
- self.__class__.__name__, self._id)
-
- def primary(self):
- return self
-
- def index_id(self):
- '''Globally unique artifact identifier. Used for
- SOLR ID, shortlinks, and maybe elsewhere
- '''
- id = '%s.%s#%s' % (
- self.__class__.__module__,
- self.__class__.__name__,
- self._id)
- return id.replace('.', '/')
-
- @classmethod
- def upsert(cls, id, **kwargs):
- isnew = False
- r = cls.query.get(_id=id)
- if r is not None:
- return r, isnew
- try:
- r = cls(_id=id, **kwargs)
- session(r).flush(r)
- isnew = True
- except pymongo.errors.DuplicateKeyError: # pragma no cover
- session(r).expunge(r)
- r = cls.query.get(_id=id)
- return r, isnew
-
-
-class Commit(RepoObject, ActivityObject):
- type_s = 'Commit'
- # Ephemeral attrs
- repo = None
-
- def __init__(self, **kw):
- for k, v in kw.iteritems():
- setattr(self, k, v)
-
- @property
- def activity_name(self):
- return self.shorthand_id()
-
- @property
- def activity_extras(self):
- d = ActivityObject.activity_extras.fget(self)
- d.update(summary=self.summary)
- if self.repo:
- d.update(app_config_id=self.repo.app.config._id)
- return d
-
- def has_activity_access(self, perm, user, activity):
- """
- Check access against the original app.
-
- Commits have no ACLs and are therefore always viewable by any user, if
- they have access to the tool.
- """
- app_config_id = activity.obj.activity_extras.get('app_config_id')
- if app_config_id:
- app_config = AppConfig.query.get(_id=app_config_id)
- return has_access(app_config, perm, user)
- return True
-
- def set_context(self, repo):
- self.repo = repo
-
- @LazyProperty
- def author_url(self):
- u = User.by_email_address(self.authored.email)
- if u:
- return u.url()
-
- @LazyProperty
- def committer_url(self):
- u = User.by_email_address(self.committed.email)
- if u:
- return u.url()
-
- @LazyProperty
- def tree(self):
- return self.get_tree(create=True)
-
- def get_tree(self, create=True):
- if self.tree_id is None and create:
- self.tree_id = self.repo.compute_tree_new(self)
- if self.tree_id is None:
- return None
- cache = getattr(c, 'model_cache', '') or ModelCache()
- t = cache.get(Tree, dict(_id=self.tree_id))
- if t is None and create:
- self.tree_id = self.repo.compute_tree_new(self)
- t = Tree.query.get(_id=self.tree_id)
- cache.set(Tree, dict(_id=self.tree_id), t)
- if t is not None:
- t.set_context(self)
- return t
-
- @LazyProperty
- def summary(self):
- message = h.really_unicode(self.message)
- first_line = message.split('\n')[0]
- return h.text.truncate(first_line, 50)
-
- def shorthand_id(self):
- if self.repo is None:
- self.repo = self.guess_repo()
- if self.repo is None:
- return repr(self)
- return self.repo.shorthand_for_commit(self._id)
-
- @LazyProperty
- def symbolic_ids(self):
- return self.repo.symbolics_for_commit(self)
-
- def get_parent(self, index=0):
- '''Get the parent of this commit.
-
- If there is no parent commit, or if an invalid index is given,
- returns None.
- '''
- try:
- cache = getattr(c, 'model_cache', '') or ModelCache()
- ci = cache.get(Commit, dict(_id=self.parent_ids[index]))
- if not ci:
- return None
- ci.set_context(self.repo)
- return ci
- except IndexError:
- return None
-
- def climb_commit_tree(self, predicate=None):
- '''
- Returns a generator that walks up the commit tree along
- the first-parent ancestory, starting with this commit,
- optionally filtering by a predicate.'''
- ancestor = self
- while ancestor:
- if predicate is None or predicate(ancestor):
- yield ancestor
- ancestor = ancestor.get_parent()
-
- def url(self):
- if self.repo is None:
- self.repo = self.guess_repo()
- if self.repo is None:
- return '#'
- return self.repo.url_for_commit(self)
-
- def guess_repo(self):
- import traceback
- log.error('guess_repo: should not be called: %s' %
- ''.join(traceback.format_stack()))
- for ac in c.project.app_configs:
- try:
- app = c.project.app_instance(ac)
- if app.repo._id in self.repo_ids:
- return app.repo
- except AttributeError:
- pass
- return None
-
- def link_text(self):
- '''The link text that will be used when a shortlink to this artifact
- is expanded into an <a></a> tag.
-
- By default this method returns type_s + shorthand_id(). Subclasses should
- override this method to provide more descriptive link text.
- '''
- return self.shorthand_id()
-
- def context(self):
- result = dict(prev=None, next=None)
- if self.parent_ids:
- result['prev'] = self.query.find(
- dict(_id={'$in': self.parent_ids})).all()
- for ci in result['prev']:
- ci.set_context(self.repo)
- if self.child_ids:
- result['next'] = self.query.find(
- dict(_id={'$in': self.child_ids})).all()
- for ci in result['next']:
- ci.set_context(self.repo)
- return result
-
- @LazyProperty
- def diffs(self):
- return self.paged_diffs()
-
- def paged_diffs(self, start=0, end=None):
- di = DiffInfoDoc.m.get(_id=self._id)
- if di is None:
- return Object(added=[], removed=[], changed=[], copied=[], total=0)
- added = []
- removed = []
- changed = []
- copied = []
- for change in di.differences[start:end]:
- if change.rhs_id is None:
- removed.append(change.name)
- elif change.lhs_id is None:
- added.append(change.name)
- else:
- changed.append(change.name)
- copied = self._diffs_copied(added, removed)
- return Object(
- added=added, removed=removed,
- changed=changed, copied=copied,
- total=len(di.differences))
-
- def _diffs_copied(self, added, removed):
- '''Return list with file renames diffs.
-
- Will change `added` and `removed` lists also.
- '''
- def _blobs_similarity(removed_blob, added):
- best = dict(ratio=0, name='', blob=None)
- for added_name in added:
- added_blob = self.tree.get_obj_by_path(added_name)
- if not isinstance(added_blob, Blob):
- continue
- diff = SequenceMatcher(None, removed_blob.text,
- added_blob.text)
- ratio = diff.quick_ratio()
- if ratio > best['ratio']:
- best['ratio'] = ratio
- best['name'] = added_name
- best['blob'] = added_blob
-
- if ratio == 1:
- break # we'll won't find better similarity than 100% :)
-
- if best['ratio'] > DIFF_SIMILARITY_THRESHOLD:
- diff = ''
- if best['ratio'] < 1:
- added_blob = best['blob']
- rpath = ('a' + removed_blob.path()).encode('utf-8')
- apath = ('b' + added_blob.path()).encode('utf-8')
- diff = ''.join(unified_diff(list(removed_blob),
- list(added_blob),
- rpath, apath))
- return dict(new=best['name'],
- ratio=best['ratio'], diff=diff)
-
- def _trees_similarity(removed_tree, added):
- for added_name in added:
- added_tree = self.tree.get_obj_by_path(added_name)
- if not isinstance(added_tree, Tree):
- continue
- if removed_tree._id == added_tree._id:
- return dict(new=added_name,
- ratio=1, diff='')
-
- if not removed:
- return []
- copied = []
- prev_commit = self.get_parent()
- for removed_name in removed[:]:
- removed_blob = prev_commit.tree.get_obj_by_path(removed_name)
- rename_info = None
- if isinstance(removed_blob, Blob):
- rename_info = _blobs_similarity(removed_blob, added)
- elif isinstance(removed_blob, Tree):
- rename_info = _trees_similarity(removed_blob, added)
- if rename_info is not None:
- rename_info['old'] = removed_name
- copied.append(rename_info)
- removed.remove(rename_info['old'])
- added.remove(rename_info['new'])
- return copied
-
- def get_path(self, path, create=True):
- path = path.lstrip('/')
- parts = path.split('/')
- cur = self.get_tree(create)
- if cur is not None:
- for part in parts:
- if part != '':
- cur = cur[part]
- return cur
-
- def has_path(self, path):
- try:
- self.get_path(path)
- return True
- except KeyError:
- return False
-
- @LazyProperty
- def changed_paths(self):
- '''
- Returns a list of paths changed in this commit.
- Leading and trailing slashes are removed, and
- the list is complete, meaning that if a sub-path
- is changed, all of the parent paths are included
- (including '' to represent the root path).
-
- Example:
-
- If the file /foo/bar is changed in the commit,
- this would return ['', 'foo', 'foo/bar']
- '''
- changes = self.repo.get_changes(self._id)
- changed_paths = set()
- for c in changes:
- node = c.strip('/')
- changed_paths.add(node)
- node_path = os.path.dirname(node)
- while node_path:
- changed_paths.add(node_path)
- node_path = os.path.dirname(node_path)
- changed_paths.add('') # include '/' if there are any changes
- return changed_paths
-
- @LazyProperty
- def added_paths(self):
- '''
- Returns a list of paths added in this commit.
- Leading and trailing slashes are removed, and
- the list is complete, meaning that if a directory
- with subdirectories is added, all of the child
- paths are included (this relies on the DiffInfoDoc
- being complete).
-
- Example:
-
- If the directory /foo/bar/ is added in the commit
- which contains a subdirectory /foo/bar/baz/ with
- the file /foo/bar/baz/qux.txt, this would return:
- ['foo/bar', 'foo/bar/baz', 'foo/bar/baz/qux.txt']
- '''
- diff_info = DiffInfoDoc.m.get(_id=self._id)
- diffs = set()
- if diff_info:
- for d in diff_info.differences:
- if d.lhs_id is None:
- diffs.add(d.name.strip('/'))
- return diffs
-
- @LazyProperty
- def info(self):
- return dict(
- id=self._id,
- author=self.authored.name,
- author_email=self.authored.email,
- date=self.authored.date,
- author_url=self.author_url,
- shortlink=self.shorthand_id(),
- summary=self.summary
- )
-
-
-class Tree(RepoObject):
- # Ephemeral attrs
- repo = None
- commit = None
- parent = None
- name = None
-
- def compute_hash(self):
- '''Compute a hash based on the contents of the tree. Note that this
- hash does not necessarily correspond to any actual DVCS hash.
- '''
- lines = (
- ['tree' + x.name + x.id for x in self.tree_ids]
- + ['blob' + x.name + x.id for x in self.blob_ids]
- + [x.type + x.name + x.id for x in self.other_ids])
- sha_obj = sha1()
- for line in sorted(lines):
- sha_obj.update(line)
- return sha_obj.hexdigest()
-
- def __getitem__(self, name):
- cache = getattr(c, 'model_cache', '') or ModelCache()
- obj = self.by_name[name]
- if obj['type'] == 'blob':
- return Blob(self, name, obj['id'])
- if obj['type'] == 'submodule':
- log.info('Skipping submodule "%s"' % name)
- raise KeyError, name
- obj = cache.get(Tree, dict(_id=obj['id']))
- if obj is None:
- oid = self.repo.compute_tree_new(
- self.commit, self.path() + name + '/')
- obj = cache.get(Tree, dict(_id=oid))
- if obj is None:
- raise KeyError, name
- obj.set_context(self, name)
- return obj
-
- def get_obj_by_path(self, path):
- if hasattr(path, 'get'):
- path = path['new']
- if path.startswith('/'):
- path = path[1:]
- path = path.split('/')
- obj = self
- for p in path:
- try:
- obj = obj[p]
- except KeyError:
- return None
- return obj
-
- def get_blob_by_path(self, path):
- obj = self.get_obj_by_path(path)
- return obj if isinstance(obj, Blob) else None
-
- def set_context(self, commit_or_tree, name=None):
- assert commit_or_tree is not self
- self.repo = commit_or_tree.repo
- if name:
- self.commit = commit_or_tree.commit
- self.parent = commit_or_tree
- self.name = name
- else:
- self.commit = commit_or_tree
-
- def readme(self):
- 'returns (filename, unicode text) if a readme file is found'
- for x in self.blob_ids:
- if README_RE.match(x.name):
- name = x.name
- blob = self[name]
- return (x.name, h.really_unicode(blob.text))
- return None, None
-
- def ls(self):
- '''
- List the entries in this tree, with historical commit info for
- each node.
- '''
- last_commit = LastCommit.get(self)
- # ensure that the LCD is saved, even if
- # there is an error later in the request
- if last_commit:
- session(last_commit).flush(last_commit)
- return self._lcd_map(last_commit)
- else:
- return []
-
- def _lcd_map(self, lcd):
- if lcd is None:
- return []
- commit_ids = [e.commit_id for e in lcd.entries]
- commits = list(Commit.query.find(dict(_id={'$in': commit_ids})))
- for commit in commits:
- commit.set_context(self.repo)
- commit_infos = {c._id: c.info for c in commits}
- by_name = lambda n: n.name
- tree_names = sorted([n.name for n in self.tree_ids])
- blob_names = sorted(
- [n.name for n in chain(self.blob_ids, self.other_ids)])
-
- results = []
- for type, names in (('DIR', tree_names), ('BLOB', blob_names)):
- for name in names:
- commit_info = commit_infos.get(lcd.by_name.get(name))
- if not commit_info:
- commit_info = defaultdict(str)
- elif 'id' in commit_info:
- commit_info['href'] = self.repo.url_for_commit(
- commit_info['id'])
- results.append(dict(
- kind=type,
- name=name,
- href=name,
- last_commit=dict(
- author=commit_info['author'],
- author_email=commit_info['author_email'],
- author_url=commit_info['author_url'],
- date=commit_info.get('date'),
- href=commit_info.get('href', ''),
- shortlink=commit_info['shortlink'],
- summary=commit_info['summary'],
- ),
- ))
- return results
-
- def path(self):
- if self.parent:
- assert self.parent is not self
- return self.parent.path() + self.name + '/'
- else:
- return '/'
-
- def url(self):
- return self.commit.url() + 'tree' + self.path()
-
- @LazyProperty
- def by_name(self):
- d = Object((x.name, x) for x in self.other_ids)
- d.update(
- (x.name, Object(x, type='tree'))
- for x in self.tree_ids)
- d.update(
- (x.name, Object(x, type='blob'))
- for x in self.blob_ids)
- return d
-
- def is_blob(self, name):
- return self.by_name[name]['type'] == 'blob'
-
- def get_blob(self, name):
- x = self.by_name[name]
- return Blob(self, name, x.id)
-
-
-class Blob(object):
-
- '''Lightweight object representing a file in the repo'''
-
- def __init__(self, tree, name, _id):
- self._id = _id
- self.tree = tree
- self.name = name
- self.repo = tree.repo
- self.commit = tree.commit
- fn, ext = os.path.splitext(self.name)
- self.extension = ext or fn
-
- def path(self):
- return self.tree.path() + h.really_unicode(self.name)
-
- def url(self):
- return self.tree.url() + h.really_unicode(self.name)
-
- @LazyProperty
- def _content_type_encoding(self):
- return self.repo.guess_type(self.name)
-
- @LazyProperty
- def content_type(self):
- return self._content_type_encoding[0]
-
- @LazyProperty
- def content_encoding(self):
- return self._content_type_encoding[1]
-
- @property
- def has_pypeline_view(self):
- if README_RE.match(self.name) or self.extension in PYPELINE_EXTENSIONS:
- return True
- return False
-
- @property
- def has_html_view(self):
- if (self.content_type.startswith('text/') or
- self.extension in VIEWABLE_EXTENSIONS or
- self.extension in PYPELINE_EXTENSIONS or
- self.extension in self.repo._additional_viewable_extensions or
- utils.is_text_file(self.text)):
- return True
- return False
-
- @property
- def has_image_view(self):
- return self.content_type.startswith('image/')
-
- def open(self):
- return self.repo.open_blob(self)
-
- def __iter__(self):
- return iter(self.open())
-
- @LazyProperty
- def size(self):
- return self.repo.blob_size(self)
-
- @LazyProperty
- def text(self):
- return self.open().read()
-
- @classmethod
- def diff(cls, v0, v1):
- differ = SequenceMatcher(v0, v1)
- return differ.get_opcodes()
-
-
-class LastCommit(RepoObject):
-
- def __repr__(self):
- return '<LastCommit /%s %s>' % (self.path, self.commit_id)
-
- @classmethod
- def _last_commit_id(cls, commit, path):
- try:
- rev = commit.repo.log(commit._id, path, id_only=True).next()
- return commit.repo.rev_to_commit_id(rev)
- except StopIteration:
- log.error('Tree node not recognized by SCM: %s @ %s',
- path, commit._id)
- return commit._id
-
- @classmethod
- def _prev_commit_id(cls, commit, path):
- if not commit.parent_ids or path in commit.added_paths:
- return None # new paths by definition have no previous LCD
- lcid_cache = getattr(c, 'lcid_cache', '')
- if lcid_cache != '' and path in lcid_cache:
- return lcid_cache[path]
- try:
- log_iter = commit.repo.log(commit._id, path, id_only=True)
- log_iter.next()
- rev = log_iter.next()
- return commit.repo.rev_to_commit_id(rev)
- except StopIteration:
- return None
-
- @classmethod
- def get(cls, tree):
- '''Find or build the LastCommitDoc for the given tree.'''
- cache = getattr(c, 'model_cache', '') or ModelCache()
- path = tree.path().strip('/')
- last_commit_id = cls._last_commit_id(tree.commit, path)
- lcd = cache.get(cls, {'path': path, 'commit_id': last_commit_id})
- if lcd is None:
- commit = cache.get(Commit, {'_id': last_commit_id})
- commit.set_context(tree.repo)
- lcd = cls._build(commit.get_path(path))
- return lcd
-
- @classmethod
- def _build(cls, tree):
- '''
- Build the LCD record, presuming that this tree is where it was most
- recently changed.
- '''
- model_cache = getattr(c, 'model_cache', '') or ModelCache()
- path = tree.path().strip('/')
- entries = []
- prev_lcd = None
- prev_lcd_cid = cls._prev_commit_id(tree.commit, path)
- if prev_lcd_cid:
- prev_lcd = model_cache.get(
- cls, {'path': path, 'commit_id': prev_lcd_cid})
- entries = {}
- nodes = set(
- [node.name for node in chain(tree.tree_ids, tree.blob_ids, tree.other_ids)])
- changed = set(
- [node for node in nodes if os.path.join(path, node) in tree.commit.changed_paths])
- unchanged = [os.path.join(path, node) for node in nodes - changed]
- if prev_lcd:
- # get unchanged entries from previously computed LCD
- entries = prev_lcd.by_name
- elif unchanged:
- # no previously computed LCD, so get unchanged entries from SCM
- # (but only ask for the ones that we know we need)
- entries = tree.commit.repo.last_commit_ids(tree.commit, unchanged)
- if entries is None:
- # something strange went wrong; still show the list of files
- # and possibly try again later
- entries = {}
- # paths are fully-qualified; shorten them back to just node names
- entries = {
- os.path.basename(path): commit_id for path, commit_id in entries.iteritems()}
- # update with the nodes changed in this tree's commit
- entries.update({node: tree.commit._id for node in changed})
- # convert to a list of dicts, since mongo doesn't handle arbitrary keys
- # well (i.e., . and $ not allowed)
- entries = [{'name': name, 'commit_id': value}
- for name, value in entries.iteritems()]
- lcd = cls(
- commit_id=tree.commit._id,
- path=path,
- entries=entries,
- )
- model_cache.set(cls, {'path': path, 'commit_id': tree.commit._id}, lcd)
- return lcd
-
- @LazyProperty
- def by_name(self):
- return {n.name: n.commit_id for n in self.entries}
-
-mapper(Commit, CommitDoc, repository_orm_session)
-mapper(Tree, TreeDoc, repository_orm_session)
-mapper(LastCommit, LastCommitDoc, repository_orm_session)
-
-
-class ModelCache(object):
-
- '''
- Cache model instances based on query params passed to get.
- '''
-
- def __init__(self, max_instances=None, max_queries=None):
- '''
- By default, each model type can have 2000 instances and
- 8000 queries. You can override these for specific model
- types by passing in a dict() for either max_instances or
- max_queries keyed by the class(es) with the max values.
- Classes not in the dict() will use the default 2000/8000
- default.
-
- If you pass in a number instead of a dict, that value will
- be used as the max for all classes.
- '''
- max_instances_default = 2000
- max_queries_default = 8000
- if isinstance(max_instances, int):
- max_instances_default = max_instances
- if isinstance(max_queries, int):
- max_queries_default = max_queries
- self._max_instances = defaultdict(lambda: max_instances_default)
- self._max_queries = defaultdict(lambda: max_queries_default)
- if hasattr(max_instances, 'items'):
- self._max_instances.update(max_instances)
- if hasattr(max_queries, 'items'):
- self._max_queries.update(max_queries)
-
- # keyed by query, holds _id
- self._query_cache = defaultdict(OrderedDict)
- self._instance_cache = defaultdict(OrderedDict) # keyed by _id
- self._synthetic_ids = defaultdict(set)
- self._synthetic_id_queries = defaultdict(set)
-
- def _normalize_query(self, query):
- _query = query
- if not isinstance(_query, tuple):
- _query = tuple(sorted(_query.items(), key=lambda k: k[0]))
- return _query
-
- def _model_query(self, cls):
- if hasattr(cls, 'query'):
- return cls.query
- elif hasattr(cls, 'm'):
- return cls.m
- else:
- raise AttributeError(
- '%s has neither "query" nor "m" attribute' % cls)
-
- def get(self, cls, query):
- _query = self._normalize_query(query)
- self._touch(cls, _query)
- if _query not in self._query_cache[cls]:
- val = self._model_query(cls).get(**query)
- self.set(cls, _query, val)
- return val
- _id = self._query_cache[cls][_query]
- if _id is None:
- return None
- if _id not in self._instance_cache[cls]:
- val = self._model_query(cls).get(**query)
- self.set(cls, _query, val)
- return val
- return self._instance_cache[cls][_id]
-
- def set(self, cls, query, val):
- _query = self._normalize_query(query)
- if val is not None:
- _id = getattr(val, '_model_cache_id',
- getattr(val, '_id',
- self._query_cache[cls].get(_query,
- None)))
- if _id is None:
- _id = val._model_cache_id = bson.ObjectId()
- self._synthetic_ids[cls].add(_id)
- if _id in self._synthetic_ids:
- self._synthetic_id_queries[cls].add(_query)
- self._query_cache[cls][_query] = _id
- self._instance_cache[cls][_id] = val
- else:
- self._query_cache[cls][_query] = None
- self._touch(cls, _query)
- self._check_sizes(cls)
-
- def _touch(self, cls, query):
- '''
- Keep track of insertion order, prevent duplicates,
- and expire from the cache in a FIFO manner.
- '''
- _query = self._normalize_query(query)
- if _query not in self._query_cache[cls]:
- return
- _id = self._query_cache[cls].pop(_query)
- self._query_cache[cls][_query] = _id
-
- if _id not in self._instance_cache[cls]:
- return
- val = self._instance_cache[cls].pop(_id)
- self._instance_cache[cls][_id] = val
-
- def _check_sizes(self, cls):
- if self.num_queries(cls) > self._max_queries[cls]:
- _id = self._remove_least_recently_used(self._query_cache[cls])
- if _id in self._instance_cache[cls]:
- instance = self._instance_cache[cls][_id]
- self._try_flush(instance, expunge=False)
- if self.num_instances(cls) > self._max_instances[cls]:
- instance = self._remove_least_recently_used(
- self._instance_cache[cls])
- self._try_flush(instance, expunge=True)
-
- def _try_flush(self, instance, expunge=False):
- try:
- inst_session = session(instance)
- except AttributeError:
- inst_session = None
- if inst_session:
- inst_session.flush(instance)
- if expunge:
- inst_session.expunge(instance)
-
- def _remove_least_recently_used(self, cache):
- # last-used (most-recently-used) is last in cache, so take first
- key, val = cache.popitem(last=False)
- return val
-
- def expire_new_instances(self, cls):
- '''
- Expire any instances that were "new" or had no _id value.
-
- If a lot of new instances of a class are being created, it's possible
- for a query to pull a copy from mongo when a copy keyed by the synthetic
- ID is still in the cache, potentially causing de-sync between the copies
- leading to one with missing data overwriting the other. Clear new
- instances out of the cache relatively frequently (depending on the query
- and instance cache sizes) to avoid this.
- '''
- for _query in self._synthetic_id_queries[cls]:
- self._query_cache[cls].pop(_query)
- self._synthetic_id_queries[cls] = set()
- for _id in self._synthetic_ids[cls]:
- instance = self._instance_cache[cls].pop(_id)
- self._try_flush(instance, expunge=True)
- self._synthetic_ids[cls] = set()
-
- def num_queries(self, cls=None):
- if cls is None:
- return sum([len(c) for c in self._query_cache.values()])
- else:
- return len(self._query_cache[cls])
-
- def num_instances(self, cls=None):
- if cls is None:
- return sum([len(c) for c in self._instance_cache.values()])
- else:
- return len(self._instance_cache[cls])
-
- def instance_ids(self, cls):
- return self._instance_cache[cls].keys()
-
- def batch_load(self, cls, query, attrs=None):
- '''
- Load multiple results given a query.
-
- Optionally takes a list of attribute names to use
- as the cache key. If not given, uses the keys of
- the given query.
- '''
- if attrs is None:
- attrs = query.keys()
- for result in self._model_query(cls).find(query):
- keys = {a: getattr(result, a) for a in attrs}
- self.set(cls, keys, result)
http://git-wip-us.apache.org/repos/asf/allura/blob/839d9cfb/Allura/allura/model/repo_refresh.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo_refresh.py b/Allura/allura/model/repo_refresh.py
index a2dc628..cdd71dc 100644
--- a/Allura/allura/model/repo_refresh.py
+++ b/Allura/allura/model/repo_refresh.py
@@ -31,9 +31,9 @@ from ming.orm import mapper, session, ThreadLocalORMSession
from allura.lib import utils
from allura.lib import helpers as h
-from allura.model.repo import CommitDoc, TreeDoc, TreesDoc, DiffInfoDoc
-from allura.model.repo import CommitRunDoc
-from allura.model.repo import Commit, Tree, LastCommit, ModelCache
+from allura.model.repository import CommitDoc, TreeDoc, TreesDoc, DiffInfoDoc
+from allura.model.repository import CommitRunDoc
+from allura.model.repository import Commit, Tree, LastCommit, ModelCache
from allura.model.index import ArtifactReferenceDoc, ShortlinkDoc
from allura.model.auth import User
from allura.model.timeline import TransientActor
@@ -178,7 +178,7 @@ def refresh_commit_repos(all_commit_ids, repo):
repo_ids={'$ne': repo._id})):
oid = ci._id
ci.repo_ids.append(repo._id)
- index_id = 'allura.model.repo.Commit#' + oid
+ index_id = 'allura.model.repository.Commit#' + oid
ref = ArtifactReferenceDoc(dict(
_id=index_id,
artifact_reference=dict(
http://git-wip-us.apache.org/repos/asf/allura/blob/839d9cfb/Allura/allura/model/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
index 81b4cd4..662ec59 100644
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -25,10 +25,12 @@ from subprocess import Popen, PIPE
from hashlib import sha1
from datetime import datetime
from time import time
-from collections import defaultdict
+from collections import defaultdict, OrderedDict
from urlparse import urljoin
from threading import Thread
from Queue import Queue
+from itertools import chain
+from difflib import SequenceMatcher, unified_diff
import tg
from paste.deploy.converters import asint
@@ -36,19 +38,25 @@ from pylons import tmpl_context as c
from pylons import app_globals as g
import pymongo
import pymongo.errors
+import bson
from ming import schema as S
+from ming import Field, collection, Index
from ming.utils import LazyProperty
-from ming.orm import FieldProperty, session, Mapper
+from ming.orm import FieldProperty, session, Mapper, mapper
+from ming.base import Object
from allura.lib import helpers as h
from allura.lib import utils
+from allura.lib.security import has_access
from .artifact import Artifact, VersionedArtifact
from .auth import User
-from .repo_refresh import refresh_repo, unknown_commit_ids as unknown_commit_ids_repo
from .timeline import ActivityObject
from .monq_model import MonQTask
+from .project import AppConfig
+from .session import main_doc_session
+from .session import repository_orm_session
log = logging.getLogger(__name__)
config = utils.ConfigProxy(
@@ -62,20 +70,36 @@ VIEWABLE_EXTENSIONS = [
'.pl', '.php4', '.php3', '.rhtml', '.svg', '.markdown', '.json', '.ini', '.tcl', '.vbs', '.xsl']
# Some schema types
SUser = dict(name=str, email=str, date=datetime)
SObjType = S.OneOf('blob', 'tree', 'submodule')

# Used for when we're going to batch queries using $in
QSIZE = 100
# Raw string so that the regex escape ``\.`` is not parsed as a (invalid)
# string escape sequence.
README_RE = re.compile(r'^README(\.[^.]*)?$', re.IGNORECASE)
VIEWABLE_EXTENSIONS = [
    '.php', '.py', '.js', '.java', '.html', '.htm', '.yaml', '.sh',
    '.rb', '.phtml', '.txt', '.bat', '.ps1', '.xhtml', '.css', '.cfm', '.jsp', '.jspx',
    '.pl', '.php4', '.php3', '.rhtml', '.svg', '.markdown', '.json', '.ini', '.tcl', '.vbs', '.xsl']
PYPELINE_EXTENSIONS = utils.MARKDOWN_EXTENSIONS + ['.rst']

DIFF_SIMILARITY_THRESHOLD = .5  # used for determining file renames
+
+
class RepositoryImplementation(object):
# Repository-specific code
def init(self): # pragma no cover
- raise NotImplementedError, 'init'
+ raise NotImplementedError('init')
def clone_from(self, source_url):  # pragma no cover
    '''Abstract hook: this base implementation only raises NotImplementedError.'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('clone_from')
def commit(self, revision):  # pragma no cover
    '''Abstract hook: this base implementation only raises NotImplementedError.'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('commit')
def all_commit_ids(self):  # pragma no cover
    '''Abstract hook: this base implementation only raises NotImplementedError.'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('all_commit_ids')
def new_commits(self, all_commits=False): # pragma no cover
'''Return a list of native commits in topological order (heads first).
@@ -83,21 +107,21 @@ class RepositoryImplementation(object):
"commit" is a repo-native object, NOT a Commit object.
If all_commits is False, only return commits not already indexed.
'''
- raise NotImplementedError, 'new_commits'
+ raise NotImplemented('new_commits')
def commit_parents(self, commit):  # pragma no cover
    '''Return a list of native commits for the parents of the given (native)
    commit'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('commit_parents')
def refresh_commit_info(self, oid, lazy=True):  # pragma no cover
    '''Refresh the data in the commit with id oid'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('refresh_commit_info')
def _setup_hooks(self, source_path=None): # pragma no cover
'''Install a hook in the repository that will ping the refresh url for
the repo. Optionally provide a path from which to copy existing hooks.'''
- raise NotImplementedError, '_setup_hooks'
+ raise NotImplemented('_setup_hooks')
# pragma no cover
def log(self, revs=None, path=None, exclude=None, id_only=True, **kw):
@@ -120,31 +144,31 @@ class RepositoryImplementation(object):
If id_only is True, returns only the commit ID (which can be faster),
otherwise it returns detailed information about each commit.
"""
- raise NotImplementedError, 'log'
+ raise NotImplemented('log')
def compute_tree_new(self, commit, path='/'):  # pragma no cover
    '''Used in hg and svn to compute a git-like-tree lazily with the new models'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    # The message text ('compute_tree') is kept as it was.
    raise NotImplementedError('compute_tree')
def open_blob(self, blob):  # pragma no cover
    '''Return a file-like object that contains the contents of the blob'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('open_blob')
def blob_size(self, blob):
    '''Return a blob size in bytes'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('blob_size')
def tarball(self, revision, path=None):
    '''Create a tarball for the revision'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('tarball')
def is_empty(self):
    '''Determine if the repository is empty by checking the filesystem'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('is_empty')
def is_file(self, path, rev=None):
    '''Determine if the repository is a file by checking the filesystem'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('is_file')
@classmethod
def shorthand_for_commit(cls, oid):
@@ -152,7 +176,7 @@ class RepositoryImplementation(object):
def symbolics_for_commit(self, commit):
    '''Return symbolic branch and tag names for a commit.'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('symbolics_for_commit')
def url_for_commit(self, commit, url_type='ci'):
'return an URL, given either a commit or object id'
@@ -194,19 +218,19 @@ class RepositoryImplementation(object):
@property
def head(self):
    '''Abstract property: this base implementation only raises NotImplementedError.'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('head')
@property
def heads(self):
    '''Abstract property: this base implementation only raises NotImplementedError.'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('heads')
@property
def branches(self):
    '''Abstract property: this base implementation only raises NotImplementedError.'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('branches')
@property
def tags(self):
    '''Abstract property: this base implementation only raises NotImplementedError.'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('tags')
def last_commit_ids(self, commit, paths):
'''
@@ -601,11 +625,13 @@ class Repository(Artifact, ActivityObject):
return content_type, encoding
def unknown_commit_ids(self):
    '''Delegate to ``repo_refresh.unknown_commit_ids`` with this repo's commit ids.'''
    # Imported inside the method rather than at module level, because
    # repo_refresh itself imports names from this module.
    from allura.model.repo_refresh import unknown_commit_ids as _find_unknown
    commit_ids = self.all_commit_ids()
    return _find_unknown(commit_ids)
def refresh(self, all_commits=False, notify=True, new_clone=False):
'''Find any new commits in the repository and update'''
try:
+ from allura.model.repo_refresh import refresh_repo
log.info('... %r analyzing', self)
self.set_status('analyzing')
refresh_repo(self, all_commits, notify, new_clone)
@@ -640,7 +666,7 @@ class Repository(Artifact, ActivityObject):
self._impl.tarball(revision, path)
def rev_to_commit_id(self, rev):
    '''Abstract hook: this base implementation only raises NotImplementedError.'''
    # NotImplemented is a non-callable sentinel; calling it raises TypeError,
    # so the NotImplementedError exception class must be raised instead.
    raise NotImplementedError('rev_to_commit_id')
def set_status(self, status):
'''
@@ -780,6 +806,923 @@ class MergeRequest(VersionedArtifact, ActivityObject):
return result
+# Basic commit information
+# One of these for each commit in the physical repo on disk. The _id is the
+# hexsha of the commit (for Git and Hg).
+CommitDoc = collection(
+ 'repo_ci', main_doc_session,
+ Field('_id', str),
+ Field('tree_id', str),
+ Field('committed', SUser),
+ Field('authored', SUser),
+ Field('message', str),
+ Field('parent_ids', [str], index=True),
+ Field('child_ids', [str], index=True),
+ Field('repo_ids', [S.ObjectId()], index=True))
+
+# Basic tree information (also see TreesDoc)
+TreeDoc = collection(
+ 'repo_tree', main_doc_session,
+ Field('_id', str),
+ Field('tree_ids', [dict(name=str, id=str)]),
+ Field('blob_ids', [dict(name=str, id=str)]),
+ Field('other_ids', [dict(name=str, id=str, type=SObjType)]))
+
+# Information about the last commit to touch a tree
+LastCommitDoc = collection(
+ 'repo_last_commit', main_doc_session,
+ Field('_id', S.ObjectId()),
+ Field('commit_id', str),
+ Field('path', str),
+ Index('commit_id', 'path'),
+ Field('entries', [dict(
+ name=str,
+ commit_id=str)]))
+
+# List of all trees contained within a commit
+# TreesDoc._id = CommitDoc._id
+# TreesDoc.tree_ids = [ TreeDoc._id, ... ]
+TreesDoc = collection(
+ 'repo_trees', main_doc_session,
+ Field('_id', str),
+ Field('tree_ids', [str]))
+
+# Information about which things were added/removed in commit
+# DiffInfoDoc._id = CommitDoc._id
+DiffInfoDoc = collection(
+ 'repo_diffinfo', main_doc_session,
+ Field('_id', str),
+ Field(
+ 'differences',
+ [dict(name=str, lhs_id=str, rhs_id=str)]))
+
+# List of commit runs (a run is a linear series of single-parent commits)
+# CommitRunDoc.commit_ids = [ CommitDoc._id, ... ]
+CommitRunDoc = collection(
+ 'repo_commitrun', main_doc_session,
+ Field('_id', str),
+ Field('parent_commit_ids', [str], index=True),
+ Field('commit_ids', [str], index=True),
+ Field('commit_times', [datetime]))
+
+
class RepoObject(object):
    '''Base class for the repository model objects defined in this module.

    Relies on the concrete class providing an ``_id`` attribute and, for
    ``upsert``, a ``query`` manager.
    '''

    def __repr__(self):  # pragma no cover
        return '<%s %s>' % (
            self.__class__.__name__, self._id)

    def primary(self):
        '''Return this object itself.'''
        return self

    def index_id(self):
        '''Globally unique artifact identifier. Used for
        SOLR ID, shortlinks, and maybe elsewhere
        '''
        # Local renamed from ``id`` so the builtin is not shadowed.
        artifact_id = '%s.%s#%s' % (
            'allura.model.repo',  # preserve index_id after module consolidation
            self.__class__.__name__,
            self._id)
        # Every '.' is replaced, including any that occur inside ``_id``.
        return artifact_id.replace('.', '/')

    @classmethod
    def upsert(cls, id, **kwargs):
        '''Fetch the object with ``_id == id``, creating it if it is missing.

        Returns a ``(obj, isnew)`` tuple; ``isnew`` is True only when this
        call created and flushed the object.
        '''
        isnew = False
        r = cls.query.get(_id=id)
        if r is not None:
            return r, isnew
        try:
            r = cls(_id=id, **kwargs)
            session(r).flush(r)
            isnew = True
        except pymongo.errors.DuplicateKeyError:  # pragma no cover
            # The flush hit a duplicate key: drop our copy and re-read
            # whatever is now stored under that id.
            session(r).expunge(r)
            r = cls.query.get(_id=id)
        return r, isnew
+
+
class Commit(RepoObject, ActivityObject):
    '''A single commit, plus helpers for its tree, parents and diffs.

    ``repo`` is not persisted; it must be supplied via ``set_context`` before
    most of the helpers below can be used.
    '''
    type_s = 'Commit'
    # Ephemeral attrs
    repo = None

    def __init__(self, **kw):
        for k, v in kw.iteritems():
            setattr(self, k, v)

    @property
    def activity_name(self):
        return self.shorthand_id()

    @property
    def activity_extras(self):
        d = ActivityObject.activity_extras.fget(self)
        d.update(summary=self.summary)
        if self.repo:
            d.update(app_config_id=self.repo.app.config._id)
        return d

    def has_activity_access(self, perm, user, activity):
        """
        Check access against the original app.

        Commits have no ACLs and are therefore always viewable by any user, if
        they have access to the tool.
        """
        app_config_id = activity.obj.activity_extras.get('app_config_id')
        if app_config_id:
            app_config = AppConfig.query.get(_id=app_config_id)
            return has_access(app_config, perm, user)
        return True

    def set_context(self, repo):
        self.repo = repo

    @LazyProperty
    def author_url(self):
        u = User.by_email_address(self.authored.email)
        if u:
            return u.url()

    @LazyProperty
    def committer_url(self):
        u = User.by_email_address(self.committed.email)
        if u:
            return u.url()

    @LazyProperty
    def tree(self):
        return self.get_tree(create=True)

    def get_tree(self, create=True):
        '''Return the root Tree of this commit, or None if unavailable.

        With ``create`` true, a missing tree is computed through
        ``self.repo.compute_tree_new``.
        '''
        if self.tree_id is None and create:
            self.tree_id = self.repo.compute_tree_new(self)
        if self.tree_id is None:
            return None
        cache = getattr(c, 'model_cache', '') or ModelCache()
        t = cache.get(Tree, dict(_id=self.tree_id))
        if t is None and create:
            # tree_id is set but the Tree document is missing: recompute it
            self.tree_id = self.repo.compute_tree_new(self)
            t = Tree.query.get(_id=self.tree_id)
            cache.set(Tree, dict(_id=self.tree_id), t)
        if t is not None:
            t.set_context(self)
        return t

    @LazyProperty
    def summary(self):
        message = h.really_unicode(self.message)
        first_line = message.split('\n')[0]
        return h.text.truncate(first_line, 50)

    def shorthand_id(self):
        if self.repo is None:
            self.repo = self.guess_repo()
        if self.repo is None:
            return repr(self)
        return self.repo.shorthand_for_commit(self._id)

    @LazyProperty
    def symbolic_ids(self):
        return self.repo.symbolics_for_commit(self)

    def get_parent(self, index=0):
        '''Get the parent of this commit.

        If there is no parent commit, or if an invalid index is given,
        returns None.
        '''
        try:
            cache = getattr(c, 'model_cache', '') or ModelCache()
            ci = cache.get(Commit, dict(_id=self.parent_ids[index]))
            if not ci:
                return None
            ci.set_context(self.repo)
            return ci
        except IndexError:
            return None

    def climb_commit_tree(self, predicate=None):
        '''
        Returns a generator that walks up the commit tree along
        the first-parent ancestry, starting with this commit,
        optionally filtering by a predicate.'''
        ancestor = self
        while ancestor:
            if predicate is None or predicate(ancestor):
                yield ancestor
            ancestor = ancestor.get_parent()

    def url(self):
        if self.repo is None:
            self.repo = self.guess_repo()
        if self.repo is None:
            return '#'
        return self.repo.url_for_commit(self)

    def guess_repo(self):
        '''Search the current project's apps for a repo containing this commit.

        Logs an error with the call stack every time it is used, since
        callers are expected to have set the repo context already.
        '''
        import traceback
        # lazy %-args: the message is only built if the record is emitted
        log.error('guess_repo: should not be called: %s',
                  ''.join(traceback.format_stack()))
        for ac in c.project.app_configs:
            try:
                app = c.project.app_instance(ac)
                if app.repo._id in self.repo_ids:
                    return app.repo
            except AttributeError:
                # app has no usable ``repo``; try the next one
                pass
        return None

    def link_text(self):
        '''The link text that will be used when a shortlink to this artifact
        is expanded into an <a></a> tag.

        By default this method returns type_s + shorthand_id(). Subclasses should
        override this method to provide more descriptive link text.
        '''
        return self.shorthand_id()

    def context(self):
        '''Return ``dict(prev=..., next=...)`` with the parent and child commits.

        Each value is a list of Commit objects (with repo context set), or
        None when there are no parent / child ids.
        '''
        result = dict(prev=None, next=None)
        if self.parent_ids:
            result['prev'] = self.query.find(
                dict(_id={'$in': self.parent_ids})).all()
            for ci in result['prev']:
                ci.set_context(self.repo)
        if self.child_ids:
            result['next'] = self.query.find(
                dict(_id={'$in': self.child_ids})).all()
            for ci in result['next']:
                ci.set_context(self.repo)
        return result

    @LazyProperty
    def diffs(self):
        return self.paged_diffs()

    def paged_diffs(self, start=0, end=None):
        '''Classify the ``[start:end]`` slice of this commit's recorded diffs.

        Returns an Object with ``added``, ``removed``, ``changed`` and
        ``copied`` lists plus ``total``, the number of all recorded
        differences (not just the slice).
        '''
        di = DiffInfoDoc.m.get(_id=self._id)
        if di is None:
            return Object(added=[], removed=[], changed=[], copied=[], total=0)
        added = []
        removed = []
        changed = []
        for change in di.differences[start:end]:
            if change.rhs_id is None:
                removed.append(change.name)
            elif change.lhs_id is None:
                added.append(change.name)
            else:
                changed.append(change.name)
        # also strips detected renames out of ``added`` / ``removed``
        copied = self._diffs_copied(added, removed)
        return Object(
            added=added, removed=removed,
            changed=changed, copied=copied,
            total=len(di.differences))

    def _diffs_copied(self, added, removed):
        '''Return list with file renames diffs.

        Will change `added` and `removed` lists also.
        '''
        def _blobs_similarity(removed_blob, added):
            best = dict(ratio=0, name='', blob=None)
            for added_name in added:
                added_blob = self.tree.get_obj_by_path(added_name)
                if not isinstance(added_blob, Blob):
                    continue
                diff = SequenceMatcher(None, removed_blob.text,
                                       added_blob.text)
                ratio = diff.quick_ratio()
                if ratio > best['ratio']:
                    best['ratio'] = ratio
                    best['name'] = added_name
                    best['blob'] = added_blob

                if ratio == 1:
                    break  # we won't find better similarity than 100% :)

            if best['ratio'] > DIFF_SIMILARITY_THRESHOLD:
                diff = ''
                if best['ratio'] < 1:
                    added_blob = best['blob']
                    rpath = ('a' + removed_blob.path()).encode('utf-8')
                    apath = ('b' + added_blob.path()).encode('utf-8')
                    diff = ''.join(unified_diff(list(removed_blob),
                                                list(added_blob),
                                                rpath, apath))
                return dict(new=best['name'],
                            ratio=best['ratio'], diff=diff)

        def _trees_similarity(removed_tree, added):
            for added_name in added:
                added_tree = self.tree.get_obj_by_path(added_name)
                if not isinstance(added_tree, Tree):
                    continue
                if removed_tree._id == added_tree._id:
                    return dict(new=added_name,
                                ratio=1, diff='')

        if not removed:
            return []
        prev_commit = self.get_parent()
        if prev_commit is None:
            # get_parent() returns None when the parent cannot be loaded;
            # without it there is nothing to compare removed paths against.
            return []
        copied = []
        # iterate over a copy: ``removed`` is mutated inside the loop
        for removed_name in removed[:]:
            removed_blob = prev_commit.tree.get_obj_by_path(removed_name)
            rename_info = None
            if isinstance(removed_blob, Blob):
                rename_info = _blobs_similarity(removed_blob, added)
            elif isinstance(removed_blob, Tree):
                rename_info = _trees_similarity(removed_blob, added)
            if rename_info is not None:
                rename_info['old'] = removed_name
                copied.append(rename_info)
                removed.remove(rename_info['old'])
                added.remove(rename_info['new'])
        return copied

    def get_path(self, path, create=True):
        '''Walk ``path`` down from the root tree and return the object found.

        Returns None when the commit has no tree; a missing path component
        raises KeyError (see ``Tree.__getitem__``).
        '''
        path = path.lstrip('/')
        parts = path.split('/')
        cur = self.get_tree(create)
        if cur is not None:
            for part in parts:
                if part != '':
                    cur = cur[part]
        return cur

    def has_path(self, path):
        try:
            self.get_path(path)
            return True
        except KeyError:
            return False

    @LazyProperty
    def changed_paths(self):
        '''
        Returns a set of paths changed in this commit.
        Leading and trailing slashes are removed, and
        the set is complete, meaning that if a sub-path
        is changed, all of the parent paths are included
        (including '' to represent the root path).

        Example:

            If the file /foo/bar is changed in the commit,
            this would return set(['', 'foo', 'foo/bar'])
        '''
        changes = self.repo.get_changes(self._id)
        changed_paths = set()
        for change in changes:
            node = change.strip('/')
            changed_paths.add(node)
            node_path = os.path.dirname(node)
            while node_path:
                changed_paths.add(node_path)
                node_path = os.path.dirname(node_path)
            changed_paths.add('')  # include '/' if there are any changes
        return changed_paths

    @LazyProperty
    def added_paths(self):
        '''
        Returns a set of paths added in this commit.
        Leading and trailing slashes are removed, and
        the set is complete, meaning that if a directory
        with subdirectories is added, all of the child
        paths are included (this relies on the DiffInfoDoc
        being complete).

        Example:

            If the directory /foo/bar/ is added in the commit
            which contains a subdirectory /foo/bar/baz/ with
            the file /foo/bar/baz/qux.txt, this would return:
            set(['foo/bar', 'foo/bar/baz', 'foo/bar/baz/qux.txt'])
        '''
        diff_info = DiffInfoDoc.m.get(_id=self._id)
        diffs = set()
        if diff_info:
            for d in diff_info.differences:
                if d.lhs_id is None:
                    diffs.add(d.name.strip('/'))
        return diffs

    @LazyProperty
    def info(self):
        return dict(
            id=self._id,
            author=self.authored.name,
            author_email=self.authored.email,
            date=self.authored.date,
            author_url=self.author_url,
            shortlink=self.shorthand_id(),
            summary=self.summary
        )
+
+
class Tree(RepoObject):
    '''A directory node of a commit; entries are looked up by name.'''
    # Ephemeral attrs
    repo = None
    commit = None
    parent = None
    name = None

    def compute_hash(self):
        '''Compute a hash based on the contents of the tree. Note that this
        hash does not necessarily correspond to any actual DVCS hash.
        '''
        tree_lines = ['tree' + entry.name + entry.id for entry in self.tree_ids]
        blob_lines = ['blob' + entry.name + entry.id for entry in self.blob_ids]
        other_lines = [
            entry.type + entry.name + entry.id for entry in self.other_ids]
        digest = sha1()
        for text in sorted(tree_lines + blob_lines + other_lines):
            digest.update(text)
        return digest.hexdigest()

    def __getitem__(self, name):
        '''Return the Blob or sub-Tree called ``name``.

        Raises KeyError for unknown names, for submodules, and when the
        sub-tree cannot be loaded or computed.
        '''
        cache = getattr(c, 'model_cache', '') or ModelCache()
        entry = self.by_name[name]
        if entry['type'] == 'blob':
            return Blob(self, name, entry['id'])
        if entry['type'] == 'submodule':
            log.info('Skipping submodule "%s"' % name)
            raise KeyError(name)
        subtree = cache.get(Tree, dict(_id=entry['id']))
        if subtree is None:
            # not stored yet: ask the repo to compute it, then look again
            oid = self.repo.compute_tree_new(
                self.commit, self.path() + name + '/')
            subtree = cache.get(Tree, dict(_id=oid))
        if subtree is None:
            raise KeyError(name)
        subtree.set_context(self, name)
        return subtree

    def get_obj_by_path(self, path):
        '''Resolve a '/'-separated path below this tree; None if not found.

        ``path`` may also be a dict-like rename record, in which case its
        'new' entry is used.
        '''
        if hasattr(path, 'get'):
            path = path['new']
        if path.startswith('/'):
            path = path[1:]
        node = self
        for part in path.split('/'):
            try:
                node = node[part]
            except KeyError:
                return None
        return node

    def get_blob_by_path(self, path):
        found = self.get_obj_by_path(path)
        if isinstance(found, Blob):
            return found
        return None

    def set_context(self, commit_or_tree, name=None):
        '''Attach repo/commit (and, when ``name`` is given, parent) context.'''
        assert commit_or_tree is not self
        self.repo = commit_or_tree.repo
        if not name:
            # no name: the argument is the owning commit
            self.commit = commit_or_tree
            return
        # named: the argument is the parent tree
        self.commit = commit_or_tree.commit
        self.parent = commit_or_tree
        self.name = name

    def readme(self):
        'returns (filename, unicode text) if a readme file is found'
        for entry in self.blob_ids:
            if not README_RE.match(entry.name):
                continue
            blob = self[entry.name]
            return (entry.name, h.really_unicode(blob.text))
        return None, None

    def ls(self):
        '''
        List the entries in this tree, with historical commit info for
        each node.
        '''
        last_commit = LastCommit.get(self)
        if not last_commit:
            return []
        # ensure that the LCD is saved, even if
        # there is an error later in the request
        session(last_commit).flush(last_commit)
        return self._lcd_map(last_commit)

    def _lcd_map(self, lcd):
        '''Build the listing rows for ``ls`` from a LastCommit record.'''
        if lcd is None:
            return []
        commit_ids = [e.commit_id for e in lcd.entries]
        commits = list(Commit.query.find(dict(_id={'$in': commit_ids})))
        for commit in commits:
            commit.set_context(self.repo)
        commit_infos = {ci._id: ci.info for ci in commits}
        tree_names = sorted([n.name for n in self.tree_ids])
        blob_names = sorted(
            [n.name for n in chain(self.blob_ids, self.other_ids)])

        results = []
        for kind, names in (('DIR', tree_names), ('BLOB', blob_names)):
            for name in names:
                commit_info = commit_infos.get(lcd.by_name.get(name))
                if not commit_info:
                    # unknown commit: every field falls back to ''
                    commit_info = defaultdict(str)
                elif 'id' in commit_info:
                    commit_info['href'] = self.repo.url_for_commit(
                        commit_info['id'])
                last_commit = dict(
                    author=commit_info['author'],
                    author_email=commit_info['author_email'],
                    author_url=commit_info['author_url'],
                    date=commit_info.get('date'),
                    href=commit_info.get('href', ''),
                    shortlink=commit_info['shortlink'],
                    summary=commit_info['summary'],
                )
                results.append(dict(
                    kind=kind,
                    name=name,
                    href=name,
                    last_commit=last_commit,
                ))
        return results

    def path(self):
        if not self.parent:
            return '/'
        assert self.parent is not self
        return self.parent.path() + self.name + '/'

    def url(self):
        return self.commit.url() + 'tree' + self.path()

    @LazyProperty
    def by_name(self):
        # Later updates win on name clashes: other < tree < blob
        d = Object((x.name, x) for x in self.other_ids)
        d.update(
            (x.name, Object(x, type='tree'))
            for x in self.tree_ids)
        d.update(
            (x.name, Object(x, type='blob'))
            for x in self.blob_ids)
        return d

    def is_blob(self, name):
        return self.by_name[name]['type'] == 'blob'

    def get_blob(self, name):
        entry = self.by_name[name]
        return Blob(self, name, entry.id)
+
+
class Blob(object):

    '''Lightweight object representing a file in the repo'''

    def __init__(self, tree, name, _id):
        self._id = _id
        self.tree = tree
        self.name = name
        self.repo = tree.repo
        self.commit = tree.commit
        fn, ext = os.path.splitext(self.name)
        # names without an extension (e.g. dotfiles) use the whole name
        self.extension = ext or fn

    def path(self):
        return self.tree.path() + h.really_unicode(self.name)

    def url(self):
        return self.tree.url() + h.really_unicode(self.name)

    @LazyProperty
    def _content_type_encoding(self):
        return self.repo.guess_type(self.name)

    @LazyProperty
    def content_type(self):
        return self._content_type_encoding[0]

    @LazyProperty
    def content_encoding(self):
        return self._content_type_encoding[1]

    @property
    def has_pypeline_view(self):
        if README_RE.match(self.name) or self.extension in PYPELINE_EXTENSIONS:
            return True
        return False

    @property
    def has_html_view(self):
        # cheapest checks first; reading the blob text is the last resort
        if (self.content_type.startswith('text/') or
                self.extension in VIEWABLE_EXTENSIONS or
                self.extension in PYPELINE_EXTENSIONS or
                self.extension in self.repo._additional_viewable_extensions or
                utils.is_text_file(self.text)):
            return True
        return False

    @property
    def has_image_view(self):
        return self.content_type.startswith('image/')

    def open(self):
        return self.repo.open_blob(self)

    def __iter__(self):
        return iter(self.open())

    @LazyProperty
    def size(self):
        return self.repo.blob_size(self)

    @LazyProperty
    def text(self):
        return self.open().read()

    @classmethod
    def diff(cls, v0, v1):
        '''Return the SequenceMatcher opcodes that turn ``v0`` into ``v1``.'''
        # SequenceMatcher's first positional parameter is ``isjunk``; passing
        # the sequences as (v0, v1) would use v0 as the junk filter and
        # compare v1 against an empty sequence.
        differ = SequenceMatcher(None, v0, v1)
        return differ.get_opcodes()
+
+
class LastCommit(RepoObject):
    '''Record of which commit last touched each entry of a tree path.'''

    def __repr__(self):
        return '<LastCommit /%s %s>' % (self.path, self.commit_id)

    @classmethod
    def _last_commit_id(cls, commit, path):
        '''Return the id of the newest commit in ``path``'s log at ``commit``.

        Falls back to ``commit._id`` (and logs an error) when the log is empty.
        '''
        try:
            rev = commit.repo.log(commit._id, path, id_only=True).next()
            return commit.repo.rev_to_commit_id(rev)
        except StopIteration:
            log.error('Tree node not recognized by SCM: %s @ %s',
                      path, commit._id)
            return commit._id

    @classmethod
    def _prev_commit_id(cls, commit, path):
        '''Return the id of the second entry in ``path``'s log, or None.'''
        if not commit.parent_ids or path in commit.added_paths:
            return None  # new paths by definition have no previous LCD
        lcid_cache = getattr(c, 'lcid_cache', '')
        if lcid_cache != '' and path in lcid_cache:
            return lcid_cache[path]
        try:
            log_iter = commit.repo.log(commit._id, path, id_only=True)
            log_iter.next()  # skip the most recent entry
            rev = log_iter.next()
            return commit.repo.rev_to_commit_id(rev)
        except StopIteration:
            return None

    @classmethod
    def get(cls, tree):
        '''Find or build the LastCommitDoc for the given tree.'''
        cache = getattr(c, 'model_cache', '') or ModelCache()
        path = tree.path().strip('/')
        last_commit_id = cls._last_commit_id(tree.commit, path)
        lcd = cache.get(cls, {'path': path, 'commit_id': last_commit_id})
        if lcd is None:
            commit = cache.get(Commit, {'_id': last_commit_id})
            commit.set_context(tree.repo)
            lcd = cls._build(commit.get_path(path))
        return lcd

    @classmethod
    def _build(cls, tree):
        '''
        Build the LCD record, presuming that this tree is where it was most
        recently changed.
        '''
        model_cache = getattr(c, 'model_cache', '') or ModelCache()
        path = tree.path().strip('/')
        prev_lcd = None
        prev_lcd_cid = cls._prev_commit_id(tree.commit, path)
        if prev_lcd_cid:
            prev_lcd = model_cache.get(
                cls, {'path': path, 'commit_id': prev_lcd_cid})
        entries = {}
        nodes = set(
            [node.name for node in chain(tree.tree_ids, tree.blob_ids, tree.other_ids)])
        changed = set(
            [node for node in nodes if os.path.join(path, node) in tree.commit.changed_paths])
        unchanged = [os.path.join(path, node) for node in nodes - changed]
        if prev_lcd:
            # get unchanged entries from previously computed LCD.
            # Copy the mapping: ``by_name`` is cached on prev_lcd, and the
            # update() below must not leak this commit's entries into it.
            entries = dict(prev_lcd.by_name)
        elif unchanged:
            # no previously computed LCD, so get unchanged entries from SCM
            # (but only ask for the ones that we know we need)
            entries = tree.commit.repo.last_commit_ids(tree.commit, unchanged)
            if entries is None:
                # something strange went wrong; still show the list of files
                # and possibly try again later
                entries = {}
            # paths are fully-qualified; shorten them back to just node names
            entries = {
                os.path.basename(full_path): commit_id
                for full_path, commit_id in entries.iteritems()}
        # update with the nodes changed in this tree's commit
        entries.update({node: tree.commit._id for node in changed})
        # convert to a list of dicts, since mongo doesn't handle arbitrary keys
        # well (i.e., . and $ not allowed)
        entries = [{'name': name, 'commit_id': value}
                   for name, value in entries.iteritems()]
        lcd = cls(
            commit_id=tree.commit._id,
            path=path,
            entries=entries,
        )
        model_cache.set(cls, {'path': path, 'commit_id': tree.commit._id}, lcd)
        return lcd

    @LazyProperty
    def by_name(self):
        return {n.name: n.commit_id for n in self.entries}
+
+
class ModelCache(object):

    '''
    Cache model instances based on query params passed to get.
    '''

    def __init__(self, max_instances=None, max_queries=None):
        '''
        By default, each model type can have 2000 instances and
        8000 queries.  You can override these for specific model
        types by passing in a dict() for either max_instances or
        max_queries keyed by the class(es) with the max values.
        Classes not in the dict() will use the default 2000/8000
        default.

        If you pass in a number instead of a dict, that value will
        be used as the max for all classes.
        '''
        max_instances_default = 2000
        max_queries_default = 8000
        if isinstance(max_instances, int):
            max_instances_default = max_instances
        if isinstance(max_queries, int):
            max_queries_default = max_queries
        self._max_instances = defaultdict(lambda: max_instances_default)
        self._max_queries = defaultdict(lambda: max_queries_default)
        if hasattr(max_instances, 'items'):
            self._max_instances.update(max_instances)
        if hasattr(max_queries, 'items'):
            self._max_queries.update(max_queries)

        # keyed by query, holds _id
        self._query_cache = defaultdict(OrderedDict)
        self._instance_cache = defaultdict(OrderedDict)  # keyed by _id
        # per class: ids generated here for instances that had no _id ...
        self._synthetic_ids = defaultdict(set)
        # ... and the normalized queries that resolve to those ids
        self._synthetic_id_queries = defaultdict(set)

    def _normalize_query(self, query):
        '''Turn a query dict into a hashable, order-independent tuple.'''
        _query = query
        if not isinstance(_query, tuple):
            _query = tuple(sorted(_query.items(), key=lambda k: k[0]))
        return _query

    def _model_query(self, cls):
        '''Return the query manager of ``cls`` (``query``, else ``m``).'''
        if hasattr(cls, 'query'):
            return cls.query
        elif hasattr(cls, 'm'):
            return cls.m
        else:
            raise AttributeError(
                '%s has neither "query" nor "m" attribute' % cls)

    def get(self, cls, query):
        '''Return the cached instance for ``query``, loading it on a miss.

        A result of None is cached as well, so repeated misses do not hit
        the database again.
        '''
        _query = self._normalize_query(query)
        self._touch(cls, _query)
        if _query not in self._query_cache[cls]:
            val = self._model_query(cls).get(**query)
            self.set(cls, _query, val)
            return val
        _id = self._query_cache[cls][_query]
        if _id is None:
            return None
        if _id not in self._instance_cache[cls]:
            # query is known but its instance was evicted: reload it
            val = self._model_query(cls).get(**query)
            self.set(cls, _query, val)
            return val
        return self._instance_cache[cls][_id]

    def set(self, cls, query, val):
        '''Store ``val`` (possibly None) as the result of ``query``.'''
        _query = self._normalize_query(query)
        if val is not None:
            _id = getattr(val, '_model_cache_id',
                          getattr(val, '_id',
                                  self._query_cache[cls].get(_query,
                                                             None)))
            if _id is None:
                _id = val._model_cache_id = bson.ObjectId()
                self._synthetic_ids[cls].add(_id)
            # Look in this class's id set: ``_synthetic_ids`` itself is keyed
            # by class, so testing membership on it directly never matches.
            if _id in self._synthetic_ids[cls]:
                self._synthetic_id_queries[cls].add(_query)
            self._query_cache[cls][_query] = _id
            self._instance_cache[cls][_id] = val
        else:
            self._query_cache[cls][_query] = None
        self._touch(cls, _query)
        self._check_sizes(cls)

    def _touch(self, cls, query):
        '''
        Move the query (and its instance, if cached) to the end of the
        ordered caches, so that the entries at the front are the least
        recently used ones and get expired first.
        '''
        _query = self._normalize_query(query)
        if _query not in self._query_cache[cls]:
            return
        _id = self._query_cache[cls].pop(_query)
        self._query_cache[cls][_query] = _id

        if _id not in self._instance_cache[cls]:
            return
        val = self._instance_cache[cls].pop(_id)
        self._instance_cache[cls][_id] = val

    def _check_sizes(self, cls):
        '''Evict one entry from each cache of ``cls`` that is over its limit.'''
        if self.num_queries(cls) > self._max_queries[cls]:
            _id = self._remove_least_recently_used(self._query_cache[cls])
            if _id in self._instance_cache[cls]:
                instance = self._instance_cache[cls][_id]
                self._try_flush(instance, expunge=False)
        if self.num_instances(cls) > self._max_instances[cls]:
            instance = self._remove_least_recently_used(
                self._instance_cache[cls])
            self._try_flush(instance, expunge=True)

    def _try_flush(self, instance, expunge=False):
        '''Flush (and optionally expunge) ``instance`` if it has a session.'''
        try:
            inst_session = session(instance)
        except AttributeError:
            inst_session = None
        if inst_session:
            inst_session.flush(instance)
            if expunge:
                inst_session.expunge(instance)

    def _remove_least_recently_used(self, cache):
        # last-used (most-recently-used) is last in cache, so take first
        key, val = cache.popitem(last=False)
        return val

    def expire_new_instances(self, cls):
        '''
        Expire any instances that were "new" or had no _id value.

        If a lot of new instances of a class are being created, it's possible
        for a query to pull a copy from mongo when a copy keyed by the synthetic
        ID is still in the cache, potentially causing de-sync between the copies
        leading to one with missing data overwriting the other.  Clear new
        instances out of the cache relatively frequently (depending on the query
        and instance cache sizes) to avoid this.
        '''
        # Entries may already have been evicted by _check_sizes, so every
        # pop() tolerates a missing key.
        for _query in self._synthetic_id_queries[cls]:
            self._query_cache[cls].pop(_query, None)
        self._synthetic_id_queries[cls] = set()
        for _id in self._synthetic_ids[cls]:
            instance = self._instance_cache[cls].pop(_id, None)
            if instance is not None:
                self._try_flush(instance, expunge=True)
        self._synthetic_ids[cls] = set()

    def num_queries(self, cls=None):
        '''Number of cached queries for ``cls``, or for all classes.'''
        if cls is None:
            return sum([len(cached) for cached in self._query_cache.values()])
        else:
            return len(self._query_cache[cls])

    def num_instances(self, cls=None):
        '''Number of cached instances for ``cls``, or for all classes.'''
        if cls is None:
            return sum([len(cached) for cached in self._instance_cache.values()])
        else:
            return len(self._instance_cache[cls])

    def instance_ids(self, cls):
        return self._instance_cache[cls].keys()

    def batch_load(self, cls, query, attrs=None):
        '''
        Load multiple results given a query.

        Optionally takes a list of attribute names to use
        as the cache key.  If not given, uses the keys of
        the given query.
        '''
        if attrs is None:
            attrs = query.keys()
        for result in self._model_query(cls).find(query):
            keys = {a: getattr(result, a) for a in attrs}
            self.set(cls, keys, result)
+
+
class GitLikeTree(object):
'''
@@ -922,4 +1865,7 @@ def zipdir(source, zipfile, exclude=None):
"STDERR: {3}".format(command, p.returncode, stdout, stderr))
+mapper(Commit, CommitDoc, repository_orm_session)
+mapper(Tree, TreeDoc, repository_orm_session)
+mapper(LastCommit, LastCommitDoc, repository_orm_session)
Mapper.compile_all()
http://git-wip-us.apache.org/repos/asf/allura/blob/839d9cfb/Allura/allura/scripts/refresh_last_commits.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/refresh_last_commits.py b/Allura/allura/scripts/refresh_last_commits.py
index 720af10..4673a2a 100644
--- a/Allura/allura/scripts/refresh_last_commits.py
+++ b/Allura/allura/scripts/refresh_last_commits.py
@@ -141,22 +141,22 @@ class RefreshLastCommits(ScriptTask):
if options.diffs:
print 'Processing diffs'
for i, commit_id in enumerate(commit_ids):
- commit = M.repo.Commit.query.get(_id=commit_id)
+ commit = M.repository.Commit.query.get(_id=commit_id)
with time(timings):
M.repo_refresh.compute_diffs(
c.app.repo._id, tree_cache, commit)
if i % 1000 == 0:
cls._print_stats(i, timings, 1000)
- model_cache = M.repo.ModelCache(
- max_instances={M.repo.LastCommit: 4000},
- max_queries={M.repo.LastCommit: 4000},
+ model_cache = M.repository.ModelCache(
+ max_instances={M.repository.LastCommit: 4000},
+ max_queries={M.repository.LastCommit: 4000},
)
lcid_cache = {}
timings = []
print 'Processing last commits'
for i, commit_id in enumerate(commit_ids):
- commit = M.repo.Commit.query.get(_id=commit_id)
+ commit = M.repository.Commit.query.get(_id=commit_id)
if commit is None:
print "Commit missing, skipping: %s" % commit_id
continue
@@ -174,18 +174,18 @@ class RefreshLastCommits(ScriptTask):
def _clean(cls, commit_ids, clean_diffs):
if clean_diffs:
# delete DiffInfoDocs
- i = M.repo.DiffInfoDoc.m.find(
+ i = M.repository.DiffInfoDoc.m.find(
dict(_id={'$in': commit_ids})).count()
log.info("Deleting %i DiffInfoDoc docs for %i commits...",
i, len(commit_ids))
- M.repo.DiffInfoDoc.m.remove(dict(_id={'$in': commit_ids}))
+ M.repository.DiffInfoDoc.m.remove(dict(_id={'$in': commit_ids}))
# delete LastCommitDocs
- i = M.repo.LastCommitDoc.m.find(
+ i = M.repository.LastCommitDoc.m.find(
dict(commit_id={'$in': commit_ids})).count()
log.info("Deleting %i LastCommitDoc docs for %i commits...",
i, len(commit_ids))
- M.repo.LastCommitDoc.m.remove(dict(commit_id={'$in': commit_ids}))
+ M.repository.LastCommitDoc.m.remove(dict(commit_id={'$in': commit_ids}))
@classmethod
def _print_stats(cls, processed, timings, debug_step):