You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2013/06/28 16:59:36 UTC

[3/5] git commit: [#6272] Make SCM log indexless

[#6272] Make SCM log indexless

Signed-off-by: Cory Johns <cj...@slashdotmedia.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/a06b9146
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/a06b9146
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/a06b9146

Branch: refs/heads/master
Commit: a06b91463cac939fbd726fe9178255d583eea4bd
Parents: 9b9399f
Author: Cory Johns <cj...@slashdotmedia.com>
Authored: Mon Jun 10 23:24:51 2013 +0000
Committer: Cory Johns <cj...@slashdotmedia.com>
Committed: Fri Jun 28 14:58:53 2013 +0000

----------------------------------------------------------------------
 Allura/allura/controllers/repository.py         |  75 ++++----
 Allura/allura/lib/helpers.py                    |  11 ++
 Allura/allura/model/repository.py               | 190 +++++++------------
 Allura/allura/templates/jinja_master/lib.html   |   7 +
 Allura/allura/templates/widgets/repo/log.html   |  46 ++---
 ForgeGit/forgegit/model/git_repo.py             |  97 ++++++++--
 .../tests/functional/test_controllers.py        |  14 +-
 .../forgegit/tests/model/test_repository.py     |  17 +-
 ForgeSVN/forgesvn/model/svn.py                  | 134 ++++++++-----
 .../forgesvn/tests/model/test_repository.py     |  89 +++------
 10 files changed, 354 insertions(+), 326 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/a06b9146/Allura/allura/controllers/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/controllers/repository.py b/Allura/allura/controllers/repository.py
index 084e17a..e8848eb 100644
--- a/Allura/allura/controllers/repository.py
+++ b/Allura/allura/controllers/repository.py
@@ -22,6 +22,7 @@ import re
 import difflib
 from urllib import quote, unquote
 from collections import defaultdict
+from itertools import islice
 
 from pylons import tmpl_context as c, app_globals as g
 from pylons import request, response
@@ -197,7 +198,7 @@ class RepoRootController(BaseController, FeedController):
             return dict(status='not_ready')
         # if c.app.repo.count() > 2000:
         #     return dict(status='too_many_commits')
-        if c.app.repo.count() == 0:
+        if c.app.repo.is_empty():
             return dict(status='no_commits')
         c.commit_browser_widget = self.commit_browser_widget
         return dict(status='ready')
@@ -206,11 +207,11 @@ class RepoRootController(BaseController, FeedController):
     @expose('json:')
     def commit_browser_data(self):
         head_ids = [ head.object_id for head in c.app.repo.get_heads() ]
-        commit_ids = list(c.app.repo.commitlog(head_ids))
+        commit_ids = [c.app.repo.rev_to_commit_id(r) for r in c.app.repo.log(head_ids, id_only=True)]
         log.info('Grab %d commit objects by ID', len(commit_ids))
-        commits_by_id = dict(
-            (c_obj._id, c_obj)
-            for c_obj in M.repo.CommitDoc.m.find(dict(_id={'$in': commit_ids})))
+        commits_by_id = {
+            c_obj._id: c_obj
+            for c_obj in M.repo.CommitDoc.m.find(dict(_id={'$in': commit_ids}))}
         log.info('... build graph')
         parents = {}
         children = defaultdict(list)
@@ -276,32 +277,30 @@ class RepoRestController(RepoRootController):
         return dict(commit_count=len(all_commits))
 
     @expose('json:')
-    def commits(self, **kw):
-        page_size = 25
-        offset = (int(kw.get('page',1)) * page_size) - page_size
-        revisions = c.app.repo.log(offset=offset, limit=page_size)
-
-        return dict(
-            commits=[
-                dict(
-                    parents=[dict(id=p) for p in commit.parent_ids],
-                    author=dict(
-                        name=commit.authored.name,
-                        email=commit.authored.email,
-                    ),
-                    url=commit.url(),
-                    id=commit._id,
-                    committed_date=commit.committed.date,
-                    authored_date=commit.authored.date,
-                    message=commit.message,
-                    tree=commit.tree._id,
-                    committer=dict(
-                        name=commit.committed.name,
-                        email=commit.committed.email,
-                    ),
-                )
+    def commits(self, rev=None, **kw):
+        revisions = islice(c.app.repo.log(rev, id_only=False), 25)
+
+        return {
+            'commits': [
+                {
+                    'parents': [{'id':p} for p in commit['parents']],
+                    'url': c.app.repo.url_for_commit(commit['id']),
+                    'id': commit['id'],
+                    'message': commit['message'],
+                    'tree': commit.get('tree'),
+                    'committed_date': commit['committed']['date'],
+                    'authored_date': commit['authored']['date'],
+                    'author': {
+                        'name': commit['authored']['name'],
+                        'email': commit['authored']['email'],
+                        },
+                    'committer': {
+                        'name': commit['committed']['name'],
+                        'email': commit['committed']['email'],
+                    },
+                }
             for commit in revisions
-        ])
+        ]}
 
 class MergeRequestsController(object):
     mr_filter=SCMMergeRequestFilterWidget()
@@ -478,21 +477,19 @@ class CommitBrowser(BaseController):
         if path:
             path = path.lstrip('/')
             is_file = self.tree._tree.get_blob_by_path(path) is not None
-        params = dict(path=path, rev=self._commit._id)
-        commits = list(c.app.repo.commits(limit=limit+1, **params))
+        commits = list(islice(c.app.repo.log(
+                revs=self._commit._id,
+                path=path,
+                id_only=False,
+                page_size=limit+1), limit+1))
         next_commit = None
         if len(commits) > limit:
-            next_commit = M.repo.Commit.query.get(_id=commits.pop())
-            next_commit.set_context(c.app.repo)
-        revisions = list(M.repo.Commit.query.find({'_id': {'$in': commits}}))
-        for commit in revisions:
-            commit.set_context(c.app.repo)
-        revisions = sorted(revisions, key=lambda c:commits.index(c._id))
+            next_commit = commits.pop()
         c.log_widget = self.log_widget
         return dict(
             username=c.user._id and c.user.username,
             branch=None,
-            log=revisions,
+            log=commits,
             next_commit=next_commit,
             limit=limit,
             path=path,

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/a06b9146/Allura/allura/lib/helpers.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/helpers.py b/Allura/allura/lib/helpers.py
index ab506d5..144bef3 100644
--- a/Allura/allura/lib/helpers.py
+++ b/Allura/allura/lib/helpers.py
@@ -140,6 +140,17 @@ def really_unicode(s):
         yield 'latin-1'
     return _attempt_encodings(s, encodings())
 
+def find_user(email=None, username=None, display_name=None):
+    from allura import model as M
+    user = None
+    if email:
+        user = M.User.by_email_address(email)
+    if not user and username:
+        user = M.User.by_username(username)
+    if not user and display_name:
+        user = M.User.by_display_name(display_name)
+    return user
+
 def find_project(url_path):
     from allura import model as M
     for n in M.Neighborhood.query.find():

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/a06b9146/Allura/allura/model/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
index dfb92f1..6916bec 100644
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -101,11 +101,26 @@ class RepositoryImplementation(object):
         the repo.  Optionally provide a path from which to copy existing hooks.'''
         raise NotImplementedError, '_setup_hooks'
 
-    def log(self, object_id, skip, count): # pragma no cover
-        '''Return a list of (object_id, ci) beginning at the given commit ID and continuing
-        to the parent nodes in a breadth-first traversal.  Also return a list of 'next commit' options
-        (these are candidates for he next commit after 'count' commits have been
-        exhausted).'''
+    def log(self, revs=None, path=None, exclude=None, id_only=True, **kw): # pragma no cover
+        """
+        Returns a generator that returns information about commits reacable
+        by revs.
+
+        revs can be None or a list or tuple of identifiers, each of which
+        can be anything parsable by self.commit().  If revs is None, the
+        default branch head will be used.
+
+        If path is not None, only commits which modify files under path
+        will be included.
+
+        Exclude can be None or a list or tuple of identifiers, each of which
+        can be anything parsable by self.commit().  If not None, then any
+        revisions reachable by any of the revisions in exclude will not be
+        included.
+
+        If id_only is True, returns only the commit ID (which can be faster),
+        otherwise it returns detailed information about each commit.
+        """
         raise NotImplementedError, 'log'
 
     def compute_tree_new(self, commit, path='/'): # pragma no cover
@@ -207,6 +222,10 @@ class RepositoryImplementation(object):
         self._setup_hooks(source_path)
 
     @property
+    def head(self):
+        raise NotImplementedError, 'head'
+
+    @property
     def heads(self):
         raise NotImplementedError, 'heads'
 
@@ -357,14 +376,9 @@ class Repository(Artifact, ActivityObject):
         should try to remove the deprecated fields and clean this up.
         """
         return self._impl.tags
-
-    def _log(self, rev, skip, limit):
-        head = self.commit(rev)
-        if head is None: return
-        for _id in self.commitlog([head._id], skip, limit):
-            ci = head.query.get(_id=_id)
-            ci.set_context(self)
-            yield ci
+    @property
+    def head(self):
+        return self._impl.head
 
     def init_as_clone(self, source_path, source_name, source_url):
         self.upstream_repo.name = source_name
@@ -376,91 +390,41 @@ class Repository(Artifact, ActivityObject):
         g.post_event('repo_cloned', source_url, source_path)
         self.refresh(notify=False, new_clone=True)
 
-    def log(self, branch='master', offset=0, limit=10):
-        return list(self._log(branch, offset, limit))
-
-    def commitlog(self, commit_ids, skip=0, limit=sys.maxint):
-        seen = set()
-        def _visit(commit_id):
-            if commit_id in seen: return
-            run = CommitRunDoc.m.get(commit_ids=commit_id)
-            if run is None: return
-            index = False
-            for pos, (oid, time) in enumerate(izip(run.commit_ids, run.commit_times)):
-                if oid == commit_id: index = True
-                elif not index: continue
-                seen.add(oid)
-                ci_times[oid] = time
-                if pos+1 < len(run.commit_ids):
-                    ci_parents[oid] = [ run.commit_ids[pos+1] ]
-                else:
-                    ci_parents[oid] = run.parent_commit_ids
-            for oid in run.parent_commit_ids:
-                if oid not in seen:
-                    _visit(oid)
-
-        def _gen_ids(commit_ids, skip, limit):
-            # Traverse the graph in topo order, yielding commit IDs
-            commits = set(commit_ids)
-            new_parent = None
-            while commits and limit:
-                # next commit is latest commit that's valid to log
-                if new_parent in commits:
-                    ci = new_parent
-                else:
-                    ci = max(commits, key=lambda ci:ci_times[ci])
-                commits.remove(ci)
-                # remove this commit from its parents children and add any childless
-                # parents to the 'ready set'
-                new_parent = None
-                for oid in ci_parents.get(ci, []):
-                    children = ci_children[oid]
-                    children.discard(ci)
-                    if not children:
-                        commits.add(oid)
-                        new_parent = oid
-                if skip:
-                    skip -= 1
-                    continue
-                else:
-                    limit -= 1
-                    yield ci
-
-        # Load all the runs to build a commit graph
-        ci_times = {}
-        ci_parents = {}
-        ci_children = defaultdict(set)
-        log.info('Build commit graph')
-        for cid in commit_ids:
-            _visit(cid)
-        for oid, parents in ci_parents.iteritems():
-            for ci_parent in parents:
-                ci_children[ci_parent].add(oid)
-
-        return _gen_ids(commit_ids, skip, limit)
-
-    def count(self, branch='master'):
-        try:
-            ci = self.commit(branch)
-            if ci is None: return 0
-            return self.count_revisions(ci)
-        except: # pragma no cover
-            log.exception('Error getting repo count')
-            return 0
-
-    def count_revisions(self, ci):
-        from .repo_refresh import CommitRunBuilder
-        result = 0
-        # If there's no CommitRunDoc for this commit, the call to
-        # commitlog() below will raise a KeyError. Repair the CommitRuns for
-        # this repo by rebuilding them entirely.
-        if not CommitRunDoc.m.find(dict(commit_ids=ci._id)).count():
-            log.info('CommitRun incomplete, rebuilding with all commits')
-            rb = CommitRunBuilder(list(self.all_commit_ids()))
-            rb.run()
-            rb.cleanup()
-        for oid in self.commitlog([ci._id]): result += 1
-        return result
+    def log(self, revs=None, path=None, exclude=None, id_only=True, **kw):
+        """
+        Returns a generator that returns information about commits reacable
+        by revs which modify path.
+
+        revs can either be a single revision identifier or a list or tuple
+        of identifiers, each of which can be anything parsable by self.commit().
+        If revs is None, the default branch head will be used.
+
+        If path is not None, then only commits which change files under path
+        will be included.
+
+        Exclude can be None, a single revision identifier, or a list or tuple of
+        identifiers, each of which can be anything parsable by self.commit().
+        If not None, then any revisions reachable by any of the revisions in
+        exclude will not be included.
+
+        If id_only is True, returns only the commit ID (which can be faster),
+        otherwise it returns detailed information about each commit.
+        """
+        if revs is not None and not isinstance(revs, (list, tuple)):
+            revs = [revs]
+        if exclude is not None and not isinstance(exclude, (list, tuple)):
+            exclude = [exclude]
+        return self._impl.log(revs, path, exclude=exclude, id_only=id_only, **kw)
+
+    def commitlog(self, revs):
+        """
+        Return a generator that returns Commit model instances reachable by
+        the commits specified by revs.
+        """
+        for ci_id in self.log(revs, id_only=True):
+            commit = self.commit(ci_id)
+            commit.set_context(self)
+            yield commit
 
     def latest(self, branch=None):
         if self._impl is None:
@@ -582,6 +546,9 @@ class Repository(Artifact, ActivityObject):
             path = path.strip('/')
         self._impl.tarball(revision, path)
 
+    def rev_to_commit_id(self, rev):
+        raise NotImplementedError, 'rev_to_commit_id'
+
 class MergeRequest(VersionedArtifact, ActivityObject):
     statuses=['open', 'merged', 'rejected']
     class __mongometa__:
@@ -639,19 +606,11 @@ class MergeRequest(VersionedArtifact, ActivityObject):
         return self._commits()
 
     def _commits(self):
-        from .repo import Commit
-        result = []
-        next = [ self.downstream.commit_id ]
-        while next:
-            oid = next.pop(0)
-            ci = Commit.query.get(_id=oid)
-            if self.app.repo._id in ci.repo_ids:
-                continue
-            result.append(ci)
-            next += ci.parent_ids
         with self.push_downstream_context():
-            for ci in result: ci.set_context(c.app.repo)
-        return result
+            return list(c.app.repo.log(
+                self.downstream.commit_id,
+                exclude=self.app.repo.head,
+                id_only=False))
 
     @classmethod
     def upsert(cls, **kw):
@@ -923,17 +882,6 @@ class Commit(RepoObject):
                 else:
                     skip -= 1
 
-    def count_revisions(self):
-        seen_oids = set()
-        candidates = [ self.object_id ]
-        while candidates:
-            candidate = candidates.pop()
-            if candidate in seen_oids: continue
-            lc = LogCache.get(self.repo, candidate)
-            seen_oids.update(lc.object_ids)
-            candidates += lc.candidates
-        return len(seen_oids)
-
     def compute_diffs(self):
         self.diffs.added = []
         self.diffs.removed = []

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/a06b9146/Allura/allura/templates/jinja_master/lib.html
----------------------------------------------------------------------
diff --git a/Allura/allura/templates/jinja_master/lib.html b/Allura/allura/templates/jinja_master/lib.html
index 3897e5f..a5bfa29 100644
--- a/Allura/allura/templates/jinja_master/lib.html
+++ b/Allura/allura/templates/jinja_master/lib.html
@@ -58,6 +58,13 @@
   {% endif %}
 {%- endmacro %}
 
+{% macro user_link(email, name, size=16) -%}
+    {% set user = h.find_user(email) -%}
+    {% if user %}<a href="{{user.url()}}">{% endif -%}
+        {{ email_gravatar(email, name, size) }} {{ name }}
+    {%- if user %}</a>{% endif -%}
+{%- endmacro %}
+
 {% macro file_field(name, label) %}
   {% if label %}
   <label for="{{name}}">{{label}}</label>

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/a06b9146/Allura/allura/templates/widgets/repo/log.html
----------------------------------------------------------------------
diff --git a/Allura/allura/templates/widgets/repo/log.html b/Allura/allura/templates/widgets/repo/log.html
index 61aadab..840ebb2 100644
--- a/Allura/allura/templates/widgets/repo/log.html
+++ b/Allura/allura/templates/widgets/repo/log.html
@@ -16,7 +16,7 @@
        specific language governing permissions and limitations
        under the License.
 -#}
-{% from 'allura:templates/jinja_master/lib.html' import email_gravatar, abbr_date with context %}
+{% from 'allura:templates/jinja_master/lib.html' import user_link, abbr_date with context %}
 {% set app = app or c.app %}
 <div>
   {%if is_file%}
@@ -36,25 +36,19 @@
           <td>
             <div>
                 {%if is_file%}
-                    <div class="grid-1"><input type="checkbox" class="revision" revision="{{commit._id.split(':')[-1]}}" url_commit="{{commit.url()}}"></div>
+                    <div class="grid-1"><input type="checkbox" class="revision" revision="{{commit.id}}" url_commit="{{app.repo.url_for_commit(commit.id)}}"></div>
                 {%endif%}
-                <a href="{{app.repo.url_for_commit(commit)}}">{{commit.shorthand_id()}}</a>
-                {% if app.repo.symbolics_for_commit(commit)[1] %}
-                    ({% for tag in app.repo.symbolics_for_commit(commit)[1] -%}
-                        <a href="{{app.repo.url_for_commit(tag)}}">{{tag}}</a>{% if not loop.last %}&nbsp;{% endif %}
-                    {%- endfor %})
-                {% endif %}
-                {%if is_file%}
-                    ({{commit.tree.get_obj_by_path(request.params.get('path')).size|filesizeformat}})
-                {%endif%}
-                by
-                {{email_gravatar(commit.authored.email, title=commit.authored.name, size=16)}} {{commit.authored.name}}{%if commit.committed.email != commit.authored.email %}, pushed by
-                {% if commit.committer_url %}
-                    <a href="{{commit.committer_url}}">{{email_gravatar(commit.committed.email, title=commit.committed.name, size=16)}}
-                    {{commit.committed.name}}</a>
-                {% else %}
-                {{email_gravatar(commit.committed.email, title=commit.committed.name, size=16)}} {{commit.committed.name}}
-                {% endif %}
+                <a class="rev" href="{{app.repo.url_for_commit(commit.id)}}">{{app.repo.shorthand_for_commit(commit.id)}}</a>
+                {% if commit.refs -%}
+                    (
+                    {%- for ref in commit.refs -%}
+                        <a class="ref" href="{{app.repo.url_for_commit(ref)}}">{{ref}}</a>{% if not loop.last %},&nbsp;{% endif %}
+                    {%- endfor -%}
+                    )
+                {%- endif %}
+                by {{ user_link(commit.authored.email, commit.authored.name) }}
+                {%- if commit.committed.email != commit.authored.email %},
+                pushed by {{ user_link(commit.committed.email, commit.committed.name) }}
                 {% endif %}
                 {{g.markdown.convert(commit.message)}}
             </div>
@@ -63,16 +57,12 @@
             {% if commit.committed.date %}{{commit.committed.date|datetimeformat}}{% endif %}
           </td>
           <td style="text-align: left; vertical-align: text-top">
-            <a href="{{commit.url()}}tree{{request.params.get('path', '')}}">
-            {%if is_file%}
-                View
-            {% else %}
-                Tree
-            {%endif%}
-            </a>
+              <a class="browse" href="{{app.repo.url_for_commit(commit.id)}}tree{{request.params.get('path', '')}}">
+                  {{ 'View' if is_file else 'Tree' }}
+              </a>
               {%if is_file%}
               <br/>
-              <a href="{{commit.url()}}tree{{request.params.get('path', '')}}?format=raw">Download</a>
+              <a class="download" href="{{app.repo.url_for_commit(commit.id)}}tree{{request.params.get('path', '')}}?format=raw">Download</a>
               {%endif%}
           </td>
         </tr>
@@ -80,6 +70,6 @@
     </tbody>
   </table>
   {% if show_paging and next_commit %}
-      <a class="page_list" href="{{next_commit.url()}}log{{tg.url(params=request.params)}}">Older ></a>
+      <a class="page_list" href="{{app.repo.url_for_commit(next_commit.id)}}log{{tg.url(params=request.params)}}">Older ></a>
   {% endif %}
 </div>

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/a06b9146/ForgeGit/forgegit/model/git_repo.py
----------------------------------------------------------------------
diff --git a/ForgeGit/forgegit/model/git_repo.py b/ForgeGit/forgegit/model/git_repo.py
index 27b52d9..2220260 100644
--- a/ForgeGit/forgegit/model/git_repo.py
+++ b/ForgeGit/forgegit/model/git_repo.py
@@ -74,6 +74,9 @@ class Repository(M.Repository):
             merge_request.downstream.commit_id,
         )
 
+    def rev_to_commit_id(self, rev):
+        return self._impl.rev_parse(rev).hexsha
+
 class GitImplementation(M.RepositoryImplementation):
     post_receive_template = string.Template(
         '#!/bin/bash\n'
@@ -265,23 +268,78 @@ class GitImplementation(M.RepositoryImplementation):
         commit = self._git.commit(rev)
         return commit.count(path)
 
-    def log(self, object_id, skip, count):
-        obj = self._git.commit(object_id)
-        candidates = [ obj ]
-        result = []
-        seen = set()
-        while count and candidates:
-            candidates.sort(key=lambda c:c.committed_date)
-            obj = candidates.pop(-1)
-            if obj.hexsha in seen: continue
-            seen.add(obj.hexsha)
-            if skip == 0:
-                result.append(obj.hexsha)
-                count -= 1
+    def log(self, revs=None, path=None, exclude=None, id_only=True, **kw):
+        """
+        Returns a generator that returns information about commits reacable
+        by revs.
+
+        revs can be None or a list or tuple of revisions, each of which
+        can be anything parsable by self.commit().  If revs is None, the
+        default branch head will be used.
+
+        If path is not None, only commits which modify files under path
+        will be included.
+
+        Exclude can be None or a list or tuple of identifiers, each of which
+        can be anything parsable by self.commit().  If not None, then any
+        revisions reachable by any of the revisions in exclude will not be
+        included.
+
+        If id_only is True, returns only the commit ID, otherwise it returns
+        detailed information about each commit.
+        """
+        if exclude is not None:
+            revs.extend(['^%s' % e for e in exclude])
+
+        for ci, refs in self._iter_commits_with_refs(revs, '--', path):
+            if id_only:
+                yield ci.hexsha
             else:
-                skip -= 1
-            candidates += obj.parents
-        return result, [ p.hexsha for p in candidates ]
+                yield {
+                        'id': ci.hexsha,
+                        'message': h.really_unicode(ci.message or '--none--'),
+                        'authored': {
+                                'name': h.really_unicode(ci.author.name or '--none--'),
+                                'email': h.really_unicode(ci.author.email),
+                                'date': datetime.utcfromtimestamp(ci.authored_date),
+                            },
+                        'committed': {
+                                'name': h.really_unicode(ci.committer.name or '--none--'),
+                                'email': h.really_unicode(ci.committer.email),
+                                'date': datetime.utcfromtimestamp(ci.committed_date),
+                            },
+                        'refs': refs,
+                        'parents': [pci.hexsha for pci in ci.parents],
+                    }
+
+    def _iter_commits_with_refs(self, *args, **kwargs):
+        """
+        A reimplementation of GitPython's iter_commits that includes
+        the --decorate option.
+
+        Unfortunately, iter_commits discards the additional info returned
+        by adding --decorate, and the ref names are not exposed on the
+        commit objects without making an entirely separate call to log.
+
+        Ideally, since we're reimplementing it anyway, we would prefer
+        to add all the info we need to the format to avoid the additional
+        overhead of the lazy-load of the commit data, but the commit
+        message is a problem since it can contain newlines which breaks
+        parsing of the log lines (iter_commits can be broken this way,
+        too).  This does keep the id_only case fast and the overhead
+        of lazy-loading the commit data is probably fine.  But if this
+        ends up being a bottleneck, that would be one possibile
+        optimization.
+        """
+        proc = self._git.git.log(*args, format='%H%x00%d', as_process=True, **kwargs)
+        stream = proc.stdout
+        while True:
+            line = stream.readline()
+            if not line:
+                break
+            hexsha, decoration = line.strip().split('\x00')
+            refs = decoration.strip(' ()').split(', ') if decoration else []
+            yield (git.Commit(self._git, gitdb.util.hex_to_bin(hexsha)), refs)
 
     def open_blob(self, blob):
         return _OpenedGitBlob(
@@ -308,6 +366,9 @@ class GitImplementation(M.RepositoryImplementation):
             binsha += chr(int(e+o, 16))
         return git.Object.new_from_sha(self._git, binsha)
 
+    def rev_parse(self, rev):
+        return self._git.rev_parse(rev)
+
     def symbolics_for_commit(self, commit):
         try:
             branches = [b.name for b in self.branches if b.object_id == commit._id]
@@ -339,6 +400,10 @@ class GitImplementation(M.RepositoryImplementation):
         return not self._git or len(self._git.heads) == 0
 
     @LazyProperty
+    def head(self):
+        return self._git.head.commit.hexsha
+
+    @LazyProperty
     def heads(self):
         return [Object(name=b.name, object_id=b.commit.hexsha) for b in self._git.heads if b.is_valid()]
 

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/a06b9146/ForgeGit/forgegit/tests/functional/test_controllers.py
----------------------------------------------------------------------
diff --git a/ForgeGit/forgegit/tests/functional/test_controllers.py b/ForgeGit/forgegit/tests/functional/test_controllers.py
index 32ffb69..c4ac325 100644
--- a/ForgeGit/forgegit/tests/functional/test_controllers.py
+++ b/ForgeGit/forgegit/tests/functional/test_controllers.py
@@ -116,10 +116,8 @@ class TestRootController(_TestCase):
         assert 'Initial commit' in resp
         assert '<div class="markdown_content"><p>Change README</p></div>' in resp
         assert 'tree/README?format=raw">Download</a>' not in resp
-        assert '28 Bytes' not in resp.html.find('td').contents[1].text
         assert 'Tree' in resp.html.findAll('td')[2].text, resp.html.findAll('td')[2].text
         resp = self.app.get('/src-git/ci/1e146e67985dcd71c74de79613719bef7bddca4a/log/?path=/README')
-        assert '28 Bytes' in resp.html.find('td').contents[1].text
         assert 'View' in resp.html.findAll('td')[2].text
         assert 'Change README' in resp
         assert 'tree/README?format=raw">Download</a>' in resp
@@ -466,12 +464,14 @@ class TestFork(_TestCase):
         assert 'would like you to merge' in r, r.showbrowser()
         assert 'Improve documentation' in r, r.showbrowser()
         revs = r.html.findAll('tr', attrs={'class': 'rev'})
-        links = revs[0].findAll('a')
+        assert_equal(len(revs), 1)
+        rev_links = revs[0].findAll('a', attrs={'class': 'rev'})
+        browse_links = revs[0].findAll('a', attrs={'class': 'browse'})
         c_id = self.forked_repo.get_heads()[0]['object_id']
-        assert_equal(links[0].get('href'), '/p/test2/code/ci/%s/' % c_id)
-        assert_equal(links[0].getText(), '[%s]' % c_id[:6])
-        assert_equal(links[1].get('href'), '/p/test2/code/ci/%s/tree' % c_id)
-        assert_equal(links[1].getText(), 'Tree')
+        assert_equal(rev_links[0].get('href'), '/p/test2/code/ci/%s/' % c_id)
+        assert_equal(rev_links[0].getText(), '[%s]' % c_id[:6])
+        assert_equal(browse_links[0].get('href'), '/p/test2/code/ci/%s/tree' % c_id)
+        assert_equal(browse_links[0].getText(), 'Tree')
 
     def test_merge_request_list_view(self):
         r, mr_num = self._request_merge()

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/a06b9146/ForgeGit/forgegit/tests/model/test_repository.py
----------------------------------------------------------------------
diff --git a/ForgeGit/forgegit/tests/model/test_repository.py b/ForgeGit/forgegit/tests/model/test_repository.py
index 9509fb2..0bbd6c2 100644
--- a/ForgeGit/forgegit/tests/model/test_repository.py
+++ b/ForgeGit/forgegit/tests/model/test_repository.py
@@ -79,13 +79,8 @@ class TestNewGit(unittest.TestCase):
         assert self.rev.url() == (
             '/p/test/src-git/ci/'
             '1e146e67985dcd71c74de79613719bef7bddca4a/')
-        all_cis = self.repo.log(self.rev._id, 0, 1000)
+        all_cis = list(self.repo.log(self.rev._id, id_only=True))
         assert len(all_cis) == 4
-        assert_equal(self.repo.log(self.rev._id, 1,1000), all_cis[1:])
-        assert_equal(self.repo.log(self.rev._id, 0,3), all_cis[:3])
-        assert_equal(self.repo.log(self.rev._id, 1,2), all_cis[1:3])
-        for ci in all_cis:
-            ci.context()
         self.rev.tree.ls()
         # print self.rev.tree.readme()
         assert_equal(self.rev.tree.readme(), (
@@ -181,7 +176,7 @@ class TestGitRepo(unittest.TestCase, RepoImplTestBase):
             shutil.rmtree(dirname)
         repo.init()
         repo._impl.clone_from(repo_path)
-        assert len(repo.log())
+        assert len(list(repo.log()))
         assert not os.path.exists(os.path.join(g.tmpdir, 'testgit.git/hooks/update'))
         assert not os.path.exists(os.path.join(g.tmpdir, 'testgit.git/hooks/post-receive-user'))
         assert os.path.exists(os.path.join(g.tmpdir, 'testgit.git/hooks/post-receive'))
@@ -210,7 +205,7 @@ class TestGitRepo(unittest.TestCase, RepoImplTestBase):
             repo.init()
             repo._impl.clone_from(repo_path)
             assert not clone_from.called
-            assert len(repo.log())
+            assert len(list(repo.log()))
             assert os.path.exists(os.path.join(g.tmpdir, 'testgit.git/hooks/update'))
             assert os.path.exists(os.path.join(g.tmpdir, 'testgit.git/hooks/post-receive-user'))
             assert os.path.exists(os.path.join(g.tmpdir, 'testgit.git/hooks/post-receive'))
@@ -225,9 +220,9 @@ class TestGitRepo(unittest.TestCase, RepoImplTestBase):
         assert i['type_s'] == 'Git Repository', i
 
     def test_log(self):
-        for entry in self.repo.log():
-            assert str(entry.authored)
-            assert entry.message
+        for entry in self.repo.log(id_only=False):
+            assert str(entry['authored'])
+            assert entry['message']
 
     def test_commit(self):
         entry = self.repo.commit('HEAD')

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/a06b9146/ForgeSVN/forgesvn/model/svn.py
----------------------------------------------------------------------
diff --git a/ForgeSVN/forgesvn/model/svn.py b/ForgeSVN/forgesvn/model/svn.py
index 23c1887..82b31b9 100644
--- a/ForgeSVN/forgesvn/model/svn.py
+++ b/ForgeSVN/forgesvn/model/svn.py
@@ -73,29 +73,6 @@ class Repository(M.Repository):
 
     def compute_diffs(self): return
 
-    def count(self, *args, **kwargs):
-        return super(Repository, self).count(None)
-
-    def count_revisions(self, ci):
-        # since SVN histories are inherently linear and the commit _id
-        # contains the revision, just parse it out from there
-        return int(self._impl._revno(ci._id))
-
-    def log(self, branch='HEAD', offset=0, limit=10):
-        return list(self._log(branch, offset, limit))
-
-    def commitlog(self, commit_ids, skip=0, limit=sys.maxint):
-        ci_id = commit_ids[0]
-        if skip > 0:
-            rid, rev = ci_id.split(':')
-            rev = int(rev) - skip
-            ci_id = '%s:%s' % (rid, rev)
-        ci = self._impl.commit(ci_id)
-        while ci is not None and limit > 0:
-            yield ci._id
-            limit -= 1
-            ci = ci.get_parent()
-
     def latest(self, branch=None):
         if self._impl is None: return None
         return self._impl.commit('HEAD')
@@ -106,6 +83,9 @@ class Repository(M.Repository):
         fn += ('-' + '-'.join(path.split('/'))) if path else ''
         return fn
 
+    def rev_to_commit_id(self, rev):
+        return self._impl.rev_parse(rev)
+
 
 class SVNCalledProcessError(Exception):
     def __init__(self, cmd, returncode, stdout, stderr):
@@ -186,13 +166,13 @@ class SVNImplementation(M.RepositoryImplementation):
         return 'file://%s%s' % (self._repo.fs_path, self._repo.name)
 
     def shorthand_for_commit(self, oid):
-        return '[r%d]' % self._revno(oid)
+        return '[r%d]' % self._revno(self.rev_parse(oid))
 
     def url_for_commit(self, commit, url_type=None):
-        if isinstance(commit, basestring):
-            object_id = commit
-        else:
+        if hasattr(commit, '_id'):
             object_id = commit._id
+        else:
+            object_id = self.rev_parse(commit)
         if ':' in object_id:
             object_id = str(self._revno(object_id))
         return os.path.join(self._repo.url(), object_id) + '/'
@@ -299,17 +279,20 @@ class SVNImplementation(M.RepositoryImplementation):
         self._setup_special_files(source_url)
 
     def commit(self, rev):
-        if rev in ('HEAD', None):
-            oid = self._oid(self.head)
-        elif isinstance(rev, int) or rev.isdigit():
-            oid = self._oid(rev)
-        else:
-            oid = rev
+        oid = self.rev_parse(rev)
         result = M.repo.Commit.query.get(_id=oid)
         if result:
             result.set_context(self._repo)
         return result
 
+    def rev_parse(self, rev):
+        if rev in ('HEAD', None):
+            return self._oid(self.head)
+        elif isinstance(rev, int) or rev.isdigit():
+            return self._oid(rev)
+        else:
+            return rev
+
     def all_commit_ids(self):
         """Return a list of commit ids, starting with the head (most recent
         commit) and ending with the root (first commit).
@@ -494,20 +477,79 @@ class SVNImplementation(M.RepositoryImplementation):
         else:
             return self._blob_oid(commit_id, path)
 
-    def log(self, object_id, skip, count):
-        revno = self._revno(object_id)
-        result = []
-        while count and revno:
-            if skip == 0:
-                result.append(self._oid(revno))
-                count -= 1
-            else:
-                skip -= 1
-            revno -= 1
-        if revno:
-            return result, [ self._oid(revno) ]
+    def log(self, revs=None, path=None, exclude=None, id_only=True, page_size=25, **kw):
+        """
+        Returns a generator that returns information about commits reacable
+        by revs.
+
+        revs can be None or a list or tuple of identifiers, each of which
+        can be anything parsable by self.commit().  If revs is None, the
+        default head will be used.
+
+        If path is not None, only commits which modify files under path
+        will be included.
+
+        Exclude can be None or a list or tuple of identifiers, each of which
+        can be anything parsable by self.commit().  If not None, then any
+        revisions reachable by any of the revisions in exclude will not be
+        included.
+
+        If id_only is True, returns only the commit ID, otherwise it returns
+        detailed information about each commit.
+
+        Since pysvn doesn't have a generator version of log, this tries to
+        balance pulling too much data from SVN with calling SVN too many
+        times by pulling in pages of page_size at a time.
+        """
+        if revs is None:
+            revno = self.head
+        else:
+            revno = max([self._revno(self.rev_parse(r)) for r in revs])
+        if exclude is None:
+            exclude = 0
         else:
-            return result, []
+            exclude = max([self._revno(self.rev_parse(r)) for r in exclude])
+        if path is None:
+            url = self._url
+        else:
+            url = '/'.join([self._url, path])
+        while revno > exclude:
+            rev = pysvn.Revision(pysvn.opt_revision_kind.number, revno)
+            try:
+                logs = self._svn.log(url, revision_start=rev, limit=page_size)
+            except pysvn.ClientError as e:
+                if 'Unable to connect' in e.message:
+                    raise  # repo error
+                return  # no (more) history for this path
+            for ci in logs:
+                if ci.revision.number <= exclude:
+                    return
+                if id_only:
+                    yield ci.revision.number
+                else:
+                    yield self._map_log(ci)
+            if len(logs) < page_size:
+                return  # we didn't get a full page, don't bother calling SVN again
+            revno = ci.revision.number - 1
+
+    def _map_log(self, ci):
+        revno = ci.revision.number
+        return {
+                'id': revno,
+                'message': h.really_unicode(ci.get('message', '--none--')),
+                'authored': {
+                        'name': h.really_unicode(ci.get('author', '--none--')),
+                        'email': '',
+                        'date': datetime.utcfromtimestamp(ci.date),
+                    },
+                'committed': {
+                        'name': h.really_unicode(ci.get('author', '--none--')),
+                        'email': '',
+                        'date': datetime.utcfromtimestamp(ci.date),
+                    },
+                'refs': ['HEAD'] if revno == self.head else [],
+                'parents': [revno-1] if revno > 1 else [],
+            }
 
     def open_blob(self, blob):
         data = self._svn.cat(

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/a06b9146/ForgeSVN/forgesvn/tests/model/test_repository.py
----------------------------------------------------------------------
diff --git a/ForgeSVN/forgesvn/tests/model/test_repository.py b/ForgeSVN/forgesvn/tests/model/test_repository.py
index c7acfcf..0f90c17 100644
--- a/ForgeSVN/forgesvn/tests/model/test_repository.py
+++ b/ForgeSVN/forgesvn/tests/model/test_repository.py
@@ -33,6 +33,7 @@ from ming.base import Object
 from ming.orm import session, ThreadLocalORMSession
 from testfixtures import TempDirectory
 from IPython.testing.decorators import onlyif
+import pysvn
 
 from alluratest.controller import setup_basic_test, setup_global_objects
 from allura import model as M
@@ -85,13 +86,8 @@ class TestNewRepo(unittest.TestCase):
         assert self.rev.symbolic_ids == ([], [])
         assert self.rev.url() == (
             '/p/test/src/5/')
-        all_cis = self.repo.log(self.rev._id, 0, 1000)
+        all_cis = list(self.repo.log(self.rev._id))
         assert len(all_cis) == 5
-        assert self.repo.log(self.rev._id, 1,1000) == all_cis[1:]
-        assert self.repo.log(self.rev._id, 0,3) == all_cis[:3]
-        assert self.repo.log(self.rev._id, 1,2) == all_cis[1:3]
-        for ci in all_cis:
-            ci.context()
         self.rev.tree.ls()
         assert self.rev.tree.readme() == (
             'README', 'This is readme\nAnother Line\n')
@@ -169,7 +165,7 @@ class TestSVNRepo(unittest.TestCase, RepoImplTestBase):
         self.assertIn('exec $DIR/post-commit-user "$@"\n', c)
 
         repo.refresh(notify=False)
-        assert len(repo.log())
+        assert len(list(repo.log()))
 
         shutil.rmtree(dirname)
 
@@ -216,7 +212,7 @@ class TestSVNRepo(unittest.TestCase, RepoImplTestBase):
         self.assertIn('exec $DIR/post-commit-user "$@"\n', c)
 
         repo.refresh(notify=False)
-        assert len(repo.log())
+        assert len(list(repo.log()))
 
         shutil.rmtree(dirname)
 
@@ -225,16 +221,12 @@ class TestSVNRepo(unittest.TestCase, RepoImplTestBase):
         assert i['type_s'] == 'SVN Repository', i
 
     def test_log(self):
-        for entry in self.repo.log():
-            assert entry.committed.name == 'rick446'
-            assert entry.message
-            print '=='
-            print entry._id
-            print entry.message
-            print entry.diffs
+        for entry in self.repo.log(id_only=False):
+            assert entry['committed']['name'] == 'rick446'
+            assert entry['message']
 
     def test_paged_diffs(self):
-        entry = self.repo.log(2, limit=1)[0]
+        entry = self.repo.commit(self.repo.log(2, id_only=True).next())
         self.assertEqual(entry.diffs, entry.paged_diffs())
         self.assertEqual(entry.diffs, entry.paged_diffs(start=0))
         added_expected = entry.diffs.added[1:3]
@@ -248,14 +240,14 @@ class TestSVNRepo(unittest.TestCase, RepoImplTestBase):
         self.assertEqual(sorted(actual.keys()), sorted(empty.keys()))
 
     def test_diff_create_file(self):
-        entry = self.repo.log(1, limit=1)[0]
+        entry = self.repo.commit(self.repo.log(1, id_only=True).next())
         self.assertEqual(
             entry.diffs, dict(
                 copied=[], changed=[],
                 removed=[], added=['/README'], total=1))
 
     def test_diff_create_path(self):
-        entry = self.repo.log(2, limit=1)[0]
+        entry = self.repo.commit(self.repo.log(2, id_only=True).next())
         actual = entry.diffs
         actual.added = sorted(actual.added)
         self.assertEqual(
@@ -266,14 +258,14 @@ class TestSVNRepo(unittest.TestCase, RepoImplTestBase):
                     '/a/b/c/hello.txt']), total=4))
 
     def test_diff_modify_file(self):
-        entry = self.repo.log(3, limit=1)[0]
+        entry = self.repo.commit(self.repo.log(3, id_only=True).next())
         self.assertEqual(
             entry.diffs, dict(
                 copied=[], changed=['/README'],
                 removed=[], added=[], total=1))
 
     def test_diff_delete(self):
-        entry = self.repo.log(4, limit=1)[0]
+        entry = self.repo.commit(self.repo.log(4, id_only=True).next())
         self.assertEqual(
             entry.diffs, dict(
                 copied=[], changed=[],
@@ -281,7 +273,7 @@ class TestSVNRepo(unittest.TestCase, RepoImplTestBase):
 
     def test_diff_copy(self):
         # Copies are currently only detected as 'add'
-        entry = self.repo.log(5, limit=1)[0]
+        entry = self.repo.commit(self.repo.log(5, id_only=True).next())
         self.assertEqual(
             entry.diffs, dict(
                 copied=[], changed=[],
@@ -299,10 +291,6 @@ class TestSVNRepo(unittest.TestCase, RepoImplTestBase):
         assert svn_path_exists("file://%s" % repo_path)
         assert not svn_path_exists("file://%s/badpath" % repo_path)
 
-    def test_count_revisions(self):
-        ci = mock.Mock(_id='deadbeef:100')
-        self.assertEqual(self.repo.count_revisions(ci), 100)
-
     @onlyif(os.path.exists(tg.config.get('scm.repos.tarball.zip_binary', '/usr/bin/zip')), 'zip binary is missing')
     def test_tarball(self):
         tmpdir = tg.config['scm.repos.tarball.root']
@@ -605,7 +593,6 @@ class _TestWithRepo(_Test):
         self.repo._impl.url_for_commit = (
             lambda *a, **kw: M.RepositoryImplementation.url_for_commit(
                 self.repo._impl, *a, **kw))
-        self.repo._impl.log = lambda *a,**kw:(['foo'], [])
         self.repo._impl._repo = self.repo
         self.repo._impl.all_commit_ids = lambda *a,**kw: []
         self.repo._impl.commit().symbolic_ids = None
@@ -654,23 +641,6 @@ class TestRepo(_TestWithRepo):
         assert self.repo._impl.clone_from.called_with('srcpath')
         post_event.assert_called_once_with('repo_cloned', 'srcurl', 'srcpath')
 
-    @mock.patch.object(M.repo.CommitRunDoc.m, 'get')
-    def test_log(self, crd):
-        head = mock.Mock(name='commit_head', _id=3)
-        commits = dict([(i, mock.Mock(name='commit_%s'%i, _id=i)) for i in range(3)])
-        commits[3] = head
-        head.query.get = lambda _id: commits[_id]
-        self.repo._impl.commit = mock.Mock(return_value=head)
-        crd.return_value = mock.Mock(commit_ids=[3, 2, 1, 0], commit_times=[4, 3, 2, 1], parent_commit_ids=[])
-        log = self.repo.log()
-        assert_equal([c._id for c in log], [3, 2, 1, 0])
-
-    def test_count_revisions(self):
-        ci = mock.Mock()
-        self.repo.count_revisions = mock.Mock(return_value=42)
-        self.repo._impl.commit = mock.Mock(return_value=ci)
-        assert self.repo.count() == 42
-
     def test_latest(self):
         ci = mock.Mock()
         self.repo._impl.commit = mock.Mock(return_value=ci)
@@ -714,7 +684,6 @@ class TestRepo(_TestWithRepo):
         ci.committed.name = committer_name
         ci.committed.email = committer_email
         ci.author_url = '/u/test-committer/'
-        self.repo.count_revisions=mock.Mock(return_value=100)
         self.repo._impl.commit = mock.Mock(return_value=ci)
         self.repo._impl.new_commits = mock.Mock(return_value=['foo%d' % i for i in range(100) ])
         self.repo._impl.all_commit_ids = mock.Mock(return_value=['foo%d' % i for i in range(100) ])
@@ -743,7 +712,6 @@ class TestRepo(_TestWithRepo):
 
     def test_refresh_private(self):
         ci = mock.Mock()
-        self.repo.count_revisions=mock.Mock(return_value=100)
         self.repo._impl.commit = mock.Mock(return_value=ci)
         self.repo._impl.new_commits = mock.Mock(return_value=['foo%d' % i for i in range(100) ])
 
@@ -795,17 +763,28 @@ class TestMergeRequest(_TestWithRepoAndCommit):
             downstream=ming.base.Object(
                 project_id=c.project._id,
                 mount_point='test2',
-                commit_id='foo'),
+                commit_id='foo:2'),
             target_branch='foobranch',
             summary='summary',
             description='description')
         u = M.User.by_username('test-admin')
-        assert mr.creator == u
-        assert mr.creator_name == u.get_pref('display_name')
-        assert mr.creator_url == u.url()
-        assert mr.downstream_url == '/p/test/test2/'
-        assert mr.downstream_repo_url == 'http://svn.localhost/p/test/test2/'
-        assert mr.commits == [ self._make_commit('foo')[0] ]
+        assert_equal(mr.creator, u)
+        assert_equal(mr.creator_name,  u.get_pref('display_name'))
+        assert_equal(mr.creator_url,  u.url())
+        assert_equal(mr.downstream_url,  '/p/test/test2/')
+        assert_equal(mr.downstream_repo_url,  'http://svn.localhost/p/test/test2/')
+        with mock.patch('forgesvn.model.svn.SVNLibWrapper') as _svn,\
+             mock.patch('forgesvn.model.svn.SVNImplementation._map_log') as _map_log:
+            mr.app.repo._impl.head = 1
+            _svn().log.return_value = [mock.Mock(revision=mock.Mock(number=2))]
+            _map_log.return_value = 'bar'
+            assert_equal(mr.commits,  ['bar'])
+            # can't do assert_called_once_with because pysvn.Revision doesn't compare nicely
+            assert_equal(_svn().log.call_count, 1)
+            assert_equal(_svn().log.call_args[0], ('file:///tmp/svn/p/test/test2',))
+            assert_equal(_svn().log.call_args[1]['revision_start'].number, 2)
+            assert_equal(_svn().log.call_args[1]['limit'], 25)
+            _map_log.assert_called_once_with(_svn().log.return_value[0])
 
 class TestRepoObject(_TestWithRepoAndCommit):
 
@@ -856,12 +835,6 @@ class TestCommit(_TestWithRepo):
         x = self.ci.get_path('a/a')
         assert isinstance(x, M.repo.Tree)
 
-    def test_count_revisions(self):
-        rb = M.repo_refresh.CommitRunBuilder(['foo'])
-        rb.run()
-        rb.cleanup()
-        assert self.repo.count_revisions(self.ci) == 1
-
     def _unique_blobs(self):
         def counter():
             counter.i += 1