You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by he...@apache.org on 2015/08/10 15:39:26 UTC

[02/18] allura git commit: [#7925] Refactor and improve the diff processing

[#7925] Refactor and improve the diff processing


Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/7f738bdf
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/7f738bdf
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/7f738bdf

Branch: refs/heads/master
Commit: 7f738bdfbb93e78d8d557f8b60fb1965ce9e67ce
Parents: 7eeb0dd
Author: Heith Seewald <hs...@slashdotmedia.com>
Authored: Mon Jul 27 16:06:28 2015 -0400
Committer: Heith Seewald <hs...@slashdotmedia.com>
Committed: Mon Aug 10 09:38:35 2015 -0400

----------------------------------------------------------------------
 Allura/allura/lib/custom_middleware.py |  4 --
 Allura/allura/model/repository.py      | 67 +----------------------------
 ForgeGit/forgegit/model/git_repo.py    | 51 +++++++++++++++-------
 ForgeSVN/forgesvn/model/svn.py         | 17 +++++---
 4 files changed, 47 insertions(+), 92 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/allura/blob/7f738bdf/Allura/allura/lib/custom_middleware.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/custom_middleware.py b/Allura/allura/lib/custom_middleware.py
index e56a530..1938e51 100644
--- a/Allura/allura/lib/custom_middleware.py
+++ b/Allura/allura/lib/custom_middleware.py
@@ -322,10 +322,6 @@ class AlluraTimerMiddleware(TimerMiddleware):
             Timer('urlopen', urllib2, 'urlopen'),
             Timer('base_repo_tool.{method_name}',
                   allura.model.repository.RepositoryImplementation, 'last_commit_ids'),
-            Timer('_diffs_copied', allura.model.repository.Commit, '_diffs_copied'),
-            Timer(
-                'sequencematcher.{method_name}', allura.model.repository.SequenceMatcher,
-                'ratio', 'quick_ratio', 'real_quick_ratio'),
             Timer('unified_diff', allura.model.repository, 'unified_diff'),
         ] + [Timer('sidebar', ep.load(), 'sidebar_menu') for ep in tool_entry_points]
 

http://git-wip-us.apache.org/repos/asf/allura/blob/7f738bdf/Allura/allura/model/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
index 5fe2ba9..636d10c 100644
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -1168,78 +1168,15 @@ class Commit(RepoObject, ActivityObject):
 
     def paged_diffs(self, start=0, end=None):
         diffs = self.repo.paged_diffs(self._id, start, end)
-        if not diffs.get('copied'):
-            diffs['copied'] = []
-        copied = self._diffs_copied(diffs['added'], diffs['removed'])
-        diffs['copied'].extend(copied)
+
         return Object(
             added=sorted(diffs['added']),
             removed=sorted(diffs['removed']),
             changed=sorted(diffs['changed']),
             copied=sorted(diffs['copied']),
+            renamed=sorted(diffs['renamed']),
             total=diffs['total'])
 
-    def _diffs_copied(self, added, removed):
-        '''Return list with file renames diffs.
-
-        Will change `added` and `removed` lists also.
-        '''
-        def _blobs_similarity(removed_blob, added):
-            best = dict(ratio=0, name='', blob=None)
-            for added_name in added:
-                added_blob = self.tree.get_obj_by_path(added_name)
-                if not isinstance(added_blob, Blob):
-                    continue
-                diff = SequenceMatcher(None, removed_blob.text,
-                                       added_blob.text)
-                ratio = diff.quick_ratio()
-                if ratio > best['ratio']:
-                    best['ratio'] = ratio
-                    best['name'] = added_name
-                    best['blob'] = added_blob
-
-                if ratio == 1:
-                    break  # we'll won't find better similarity than 100% :)
-
-            if best['ratio'] > DIFF_SIMILARITY_THRESHOLD:
-                diff = ''
-                if best['ratio'] < 1:
-                    added_blob = best['blob']
-                    rpath = ('a' + removed_blob.path()).encode('utf-8')
-                    apath = ('b' + added_blob.path()).encode('utf-8')
-                    diff = ''.join(unified_diff(list(removed_blob),
-                                                list(added_blob),
-                                                rpath, apath))
-                return dict(new=best['name'],
-                            ratio=best['ratio'], diff=diff)
-
-        def _trees_similarity(removed_tree, added):
-            for added_name in added:
-                added_tree = self.tree.get_obj_by_path(added_name)
-                if not isinstance(added_tree, Tree):
-                    continue
-                if removed_tree._id == added_tree._id:
-                    return dict(new=added_name,
-                                ratio=1, diff='')
-
-        if not removed:
-            return []
-        copied = []
-        prev_commit = self.get_parent()
-        for removed_name in removed[:]:
-            removed_blob = prev_commit.tree.get_obj_by_path(removed_name)
-            rename_info = None
-            if isinstance(removed_blob, Blob):
-                rename_info = _blobs_similarity(removed_blob, added)
-            elif isinstance(removed_blob, Tree):
-                rename_info = _trees_similarity(removed_blob, added)
-            if rename_info is not None:
-                rename_info['old'] = removed_name
-                copied.append(rename_info)
-                removed.remove(rename_info['old'])
-                added.remove(rename_info['new'])
-        return copied
-
     def get_path(self, path, create=True):
         path = path.lstrip('/')
         parts = path.split('/')

http://git-wip-us.apache.org/repos/asf/allura/blob/7f738bdf/ForgeGit/forgegit/model/git_repo.py
----------------------------------------------------------------------
diff --git a/ForgeGit/forgegit/model/git_repo.py b/ForgeGit/forgegit/model/git_repo.py
index 9549fa6..9a8549a 100644
--- a/ForgeGit/forgegit/model/git_repo.py
+++ b/ForgeGit/forgegit/model/git_repo.py
@@ -642,36 +642,55 @@ class GitImplementation(M.RepositoryImplementation):
             max_count=1).splitlines()[1:]
 
     def paged_diffs(self, commit_id, start=0, end=None):
-        added, removed, changed = [], [], []
+        result = {'added': [], 'removed': [], 'changed': [], 'copied': [], 'renamed': [], 'total': 0}
+
         files = self._git.git.diff_tree(
             '--no-commit-id',
+            '--find-renames',
+            '--find-copies',
             '--name-status',
-            '--no-renames',
+            '--no-abbrev',
             '--root',
-            # show tree entry itself as well as subtrees (Commit.added_paths
-            # relies on this)
+            '--find-copies-harder',
+            # show tree entry itself as well as subtrees (Commit.added_paths relies on this)
             '-t',
             '-z',  # don't escape filenames and use \x00 as fields delimiter
             commit_id).split('\x00')[:-1]
 
-        total = len(files) / 2
-        files = [(files[i], h.really_unicode(files[i+1]))
-                 for i in xrange(0, len(files), 2)]
+        result['total'] = len(files) / 2
+        x = 0
+        while x < len(files):
+            try:
+                if files[x].startswith("R") or files[x].startswith("C"):
+                    change_list = result['renamed'] if files[x].startswith("R") else result['copied']
+                    ratio = float(files[x][1:4]) / 100.0
+                    change_list.append({
+                        'new': h.really_unicode(files[x + 2]),
+                        'old': h.really_unicode(files[x + 1]),
+                        'ratio': ratio,
+                        'diff': '',
+                    })
+                    del files[x:x+3]
+                    x += 3
+                    result['total'] -= 1
+                else:
+                    x += 2
+            except IndexError:
+                break
+
+        files = [(files[i], h.really_unicode(files[i + 1]))
+                 for i in xrange(0, result['total'] + 1, 2)]
 
         # files = [('A', u'filename'), ('D', u'another filename'), ...]
         for status, name in files[start:end]:
             if status == 'A':
-                added.append(name)
+                result['added'].append(name)
             elif status == 'D':
-                removed.append(name)
+                result['removed'].append(name)
             elif status == 'M':
-                changed.append(name)
-        return {
-            'added': added,
-            'removed': removed,
-            'changed': changed,
-            'total': total,
-        }
+                result['changed'].append(name)
+
+        return result
 
     @contextmanager
     def _shared_clone(self, from_path):

http://git-wip-us.apache.org/repos/asf/allura/blob/7f738bdf/ForgeSVN/forgesvn/model/svn.py
----------------------------------------------------------------------
diff --git a/ForgeSVN/forgesvn/model/svn.py b/ForgeSVN/forgesvn/model/svn.py
index cd410e3..f43f536 100644
--- a/ForgeSVN/forgesvn/model/svn.py
+++ b/ForgeSVN/forgesvn/model/svn.py
@@ -781,13 +781,7 @@ class SVNImplementation(M.RepositoryImplementation):
         return []
 
     def paged_diffs(self, commit_id, start=0, end=None):
-        result = {
-            'added': [],
-            'removed': [],
-            'changed': [],
-            'copied': [],
-            'total': 0,
-        }
+        result = {'added': [], 'removed': [], 'changed': [], 'copied': [], 'renamed': [], 'total': 0}
         rev = self._revision(commit_id)
         try:
             log_info = self._svn.log(
@@ -822,6 +816,15 @@ class SVNImplementation(M.RepositoryImplementation):
                 # svn add aaa.txt
                 # svn commit -m "Replace aaa.txt"
                 result['changed'].append(h.really_unicode(p.path))
+
+        for r in result['copied']:
+            if r['old'] in result['removed'][:]:
+                result['removed'].remove(r['old'])
+                result['copied'].remove(r)
+                result['renamed'].append(r)
+            if r['new'] in result['added']:
+                result['added'].remove(r['new'])
+
         return result
 
 Mapper.compile_all()