You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by he...@apache.org on 2015/08/10 19:33:04 UTC
[33/50] [abbrv] allura git commit: [#7925] Refactor and improve the diff processing
[#7925] Refactor and improve the diff processing
Project: http://git-wip-us.apache.org/repos/asf/allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/allura/commit/7f738bdf
Tree: http://git-wip-us.apache.org/repos/asf/allura/tree/7f738bdf
Diff: http://git-wip-us.apache.org/repos/asf/allura/diff/7f738bdf
Branch: refs/heads/hs/7925
Commit: 7f738bdfbb93e78d8d557f8b60fb1965ce9e67ce
Parents: 7eeb0dd
Author: Heith Seewald <hs...@slashdotmedia.com>
Authored: Mon Jul 27 16:06:28 2015 -0400
Committer: Heith Seewald <hs...@slashdotmedia.com>
Committed: Mon Aug 10 09:38:35 2015 -0400
----------------------------------------------------------------------
Allura/allura/lib/custom_middleware.py | 4 --
Allura/allura/model/repository.py | 67 +----------------------------
ForgeGit/forgegit/model/git_repo.py | 51 +++++++++++++++-------
ForgeSVN/forgesvn/model/svn.py | 17 +++++---
4 files changed, 47 insertions(+), 92 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/allura/blob/7f738bdf/Allura/allura/lib/custom_middleware.py
----------------------------------------------------------------------
diff --git a/Allura/allura/lib/custom_middleware.py b/Allura/allura/lib/custom_middleware.py
index e56a530..1938e51 100644
--- a/Allura/allura/lib/custom_middleware.py
+++ b/Allura/allura/lib/custom_middleware.py
@@ -322,10 +322,6 @@ class AlluraTimerMiddleware(TimerMiddleware):
Timer('urlopen', urllib2, 'urlopen'),
Timer('base_repo_tool.{method_name}',
allura.model.repository.RepositoryImplementation, 'last_commit_ids'),
- Timer('_diffs_copied', allura.model.repository.Commit, '_diffs_copied'),
- Timer(
- 'sequencematcher.{method_name}', allura.model.repository.SequenceMatcher,
- 'ratio', 'quick_ratio', 'real_quick_ratio'),
Timer('unified_diff', allura.model.repository, 'unified_diff'),
] + [Timer('sidebar', ep.load(), 'sidebar_menu') for ep in tool_entry_points]
http://git-wip-us.apache.org/repos/asf/allura/blob/7f738bdf/Allura/allura/model/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
index 5fe2ba9..636d10c 100644
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -1168,78 +1168,15 @@ class Commit(RepoObject, ActivityObject):
def paged_diffs(self, start=0, end=None):
diffs = self.repo.paged_diffs(self._id, start, end)
- if not diffs.get('copied'):
- diffs['copied'] = []
- copied = self._diffs_copied(diffs['added'], diffs['removed'])
- diffs['copied'].extend(copied)
+
return Object(
added=sorted(diffs['added']),
removed=sorted(diffs['removed']),
changed=sorted(diffs['changed']),
copied=sorted(diffs['copied']),
+ renamed=sorted(diffs['renamed']),
total=diffs['total'])
- def _diffs_copied(self, added, removed):
- '''Return list with file renames diffs.
-
- Will change `added` and `removed` lists also.
- '''
- def _blobs_similarity(removed_blob, added):
- best = dict(ratio=0, name='', blob=None)
- for added_name in added:
- added_blob = self.tree.get_obj_by_path(added_name)
- if not isinstance(added_blob, Blob):
- continue
- diff = SequenceMatcher(None, removed_blob.text,
- added_blob.text)
- ratio = diff.quick_ratio()
- if ratio > best['ratio']:
- best['ratio'] = ratio
- best['name'] = added_name
- best['blob'] = added_blob
-
- if ratio == 1:
- break # we'll won't find better similarity than 100% :)
-
- if best['ratio'] > DIFF_SIMILARITY_THRESHOLD:
- diff = ''
- if best['ratio'] < 1:
- added_blob = best['blob']
- rpath = ('a' + removed_blob.path()).encode('utf-8')
- apath = ('b' + added_blob.path()).encode('utf-8')
- diff = ''.join(unified_diff(list(removed_blob),
- list(added_blob),
- rpath, apath))
- return dict(new=best['name'],
- ratio=best['ratio'], diff=diff)
-
- def _trees_similarity(removed_tree, added):
- for added_name in added:
- added_tree = self.tree.get_obj_by_path(added_name)
- if not isinstance(added_tree, Tree):
- continue
- if removed_tree._id == added_tree._id:
- return dict(new=added_name,
- ratio=1, diff='')
-
- if not removed:
- return []
- copied = []
- prev_commit = self.get_parent()
- for removed_name in removed[:]:
- removed_blob = prev_commit.tree.get_obj_by_path(removed_name)
- rename_info = None
- if isinstance(removed_blob, Blob):
- rename_info = _blobs_similarity(removed_blob, added)
- elif isinstance(removed_blob, Tree):
- rename_info = _trees_similarity(removed_blob, added)
- if rename_info is not None:
- rename_info['old'] = removed_name
- copied.append(rename_info)
- removed.remove(rename_info['old'])
- added.remove(rename_info['new'])
- return copied
-
def get_path(self, path, create=True):
path = path.lstrip('/')
parts = path.split('/')
http://git-wip-us.apache.org/repos/asf/allura/blob/7f738bdf/ForgeGit/forgegit/model/git_repo.py
----------------------------------------------------------------------
diff --git a/ForgeGit/forgegit/model/git_repo.py b/ForgeGit/forgegit/model/git_repo.py
index 9549fa6..9a8549a 100644
--- a/ForgeGit/forgegit/model/git_repo.py
+++ b/ForgeGit/forgegit/model/git_repo.py
@@ -642,36 +642,55 @@ class GitImplementation(M.RepositoryImplementation):
max_count=1).splitlines()[1:]
def paged_diffs(self, commit_id, start=0, end=None):
- added, removed, changed = [], [], []
+ result = {'added': [], 'removed': [], 'changed': [], 'copied': [], 'renamed': [], 'total': 0}
+
files = self._git.git.diff_tree(
'--no-commit-id',
+ '--find-renames',
+ '--find-copies',
'--name-status',
- '--no-renames',
+ '--no-abbrev',
'--root',
- # show tree entry itself as well as subtrees (Commit.added_paths
- # relies on this)
+ '--find-copies-harder',
+ # show tree entry itself as well as subtrees (Commit.added_paths relies on this)
'-t',
'-z', # don't escape filenames and use \x00 as fields delimiter
commit_id).split('\x00')[:-1]
- total = len(files) / 2
- files = [(files[i], h.really_unicode(files[i+1]))
- for i in xrange(0, len(files), 2)]
+ result['total'] = len(files) / 2
+ x = 0
+ while x < len(files):
+ try:
+ if files[x].startswith("R") or files[x].startswith("C"):
+ change_list = result['renamed'] if files[x].startswith("R") else result['copied']
+ ratio = float(files[x][1:4]) / 100.0
+ change_list.append({
+ 'new': h.really_unicode(files[x + 2]),
+ 'old': h.really_unicode(files[x + 1]),
+ 'ratio': ratio,
+ 'diff': '',
+ })
+ del files[x:x+3]
+ x += 3
+ result['total'] -= 1
+ else:
+ x += 2
+ except IndexError:
+ break
+
+ files = [(files[i], h.really_unicode(files[i + 1]))
+ for i in xrange(0, result['total'] + 1, 2)]
# files = [('A', u'filename'), ('D', u'another filename'), ...]
for status, name in files[start:end]:
if status == 'A':
- added.append(name)
+ result['added'].append(name)
elif status == 'D':
- removed.append(name)
+ result['removed'].append(name)
elif status == 'M':
- changed.append(name)
- return {
- 'added': added,
- 'removed': removed,
- 'changed': changed,
- 'total': total,
- }
+ result['changed'].append(name)
+
+ return result
@contextmanager
def _shared_clone(self, from_path):
http://git-wip-us.apache.org/repos/asf/allura/blob/7f738bdf/ForgeSVN/forgesvn/model/svn.py
----------------------------------------------------------------------
diff --git a/ForgeSVN/forgesvn/model/svn.py b/ForgeSVN/forgesvn/model/svn.py
index cd410e3..f43f536 100644
--- a/ForgeSVN/forgesvn/model/svn.py
+++ b/ForgeSVN/forgesvn/model/svn.py
@@ -781,13 +781,7 @@ class SVNImplementation(M.RepositoryImplementation):
return []
def paged_diffs(self, commit_id, start=0, end=None):
- result = {
- 'added': [],
- 'removed': [],
- 'changed': [],
- 'copied': [],
- 'total': 0,
- }
+ result = {'added': [], 'removed': [], 'changed': [], 'copied': [], 'renamed': [], 'total': 0}
rev = self._revision(commit_id)
try:
log_info = self._svn.log(
@@ -822,6 +816,15 @@ class SVNImplementation(M.RepositoryImplementation):
# svn add aaa.txt
# svn commit -m "Replace aaa.txt"
result['changed'].append(h.really_unicode(p.path))
+
+ for r in result['copied']:
+ if r['old'] in result['removed'][:]:
+ result['removed'].remove(r['old'])
+ result['copied'].remove(r)
+ result['renamed'].append(r)
+ if r['new'] in result['added']:
+ result['added'].remove(r['new'])
+
return result
Mapper.compile_all()