You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@allura.apache.org by jo...@apache.org on 2013/12/17 02:15:02 UTC

git commit: [#6821] Remove old last_commit_ids method and fix handling of merge commits in git

Updated Branches:
  refs/heads/cj/6821 77d18b129 -> 6578d12a2


[#6821] Remove old last_commit_ids method and fix handling of merge commits in git

Signed-off-by: Cory Johns <cj...@slashdotmedia.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/6578d12a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/6578d12a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/6578d12a

Branch: refs/heads/cj/6821
Commit: 6578d12a216a72a9cebd07099015fed3a5260334
Parents: 77d18b1
Author: Cory Johns <cj...@slashdotmedia.com>
Authored: Tue Dec 17 00:51:26 2013 +0000
Committer: Cory Johns <cj...@slashdotmedia.com>
Committed: Tue Dec 17 00:51:26 2013 +0000

----------------------------------------------------------------------
 Allura/allura/model/repository.py   | 64 +++++++++++---------------------
 ForgeGit/forgegit/model/git_repo.py | 34 ++++++++++++-----
 2 files changed, 47 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/6578d12a/Allura/allura/model/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
index 2aeef64..10f2950 100644
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -142,45 +142,6 @@ class RepositoryImplementation(object):
         '''Create a tarball for the revision'''
         raise NotImplementedError, 'tarball'
 
-    def last_commit_ids(self, commit, paths):
-        '''
-        Return a mapping {path: commit_id} of the _id of the last
-        commit to touch each path, starting from the given commit.
-        '''
-        orig_commit = commit
-        timeout = float(config.get('lcd_timeout', 60))
-        start_time = time()
-        paths = set(paths)
-        result = {}
-        seen_commits = set()
-        while paths and commit:
-            if time() - start_time > timeout:
-                log.error('last_commit_ids timeout for %s on %s', orig_commit._id, ', '.join(paths))
-                return result
-            seen_commits.add(commit._id)
-            changed = paths & set(commit.changed_paths)
-            result.update({path: commit._id for path in changed})
-            paths = paths - changed
-
-            # Hacky work-around for DiffInfoDocs previously having been
-            # computed wrong (not including children of added trees).
-            # Can be removed once all projects have had diffs / LCDs refreshed.
-            parent = commit.get_parent()
-            if parent and parent._id in seen_commits:
-                log.error('last_commit_ids loop detected at %s for %s on %s', parent._id, orig_commit._id, ', '.join(paths))
-                return result  # sanity check for bad data (loops)
-            if parent:
-                changed = set([path for path in paths if not parent.has_path(path)])
-                result.update({path: commit._id for path in changed})
-                paths = paths - changed
-            else:
-                result.update({path: commit._id for path in paths})
-                paths = set()
-            # end hacky work-around
-
-            commit = parent
-        return result
-
     def is_empty(self):
         '''Determine if the repository is empty by checking the filesystem'''
         raise NotImplementedError, 'is_empty'
@@ -248,9 +209,17 @@ class RepositoryImplementation(object):
         raise NotImplementedError, 'tags'
 
     def last_commit_ids(self, commit, paths):
-        """
-        Find the ID of the last commit to touch each path.
-        """
+        '''
+        Return a mapping {path: commit_id} of the _id of the last
+        commit to touch each path, starting from the given commit.
+
+        Chunks the set of paths based on lcd_thread_chunk_size and
+        runs each chunk (if more than one) in a separate thread.
+
+        Each thread will call :meth:`_get_last_commit` to get the
+        commit ID and list of changed files for the last commit
+        to touch any file in a given chunk.
+        '''
         if not paths:
             return {}
         timeout = float(tg.config.get('lcd_timeout', 60))
@@ -300,6 +269,17 @@ class RepositoryImplementation(object):
                 chunks.all_tasks_done.release()
         return result
 
+    def _get_last_commit(self, commit_id, paths):
+        """
+        For a given commit ID and set of paths / files,
+        use the SCM to determine the last commit to touch
+        any of the given paths / files.
+
+        Should return a tuple containing the ID of the
+        commit and the list of all files changed in the commit.
+        """
+        raise NotImplementedError('_get_last_commit')
+
 class Repository(Artifact, ActivityObject):
     BATCH_SIZE=100
     class __mongometa__:

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/6578d12a/ForgeGit/forgegit/model/git_repo.py
----------------------------------------------------------------------
diff --git a/ForgeGit/forgegit/model/git_repo.py b/ForgeGit/forgegit/model/git_repo.py
index 229f2bb..5a2c3b9 100644
--- a/ForgeGit/forgegit/model/git_repo.py
+++ b/ForgeGit/forgegit/model/git_repo.py
@@ -499,16 +499,32 @@ class GitImplementation(M.RepositoryImplementation):
         session(self._repo).flush(self._repo)
 
     def _get_last_commit(self, commit_id, paths):
-        output = self._git.git.log(
-                commit_id, '--', *paths,
-                pretty='format:%H',
-                name_only=True,
-                max_count=1)
-        if not output:
-            return None, set()
-        else:
+        # git apparently considers merge commits to have "touched" a path
+        # if the path is changed in either branch being merged, even though
+        # the --name-only output doesn't include those files.  So, we have
+        # to filter out the merge commits that don't actually include any
+        # of the referenced paths in the list of files.
+        files = []
+        # don't skip first commit we're called with because it might be
+        # a valid change commit; however...
+        skip = 0
+        while commit_id and not files:
+            output = self._git.git.log(
+                    commit_id, '--', *paths,
+                    pretty='format:%H',
+                    name_only=True,
+                    max_count=1,
+                    skip=skip)
             lines = output.split('\n')
-            return lines[0], set(lines[1:])
+            commit_id = lines[0]
+            files = prefix_paths_union(paths, set(lines[1:]))
+            # *do* skip subsequent merge commits or we'll get stuck on an infinite
+            # loop matching and then diregarding the merge commit over and over
+            skip = 1
+        if commit_id:
+            return commit_id, files
+        else:
+            return None, set()
 
 class _OpenedGitBlob(object):
     CHUNK_SIZE=4096