You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by tv...@apache.org on 2021/02/04 08:14:08 UTC

[buildstream] 01/02: Limiting git history to reduce cache size

This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch reduce_history_in_cache
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 0fd230671d0c865b976ff1d9a4f70ae8ff631cfb
Author: Phillip Smyth <ph...@Nexus-x240.dyn.ducie.codethink.co.uk>
AuthorDate: Tue May 29 13:48:19 2018 +0100

    Limiting git history to reduce cache size
---
 buildstream/plugins/sources/git.py | 80 +++++++++++++++++++++++++++++++++-----
 tests/sources/git.py               |  1 +
 2 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/buildstream/plugins/sources/git.py b/buildstream/plugins/sources/git.py
index 44065ad..4b53c7f 100644
--- a/buildstream/plugins/sources/git.py
+++ b/buildstream/plugins/sources/git.py
@@ -70,6 +70,7 @@ git - stage files from a git repository
 import os
 import re
 import shutil
+import shlex
 from collections import Mapping
 from io import StringIO
 
@@ -150,15 +151,76 @@ class GitMirror():
     def stage(self, directory):
         fullpath = os.path.join(directory, self.path)
 
-        # We need to pass '--no-hardlinks' because there's nothing to
-        # stop the build from overwriting the files in the .git directory
-        # inside the sandbox.
-        self.source.call([self.source.host_git, 'clone', '--no-checkout', '--no-hardlinks', self.mirror, fullpath],
-                         fail="Failed to create git mirror {} in directory: {}".format(self.mirror, fullpath))
-
-        self.source.call([self.source.host_git, 'checkout', '--force', self.ref],
-                         fail="Failed to checkout git ref {}".format(self.ref),
-                         cwd=fullpath)
+        # We need every commit from the last tagged object up to the tracking commit
+        if self.has_ref():
+            all_tags = self.source.check_output([self.source.host_git, 'tag'], cwd=self.mirror)[1]
+            all_tags = [x.strip() for x in all_tags.split('\n')]
+            if all_tags:
+                tags_since_sha = self.source.check_output([self.source.host_git,
+                                                           'tag',
+                                                           '--sort',
+                                                           '--contains',
+                                                           self.ref],
+                                                          cwd=self.mirror)[1]
+
+                tags_since_sha = [x.strip() for x in tags_since_sha.split('\n')]
+                preceeding_tags = [x for x in all_tags if x not in tags_since_sha]
+                if preceeding_tags:
+                    last_tag_before_ref = preceeding_tags[-1]
+                else:
+                    last_tag_before_ref = 'HEAD'
+
+                # find number of commits since last_tag_before_ref
+                target_depth = self.source.check_output([self.source.host_git,
+                                                         'rev-list',
+                                                         '--count',
+                                                         'HEAD...{}'.format(last_tag_before_ref)])[1]
+
+            else:
+                target_depth = self.source.check_output([self.source.host_git,
+                                                         'rev-list',
+                                                         '--count',
+                                                         'HEAD...{}'.format(self.ref)], cwd=self.mirror)[1]
+
+        if int(target_depth) == 0:
+            target_depth = 1
+
+        branch = self.source.check_output([self.source.host_git,
+                                           'rev-parse',
+                                           '--abbrev-ref',
+                                           'HEAD'], cwd=self.mirror)[1]
+
+        self.source.call([self.source.host_git,
+                          'init',
+                          fullpath])
+
+        self.source.call([self.source.host_git,
+                          'fetch',
+                          '--depth={}'.format(int(target_depth)),
+                          'ext::git -c uploadpack.allowReachableSHA1InWant=true %s {}'
+                          .format(shlex.quote(self.mirror)),
+                          self.ref],
+                         env=dict(os.environ, GIT_ALLOW_PROTOCOL="ext"), cwd=fullpath)
+
+        self.source.call([self.source.host_git,
+                          'checkout',
+                          'FETCH_HEAD'], cwd=fullpath)
+
+        if "master" not in branch:
+            self.source.call([self.source.host_git,
+                              'branch',
+                              '-D',
+                              'master'], cwd=fullpath)
+
+        self.source.call([self.source.host_git,
+                          'reflog',
+                          'expire',
+                          '--expire-unreachable=all'
+                          '--all'], cwd=fullpath)
+
+        self.source.call([self.source.host_git,
+                          'repack',
+                          '-ad'], cwd=fullpath)
 
     def init_workspace(self, directory):
         fullpath = os.path.join(directory, self.path)
diff --git a/tests/sources/git.py b/tests/sources/git.py
index 06888c3..495c147 100644
--- a/tests/sources/git.py
+++ b/tests/sources/git.py
@@ -288,6 +288,7 @@ def test_submodule_fetch_submodule_individual_checkout_explicit(cli, tmpdir, dat
 @pytest.mark.skipif(HAVE_GIT is False, reason="git is not available")
 @pytest.mark.datafiles(os.path.join(DATA_DIR, 'project-override'))
 def test_submodule_fetch_project_override(cli, tmpdir, datafiles):
+    print("\n\n\nTemp Directory: {}\n\n\n".format(tmpdir))
     project = os.path.join(datafiles.dirname, datafiles.basename)
     checkoutdir = os.path.join(str(tmpdir), "checkout")