You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by ro...@apache.org on 2020/12/29 13:30:46 UTC

[buildstream] 02/03: artifactcache.py: make cleanup clean by object rather than by ref

This is an automated email from the ASF dual-hosted git repository.

root pushed a commit to branch abderrahim/cleanup-speedup
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit fc15a92b96327522bd3c722c2cf03f01219c3e68
Author: Abderrahim Kitouni <ak...@gnome.org>
AuthorDate: Wed Sep 16 10:11:29 2020 +0100

    artifactcache.py: make cleanup clean by object rather than by ref
    
    This makes cleanup much faster
    
    Fixes #734
---
 buildstream/_artifactcache/artifactcache.py | 123 +++++++++++++++-------------
 buildstream/_artifactcache/cascache.py      |   4 +-
 2 files changed, 66 insertions(+), 61 deletions(-)

diff --git a/buildstream/_artifactcache/artifactcache.py b/buildstream/_artifactcache/artifactcache.py
index 24fdbe8..29af36a 100644
--- a/buildstream/_artifactcache/artifactcache.py
+++ b/buildstream/_artifactcache/artifactcache.py
@@ -21,6 +21,7 @@ import multiprocessing
 import os
 import signal
 import string
+import time
 from collections import Mapping, namedtuple
 
 from ..types import _KeyStrength
@@ -137,6 +138,7 @@ class ArtifactCache():
         self.project_remote_specs = {}
 
         self._required_elements = set()       # The elements required for this session
+        self._start_time = time.time()        # The start time of the current session
         self._cache_size = None               # The current cache size, sometimes it's an estimate
         self._cache_quota = None              # The cache quota
         self._cache_quota_original = None     # The cache quota as specified by the user, in bytes
@@ -279,7 +281,8 @@ class ArtifactCache():
                     try:
                         ref = self.get_artifact_fullname(element, key)
 
-                        self.cas.update_mtime(ref)
+                        tree = self.cas.resolve_ref(ref, update_mtime=True)
+                        self.cas.update_tree_mtime(tree)
                     except CASError:
                         pass
 
@@ -315,73 +318,77 @@ class ArtifactCache():
                               utils._pretty_size(volume_size, dec_places=2),
                               utils._pretty_size(volume_avail, dec_places=2)))
 
-        # Build a set of the cache keys which are required
-        # based on the required elements at cleanup time
-        #
-        # We lock both strong and weak keys - deleting one but not the
-        # other won't save space, but would be a user inconvenience.
-        required_artifacts = set()
+        # Update the mtimes for the artifacts required by the current session.
+        # This makes sure they aren't deleted as part of the cleanup
         for element in self._required_elements:
-            required_artifacts.update([
-                element._get_cache_key(strength=_KeyStrength.STRONG),
-                element._get_cache_key(strength=_KeyStrength.WEAK)
-            ])
+            strong_key = element._get_cache_key(strength=_KeyStrength.STRONG)
+            weak_key = element._get_cache_key(strength=_KeyStrength.WEAK)
+            for key in (strong_key, weak_key):
+                if key:
+                    try:
+                        ref = self.get_artifact_fullname(element, key)
+
+                        tree = self.cas.resolve_ref(ref, update_mtime=True)
+                        self.cas.update_tree_mtime(tree)
+                    except CASError:
+                        pass
 
         # Do a real computation of the cache size once, just in case
         self.compute_cache_size()
 
-        while self.get_cache_size() >= self._cache_lower_threshold:
-            try:
-                to_remove = artifacts.pop(0)
-            except IndexError:
-                # If too many artifacts are required, and we therefore
-                # can't remove them, we have to abort the build.
-                #
-                # FIXME: Asking the user what to do may be neater
-                #
-                default_conf = os.path.join(os.environ['XDG_CONFIG_HOME'],
-                                            'buildstream.conf')
-                detail = ("Aborted after removing {} refs and saving {} disk space.\n"
-                          "The remaining {} in the cache is required by the {} elements in your build plan\n\n"
-                          "There is not enough space to complete the build.\n"
-                          "Please increase the cache-quota in {} and/or make more disk space."
-                          .format(removed_ref_count,
-                                  utils._pretty_size(space_saved, dec_places=2),
-                                  utils._pretty_size(self.get_cache_size(), dec_places=2),
-                                  len(self._required_elements),
-                                  (context.config_origin or default_conf)))
-
-                if self.has_quota_exceeded():
-                    raise ArtifactError("Cache too full. Aborting.",
-                                        detail=detail,
-                                        reason="cache-too-full")
-                else:
-                    break
+        last_mtime = 0
+        for mtime, to_remove in self.cas.list_objects():
+            if mtime >= self._start_time:
+                break
 
-            key = to_remove.rpartition('/')[2]
-            if key not in required_artifacts:
+            try:
+                st = os.stat(to_remove)
+                if st.st_mtime >= self._start_time:
+                    continue
+                os.unlink(to_remove)
 
-                # Remove the actual artifact, if it's not required.
-                size = self.remove(to_remove)
+                space_saved += st.st_size
+                self._cache_size -= st.st_size
+                last_mtime = mtime
+            except FileNotFoundError:
+                pass
 
-                removed_ref_count += 1
-                space_saved += size
+            if self.get_cache_size() < self._cache_lower_threshold:
+                break
 
-                self._message(MessageType.STATUS,
-                              "Freed {: <7} {}".format(
-                                  utils._pretty_size(size, dec_places=2),
-                                  to_remove))
+            # User callback
+            #
+            # Currently this process is fairly slow, but we should
+            # think about throttling this progress() callback if this
+            # becomes too intense.
+            if progress:
+                progress()
 
-                # Remove the size from the removed size
-                self.set_cache_size(self._cache_size - size)
+        for mtime, ref in self.cas.list_refs():
+            if mtime > last_mtime:
+                break
 
-                # User callback
-                #
-                # Currently this process is fairly slow, but we should
-                # think about throttling this progress() callback if this
-                # becomes too intense.
-                if progress:
-                    progress()
+            self.remove(ref)
+            removed_ref_count += 1
+
+        # If too many artifacts are required, and we therefore
+        # can't remove them, we have to abort the build.
+        default_conf = os.path.join(os.environ['XDG_CONFIG_HOME'],
+                                    'buildstream.conf')
+        detail = ("Aborted after removing {} refs and saving {} disk space.\n"
+                  "The remaining {} in the cache is required by the {} elements in your build plan\n\n"
+                  "There is not enough space to complete the build.\n"
+                  "Please increase the cache-quota in {} and/or make more disk space."
+                  .format(removed_ref_count,
+                          utils._pretty_size(space_saved, dec_places=2),
+                          utils._pretty_size(self.get_cache_size(), dec_places=2),
+                          len(self._required_elements),
+                          (context.config_origin or default_conf)))
+
+        if self.has_quota_exceeded():
+            raise ArtifactError("Cache too full. Aborting.",
+                                detail=detail,
+                                reason="cache-too-full")
 
         # Informational message about the side effects of the cleanup
         self._message(MessageType.INFO, "Cleanup completed",
@@ -617,7 +624,7 @@ class ArtifactCache():
             if remove_extract:
                 utils._force_rmtree(extract)
 
-        return self.cas.remove(ref)
+        return self.cas.remove(ref, defer_prune=True)
 
     # extract():
     #
diff --git a/buildstream/_artifactcache/cascache.py b/buildstream/_artifactcache/cascache.py
index 27020a0..9dd25b3 100644
--- a/buildstream/_artifactcache/cascache.py
+++ b/buildstream/_artifactcache/cascache.py
@@ -535,9 +535,7 @@ class CASCache():
                 # Obtain the mtime (the time a file was last modified)
                 mtimes.append(os.path.getmtime(ref_path))
 
-        # NOTE: Sorted will sort from earliest to latest, thus the
-        # first ref of this list will be the file modified earliest.
-        return [ref for _, ref in sorted(zip(mtimes, refs))]
+        return sorted(zip(mtimes, refs))
 
     # list_objects():
     #