You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by ro...@apache.org on 2020/12/29 13:30:46 UTC
[buildstream] 02/03: artifactcache.py: make cleanup clean by object
rather than by ref
This is an automated email from the ASF dual-hosted git repository.
root pushed a commit to branch abderrahim/cleanup-speedup
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit fc15a92b96327522bd3c722c2cf03f01219c3e68
Author: Abderrahim Kitouni <ak...@gnome.org>
AuthorDate: Wed Sep 16 10:11:29 2020 +0100
artifactcache.py: make cleanup clean by object rather than by ref
This makes cleanup much faster
Fixes #734
---
buildstream/_artifactcache/artifactcache.py | 123 +++++++++++++++-------------
buildstream/_artifactcache/cascache.py | 4 +-
2 files changed, 66 insertions(+), 61 deletions(-)
diff --git a/buildstream/_artifactcache/artifactcache.py b/buildstream/_artifactcache/artifactcache.py
index 24fdbe8..29af36a 100644
--- a/buildstream/_artifactcache/artifactcache.py
+++ b/buildstream/_artifactcache/artifactcache.py
@@ -21,6 +21,7 @@ import multiprocessing
import os
import signal
import string
+import time
from collections import Mapping, namedtuple
from ..types import _KeyStrength
@@ -137,6 +138,7 @@ class ArtifactCache():
self.project_remote_specs = {}
self._required_elements = set() # The elements required for this session
+ self._start_time = time.time() # The start time of the current session
self._cache_size = None # The current cache size, sometimes it's an estimate
self._cache_quota = None # The cache quota
self._cache_quota_original = None # The cache quota as specified by the user, in bytes
@@ -279,7 +281,8 @@ class ArtifactCache():
try:
ref = self.get_artifact_fullname(element, key)
- self.cas.update_mtime(ref)
+ tree = self.cas.resolve_ref(ref, update_mtime=True)
+ self.cas.update_tree_mtime(tree)
except CASError:
pass
@@ -315,73 +318,77 @@ class ArtifactCache():
utils._pretty_size(volume_size, dec_places=2),
utils._pretty_size(volume_avail, dec_places=2)))
- # Build a set of the cache keys which are required
- # based on the required elements at cleanup time
- #
- # We lock both strong and weak keys - deleting one but not the
- # other won't save space, but would be a user inconvenience.
- required_artifacts = set()
+ # Update the mtimes for the artifacts required by the current session.
+ # This makes sure they aren't deleted as part of the cleanup
for element in self._required_elements:
- required_artifacts.update([
- element._get_cache_key(strength=_KeyStrength.STRONG),
- element._get_cache_key(strength=_KeyStrength.WEAK)
- ])
+ strong_key = element._get_cache_key(strength=_KeyStrength.STRONG)
+ weak_key = element._get_cache_key(strength=_KeyStrength.WEAK)
+ for key in (strong_key, weak_key):
+ if key:
+ try:
+ ref = self.get_artifact_fullname(element, key)
+
+ tree = self.cas.resolve_ref(ref, update_mtime=True)
+ self.cas.update_tree_mtime(tree)
+ except CASError:
+ pass
# Do a real computation of the cache size once, just in case
self.compute_cache_size()
- while self.get_cache_size() >= self._cache_lower_threshold:
- try:
- to_remove = artifacts.pop(0)
- except IndexError:
- # If too many artifacts are required, and we therefore
- # can't remove them, we have to abort the build.
- #
- # FIXME: Asking the user what to do may be neater
- #
- default_conf = os.path.join(os.environ['XDG_CONFIG_HOME'],
- 'buildstream.conf')
- detail = ("Aborted after removing {} refs and saving {} disk space.\n"
- "The remaining {} in the cache is required by the {} elements in your build plan\n\n"
- "There is not enough space to complete the build.\n"
- "Please increase the cache-quota in {} and/or make more disk space."
- .format(removed_ref_count,
- utils._pretty_size(space_saved, dec_places=2),
- utils._pretty_size(self.get_cache_size(), dec_places=2),
- len(self._required_elements),
- (context.config_origin or default_conf)))
-
- if self.has_quota_exceeded():
- raise ArtifactError("Cache too full. Aborting.",
- detail=detail,
- reason="cache-too-full")
- else:
- break
+ last_mtime = 0
+ for mtime, to_remove in self.cas.list_objects():
+ if mtime >= self._start_time:
+ break
- key = to_remove.rpartition('/')[2]
- if key not in required_artifacts:
+ try:
+ st = os.stat(to_remove)
+ if st.st_mtime >= self._start_time:
+ continue
+ os.unlink(to_remove)
- # Remove the actual artifact, if it's not required.
- size = self.remove(to_remove)
+ space_saved += st.st_size
+ self._cache_size -= st.st_size
+ last_mtime = mtime
+ except FileNotFoundError:
+ pass
- removed_ref_count += 1
- space_saved += size
+ if self.get_cache_size() < self._cache_lower_threshold:
+ break
- self._message(MessageType.STATUS,
- "Freed {: <7} {}".format(
- utils._pretty_size(size, dec_places=2),
- to_remove))
+ # User callback
+ #
+ # Currently this process is fairly slow, but we should
+ # think about throttling this progress() callback if this
+ # becomes too intense.
+ if progress:
+ progress()
- # Remove the size from the removed size
- self.set_cache_size(self._cache_size - size)
+ for mtime, ref in self.cas.list_refs():
+ if mtime > last_mtime:
+ break
- # User callback
- #
- # Currently this process is fairly slow, but we should
- # think about throttling this progress() callback if this
- # becomes too intense.
- if progress:
- progress()
+ self.remove(ref)
+ removed_ref_count += 1
+
+ # If too many artifacts are required, and we therefore
+ # can't remove them, we have to abort the build.
+ default_conf = os.path.join(os.environ['XDG_CONFIG_HOME'],
+ 'buildstream.conf')
+ detail = ("Aborted after removing {} refs and saving {} disk space.\n"
+ "The remaining {} in the cache is required by the {} elements in your build plan\n\n"
+ "There is not enough space to complete the build.\n"
+ "Please increase the cache-quota in {} and/or make more disk space."
+ .format(removed_ref_count,
+ utils._pretty_size(space_saved, dec_places=2),
+ utils._pretty_size(self.get_cache_size(), dec_places=2),
+ len(self._required_elements),
+ (context.config_origin or default_conf)))
+
+ if self.has_quota_exceeded():
+ raise ArtifactError("Cache too full. Aborting.",
+ detail=detail,
+ reason="cache-too-full")
# Informational message about the side effects of the cleanup
self._message(MessageType.INFO, "Cleanup completed",
@@ -617,7 +624,7 @@ class ArtifactCache():
if remove_extract:
utils._force_rmtree(extract)
- return self.cas.remove(ref)
+ return self.cas.remove(ref, defer_prune=True)
# extract():
#
diff --git a/buildstream/_artifactcache/cascache.py b/buildstream/_artifactcache/cascache.py
index 27020a0..9dd25b3 100644
--- a/buildstream/_artifactcache/cascache.py
+++ b/buildstream/_artifactcache/cascache.py
@@ -535,9 +535,7 @@ class CASCache():
# Obtain the mtime (the time a file was last modified)
mtimes.append(os.path.getmtime(ref_path))
- # NOTE: Sorted will sort from earliest to latest, thus the
- # first ref of this list will be the file modified earliest.
- return [ref for _, ref in sorted(zip(mtimes, refs))]
+ return sorted(zip(mtimes, refs))
# list_objects():
#