You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by ro...@apache.org on 2020/12/29 13:30:07 UTC

[buildstream] 09/32: Add cache_quota to user config

This is an automated email from the ASF dual-hosted git repository.

root pushed a commit to branch testing/local-cache-expiry
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit f6e51cd6fc860bf765af7f11b9008d9eb52d2e1d
Author: Tristan Maat <tr...@codethink.co.uk>
AuthorDate: Tue Mar 20 09:23:37 2018 +0000

    Add cache_quota to user config
---
 buildstream/_context.py          | 55 ++++++++++++++++++++++++++++++++++++++++
 buildstream/data/userconfig.yaml |  8 ++++++
 buildstream/utils.py             | 43 +++++++++++++++++++++++++++++++
 3 files changed, 106 insertions(+)

diff --git a/buildstream/_context.py b/buildstream/_context.py
index 1a59af2..5cc7f43 100644
--- a/buildstream/_context.py
+++ b/buildstream/_context.py
@@ -21,6 +21,7 @@ import os
 import datetime
 from collections import deque, Mapping
 from contextlib import contextmanager
+from . import utils
 from . import _cachekey
 from . import _signals
 from . import _site
@@ -62,6 +63,12 @@ class Context():
         # The locations from which to push and pull prebuilt artifacts
         self.artifact_cache_specs = []
 
+        # The artifact cache quota
+        self.cache_quota = None
+
+        # The lower threshold to which we aim to reduce the cache size
+        self.cache_lower_threshold = None
+
         # The directory to store build logs
         self.logdir = None
 
@@ -153,6 +160,7 @@ class Context():
         _yaml.node_validate(defaults, [
             'sourcedir', 'builddir', 'artifactdir', 'logdir',
             'scheduler', 'artifacts', 'logging', 'projects',
+            'cache'
         ])
 
         for directory in ['sourcedir', 'builddir', 'artifactdir', 'logdir']:
@@ -165,6 +173,53 @@ class Context():
             path = os.path.normpath(path)
             setattr(self, directory, path)
 
+        # Load quota configuration
+        # We need to find the first existing directory in the path of
+        # our artifactdir - the artifactdir may not have been created
+        # yet.
+        cache = _yaml.node_get(defaults, Mapping, 'cache')
+        _yaml.node_validate(cache, ['quota'])
+
+        artifactdir_volume = self.artifactdir
+        while not os.path.exists(artifactdir_volume):
+            artifactdir_volume = os.path.dirname(artifactdir_volume)
+
+        # We read and parse the cache quota as specified by the user
+        cache_quota = _yaml.node_get(cache, str, 'quota', default_value='infinity')
+        try:
+            cache_quota = utils._parse_size(cache_quota, artifactdir_volume)
+        except utils.UtilError as e:
+            raise LoadError(LoadErrorReason.INVALID_DATA,
+                            "{}\nPlease specify the value in bytes or as a % of full disk space.\n"
+                            "\nValid values are, for example: 800M 10G 1T 50%\n"
+                            .format(str(e))) from e
+
+        # If we are asked not to set a quota, we set it to the maximum
+        # disk space available minus a headroom of 2GB, such that we
+        # at least try to avoid raising Exceptions.
+        #
+        # Of course, we might still end up running out during a build
+        # if we end up writing more than 2G, but hey, this stuff is
+        # already really fuzzy.
+        #
+        if cache_quota is None:
+            stat = os.statvfs(artifactdir_volume)
+            # Again, the artifact directory may not yet have been
+            # created
+            if not os.path.exists(self.artifactdir):
+                cache_size = 0
+            else:
+                cache_size = utils._get_dir_size(self.artifactdir)
+            cache_quota = cache_size + stat.f_bsize * stat.f_bavail
+
+        if 'BST_TEST_SUITE' in os.environ:
+            headroom = 0
+        else:
+            headroom = 2e9
+
+        self.cache_quota = cache_quota - headroom
+        self.cache_lower_threshold = self.cache_quota / 2
+
         # Load artifact share configuration
         self.artifact_cache_specs = ArtifactCache.specs_from_config_node(defaults)
 
diff --git a/buildstream/data/userconfig.yaml b/buildstream/data/userconfig.yaml
index 6bb54ff..6f9f190 100644
--- a/buildstream/data/userconfig.yaml
+++ b/buildstream/data/userconfig.yaml
@@ -23,6 +23,14 @@ artifactdir: ${XDG_CACHE_HOME}/buildstream/artifacts
 logdir: ${XDG_CACHE_HOME}/buildstream/logs
 
 #
+#    Cache
+#
+cache:
+  # Size of the artifact cache: bytes (e.g. 800M, 10G, 1T), a % of the
+  # disk (e.g. 50%) or 'infinity' - BuildStream tries to stay within it.
+  quota: infinity
+
+#
 #    Scheduler
 #
 scheduler:
diff --git a/buildstream/utils.py b/buildstream/utils.py
index f3f9c6c..1cdf575 100644
--- a/buildstream/utils.py
+++ b/buildstream/utils.py
@@ -563,6 +563,49 @@ def _get_dir_size(path):
     return get_size(path)
 
 
+# _parse_size():
+#
+# Convert a string representing data size to a number of
+# bytes. E.g. "2K" -> 2048, "1.5K" -> 1536 (results are truncated).
+#
+# This uses the same format as systemd's
+# [resource-control](https://www.freedesktop.org/software/systemd/man/systemd.resource-control.html#).
+#
+# Arguments:
+#     size (str) The string to parse
+#     volume (str) A path on the volume to consider for percentage
+#                  specifications
+#
+# Returns:
+#     (int|None) The number of bytes, or None if 'infinity' was specified.
+#
+# Raises:
+#     UtilError if the string is not a valid data size.
+#
+def _parse_size(size, volume):
+    if size == 'infinity':
+        return None
+
+    matches = re.fullmatch(r'([0-9]+\.?[0-9]*)([KMGT%]?)', size)
+    if matches is None:
+        raise UtilError("{} is not a valid data size.".format(size))
+
+    num, unit = matches.groups()
+    # The pattern admits fractions ("1.5G"), which a bare int() rejects
+    num = float(num) if '.' in num else int(num)
+    if unit == '%':
+        if num > 100:
+            raise UtilError("{}% is not a valid percentage value.".format(float(num)))
+
+        stat_ = os.statvfs(volume)
+        # POSIX counts f_blocks in units of f_frsize, not f_bsize
+        disk_size = stat_.f_blocks * stat_.f_frsize
+
+        return int(disk_size * num / 100)
+    units = ('', 'K', 'M', 'G', 'T')
+    return int(num * 1024**units.index(unit))
+
+
 # A sentinel to be used as a default argument for functions that need
 # to distinguish between a kwarg set to None and an unset kwarg.
 _sentinel = object()