You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by gi...@apache.org on 2020/12/29 13:24:27 UTC

[buildstream] branch jonathan/cache-cache-size created (now 4c3d3e3)

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch jonathan/cache-cache-size
in repository https://gitbox.apache.org/repos/asf/buildstream.git.


      at 4c3d3e3  Add tests for cache size

This branch includes the following new commits:

     new d6bf2fe  Use ArtifactCache's get_cache_size when calculating the quota
     new b31395f  artifactcache: Read and write cache size to disk
     new 4c3d3e3  Add tests for cache size

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[buildstream] 02/03: artifactcache: Read and write cache size to disk

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jonathan/cache-cache-size
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit b31395f7850699492698d46f498aacd69e5e8b61
Author: Jonathan Maw <jo...@codethink.co.uk>
AuthorDate: Fri Aug 17 15:36:25 2018 +0100

    artifactcache: Read and write cache size to disk
---
 buildstream/_artifactcache/__init__.py      |  2 +-
 buildstream/_artifactcache/artifactcache.py | 55 +++++++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/buildstream/_artifactcache/__init__.py b/buildstream/_artifactcache/__init__.py
index 07ed52b..fad483a 100644
--- a/buildstream/_artifactcache/__init__.py
+++ b/buildstream/_artifactcache/__init__.py
@@ -17,4 +17,4 @@
 #  Authors:
 #        Tristan Van Berkom <tr...@codethink.co.uk>
 
-from .artifactcache import ArtifactCache, ArtifactCacheSpec
+from .artifactcache import ArtifactCache, ArtifactCacheSpec, CACHE_SIZE_FILE
diff --git a/buildstream/_artifactcache/artifactcache.py b/buildstream/_artifactcache/artifactcache.py
index 95dbc08..f28fe39 100644
--- a/buildstream/_artifactcache/artifactcache.py
+++ b/buildstream/_artifactcache/artifactcache.py
@@ -28,6 +28,9 @@ from .. import utils
 from .. import _yaml
 
 
+CACHE_SIZE_FILE = "cache_size"
+
+
 # An ArtifactCacheSpec holds the user configuration for a single remote
 # artifact cache.
 #
@@ -283,7 +286,11 @@ class ArtifactCache():
         # If we don't currently have an estimate, figure out the real
         # cache size.
         if self.estimated_size is None:
-            self.estimated_size = self.calculate_cache_size()
+            stored_size = self._read_cache_size()
+            if stored_size is not None:
+                self.estimated_size = stored_size
+            else:
+                self.estimated_size = self.calculate_cache_size()
 
         return self.estimated_size
 
@@ -542,6 +549,7 @@ class ArtifactCache():
             self.estimated_size = self.calculate_cache_size()
 
         self.estimated_size += artifact_size
+        self._write_cache_size(self.estimated_size)
 
     # _set_cache_size()
     #
@@ -552,6 +560,49 @@ class ArtifactCache():
     def _set_cache_size(self, cache_size):
         self.estimated_size = cache_size
 
+        # set_cache_size is called in cleanup, where it may set the cache to None
+        if self.estimated_size is not None:
+            self._write_cache_size(self.estimated_size)
+
+    # _write_cache_size()
+    #
+    # Writes the given size of the artifact to the cache's size file
+    #
+    def _write_cache_size(self, size):
+        assert isinstance(size, int)
+        size_file_path = os.path.join(self.context.artifactdir, CACHE_SIZE_FILE)
+        with open(size_file_path, "w") as f:
+            f.write(str(size))
+
+    # _read_cache_size()
+    #
+    # Reads and returns the size of the artifact cache that's stored in the
+    # cache's size file
+    #
+    def _read_cache_size(self):
+        size_file_path = os.path.join(self.context.artifactdir, CACHE_SIZE_FILE)
+
+        if not os.path.exists(size_file_path):
+            return None
+
+        with open(size_file_path, "r") as f:
+            size = f.read()
+
+        try:
+            num_size = int(size)
+        except ValueError as e:
+            raise ArtifactError("Size '{}' parsed from '{}' was not an integer".format(
+                size, size_file_path)) from e
+
+        return num_size
+
+    # _calculate_cache_quota()
+    #
+    # Calculates and sets the cache quota and lower threshold based on the
+    # quota set in Context.
+    # It checks that the quota is both a valid expression, and that there is
+    # enough disk space to satisfy that quota
+    #
     def _calculate_cache_quota(self):
         # Headroom intended to give BuildStream a bit of leeway.
         # This acts as the minimum size of cache_quota and also
@@ -577,7 +628,7 @@ class ArtifactCache():
         stat = os.statvfs(artifactdir_volume)
         available_space = (stat.f_bsize * stat.f_bavail)
 
-        cache_size = self.calculate_cache_size()
+        cache_size = self.get_approximate_cache_size()
 
         # Ensure system has enough storage for the cache_quota
         #


[buildstream] 03/03: Add tests for cache size

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jonathan/cache-cache-size
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 4c3d3e3d0e5cf0a56e8e92c3d1c9488718f2f964
Author: Jonathan Maw <jo...@codethink.co.uk>
AuthorDate: Mon Aug 20 16:38:41 2018 +0100

    Add tests for cache size
---
 tests/artifactcache/cache_size.py | 62 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tests/artifactcache/cache_size.py b/tests/artifactcache/cache_size.py
new file mode 100644
index 0000000..0d12cda
--- /dev/null
+++ b/tests/artifactcache/cache_size.py
@@ -0,0 +1,62 @@
+import os
+import pytest
+
+from buildstream import _yaml
+from buildstream._artifactcache import CACHE_SIZE_FILE
+
+from tests.testutils import cli, create_element_size
+
+# XXX: Currently lacking:
+#      * A way to check whether it's faster to read cache size on
+#        successive invocations.
+#      * A way to check whether the cache size file has been read.
+
+
+def create_project(project_dir):
+    project_file = os.path.join(project_dir, "project.conf")
+    project_conf = {
+        "name": "test"
+    }
+    _yaml.dump(project_conf, project_file)
+    element_name = "test.bst"
+    create_element_size(element_name, project_dir, ".", [], 1024)
+
+
+def test_cache_size_roundtrip(cli, tmpdir):
+    # Builds (to put files in the cache), then invokes buildstream again
+    # to check nothing breaks
+
+    # Create project
+    project_dir = str(tmpdir)
+    create_project(project_dir)
+
+    # Build, to populate the cache
+    res = cli.run(project=project_dir, args=["build", "test.bst"])
+    res.assert_success()
+
+    # Show, to check that nothing breaks while reading cache size
+    res = cli.run(project=project_dir, args=["show", "test.bst"])
+    res.assert_success()
+
+
+def test_cache_size_write(cli, tmpdir):
+    # Builds (to put files in the cache), then checks a number is
+    # written to the cache size file.
+
+    project_dir = str(tmpdir)
+    create_project(project_dir)
+
+    # Artifact cache must be in a known place
+    artifactdir = os.path.join(project_dir, "artifacts")
+    cli.configure({"artifactdir": artifactdir})
+
+    # Build, to populate the cache
+    res = cli.run(project=project_dir, args=["build", "test.bst"])
+    res.assert_success()
+
+    # Inspect the artifact cache
+    sizefile = os.path.join(artifactdir, CACHE_SIZE_FILE)
+    assert os.path.isfile(sizefile)
+    with open(sizefile, "r") as f:
+        size_data = f.read()
+    size = int(size_data)


[buildstream] 01/03: Use ArtifactCache's get_cache_size when calculating the quota

Posted by gi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jonathan/cache-cache-size
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit d6bf2feffab79b1f4b93a61338f137fba08335ca
Author: Jonathan Maw <jo...@codethink.co.uk>
AuthorDate: Wed Aug 15 18:08:00 2018 +0100

    Use ArtifactCache's get_cache_size when calculating the quota
    
    The changes to tests are required because I moved the cache size
    validation into `ArtifactCache`. This matters because some tests
    instantiate `ArtifactCache`s without running the validation done in
    `Context.load`.
---
 buildstream/_artifactcache/artifactcache.py | 67 ++++++++++++++++++++++++--
 buildstream/_artifactcache/cascache.py      |  2 +
 buildstream/_context.py                     | 74 ++---------------------------
 buildstream/_frontend/app.py                |  6 ++-
 buildstream/_scheduler/queues/buildqueue.py |  2 +-
 buildstream/_scheduler/scheduler.py         |  4 +-
 tests/testutils/artifactshare.py            |  3 +-
 7 files changed, 79 insertions(+), 79 deletions(-)

diff --git a/buildstream/_artifactcache/artifactcache.py b/buildstream/_artifactcache/artifactcache.py
index d98c291..95dbc08 100644
--- a/buildstream/_artifactcache/artifactcache.py
+++ b/buildstream/_artifactcache/artifactcache.py
@@ -82,7 +82,6 @@ class ArtifactCache():
         self.extractdir = os.path.join(context.artifactdir, 'extract')
         self.tmpdir = os.path.join(context.artifactdir, 'tmp')
 
-        self.max_size = context.cache_quota
         self.estimated_size = None
 
         self.global_remote_specs = []
@@ -90,6 +89,8 @@ class ArtifactCache():
 
         self._local = False
         self.cache_size = None
+        self.cache_quota = None
+        self.cache_lower_threshold = None
 
         os.makedirs(self.extractdir, exist_ok=True)
         os.makedirs(self.tmpdir, exist_ok=True)
@@ -227,7 +228,7 @@ class ArtifactCache():
     def clean(self):
         artifacts = self.list_artifacts()
 
-        while self.calculate_cache_size() >= self.context.cache_quota - self.context.cache_lower_threshold:
+        while self.calculate_cache_size() >= self.cache_quota - self.cache_lower_threshold:
             try:
                 to_remove = artifacts.pop(0)
             except IndexError:
@@ -241,7 +242,7 @@ class ArtifactCache():
                           "Please increase the cache-quota in {}."
                           .format(self.context.config_origin or default_conf))
 
-                if self.calculate_cache_size() > self.context.cache_quota:
+                if self.calculate_cache_size() > self.cache_quota:
                     raise ArtifactError("Cache too full. Aborting.",
                                         detail=detail,
                                         reason="cache-too-full")
@@ -551,6 +552,66 @@ class ArtifactCache():
     def _set_cache_size(self, cache_size):
         self.estimated_size = cache_size
 
+    def _calculate_cache_quota(self):
+        # Headroom intended to give BuildStream a bit of leeway.
+        # This acts as the minimum size of cache_quota and also
+        # is taken from the user requested cache_quota.
+        #
+        if 'BST_TEST_SUITE' in os.environ:
+            headroom = 0
+        else:
+            headroom = 2e9
+
+        artifactdir_volume = self.context.artifactdir
+        while not os.path.exists(artifactdir_volume):
+            artifactdir_volume = os.path.dirname(artifactdir_volume)
+
+        try:
+            cache_quota = utils._parse_size(self.context.config_cache_quota, artifactdir_volume)
+        except utils.UtilError as e:
+            raise LoadError(LoadErrorReason.INVALID_DATA,
+                            "{}\nPlease specify the value in bytes or as a % of full disk space.\n"
+                            "\nValid values are, for example: 800M 10G 1T 50%\n"
+                            .format(str(e))) from e
+
+        stat = os.statvfs(artifactdir_volume)
+        available_space = (stat.f_bsize * stat.f_bavail)
+
+        cache_size = self.calculate_cache_size()
+
+        # Ensure system has enough storage for the cache_quota
+        #
+        # If cache_quota is none, set it to the maximum it could possibly be.
+        #
+        # Also check that cache_quota is atleast as large as our headroom.
+        #
+        if cache_quota is None:  # Infinity, set to max system storage
+            cache_quota = cache_size + available_space
+        if cache_quota < headroom:  # Check minimum
+            raise LoadError(LoadErrorReason.INVALID_DATA,
+                            "Invalid cache quota ({}): ".format(utils._pretty_size(cache_quota)) +
+                            "BuildStream requires a minimum cache quota of 2G.")
+        elif cache_quota > cache_size + available_space:  # Check maximum
+            raise LoadError(LoadErrorReason.INVALID_DATA,
+                            ("Your system does not have enough available " +
+                             "space to support the cache quota specified.\n" +
+                             "You currently have:\n" +
+                             "- {used} of cache in use at {local_cache_path}\n" +
+                             "- {available} of available system storage").format(
+                                 used=utils._pretty_size(cache_size),
+                                 local_cache_path=self.context.artifactdir,
+                                 available=utils._pretty_size(available_space)))
+
+        # Place a slight headroom (2e9 (2GB) on the cache_quota) into
+        # cache_quota to try and avoid exceptions.
+        #
+        # Of course, we might still end up running out during a build
+        # if we end up writing more than 2G, but hey, this stuff is
+        # already really fuzzy.
+        #
+        self.cache_quota = cache_quota - headroom
+        self.cache_lower_threshold = self.cache_quota / 2
+
 
 # _configured_remote_artifact_cache_specs():
 #
diff --git a/buildstream/_artifactcache/cascache.py b/buildstream/_artifactcache/cascache.py
index 074899d..ac21cc0 100644
--- a/buildstream/_artifactcache/cascache.py
+++ b/buildstream/_artifactcache/cascache.py
@@ -61,6 +61,8 @@ class CASCache(ArtifactCache):
         os.makedirs(os.path.join(self.casdir, 'refs', 'heads'), exist_ok=True)
         os.makedirs(os.path.join(self.casdir, 'objects'), exist_ok=True)
 
+        self._calculate_cache_quota()
+
         self._enable_push = enable_push
 
         # Per-project list of _CASRemote instances.
diff --git a/buildstream/_context.py b/buildstream/_context.py
index 8ebb61d..a94d374 100644
--- a/buildstream/_context.py
+++ b/buildstream/_context.py
@@ -64,12 +64,6 @@ class Context():
         # The locations from which to push and pull prebuilt artifacts
         self.artifact_cache_specs = []
 
-        # The artifact cache quota
-        self.cache_quota = None
-
-        # The lower threshold to which we aim to reduce the cache size
-        self.cache_lower_threshold = None
-
         # The directory to store build logs
         self.logdir = None
 
@@ -124,6 +118,8 @@ class Context():
         self._workspaces = None
         self._log_handle = None
         self._log_filename = None
+        self.config_cache_quota = 'infinity'
+        self.artifactdir_volume = None
 
     # load()
     #
@@ -183,71 +179,7 @@ class Context():
         cache = _yaml.node_get(defaults, Mapping, 'cache')
         _yaml.node_validate(cache, ['quota'])
 
-        artifactdir_volume = self.artifactdir
-        while not os.path.exists(artifactdir_volume):
-            artifactdir_volume = os.path.dirname(artifactdir_volume)
-
-        # We read and parse the cache quota as specified by the user
-        cache_quota = _yaml.node_get(cache, str, 'quota', default_value='infinity')
-        try:
-            cache_quota = utils._parse_size(cache_quota, artifactdir_volume)
-        except utils.UtilError as e:
-            raise LoadError(LoadErrorReason.INVALID_DATA,
-                            "{}\nPlease specify the value in bytes or as a % of full disk space.\n"
-                            "\nValid values are, for example: 800M 10G 1T 50%\n"
-                            .format(str(e))) from e
-
-        # Headroom intended to give BuildStream a bit of leeway.
-        # This acts as the minimum size of cache_quota and also
-        # is taken from the user requested cache_quota.
-        #
-        if 'BST_TEST_SUITE' in os.environ:
-            headroom = 0
-        else:
-            headroom = 2e9
-
-        stat = os.statvfs(artifactdir_volume)
-        available_space = (stat.f_bsize * stat.f_bavail)
-
-        # Again, the artifact directory may not yet have been created yet
-        #
-        if not os.path.exists(self.artifactdir):
-            cache_size = 0
-        else:
-            cache_size = utils._get_dir_size(self.artifactdir)
-
-        # Ensure system has enough storage for the cache_quota
-        #
-        # If cache_quota is none, set it to the maximum it could possibly be.
-        #
-        # Also check that cache_quota is atleast as large as our headroom.
-        #
-        if cache_quota is None:  # Infinity, set to max system storage
-            cache_quota = cache_size + available_space
-        if cache_quota < headroom:  # Check minimum
-            raise LoadError(LoadErrorReason.INVALID_DATA,
-                            "Invalid cache quota ({}): ".format(utils._pretty_size(cache_quota)) +
-                            "BuildStream requires a minimum cache quota of 2G.")
-        elif cache_quota > cache_size + available_space:  # Check maximum
-            raise LoadError(LoadErrorReason.INVALID_DATA,
-                            ("Your system does not have enough available " +
-                             "space to support the cache quota specified.\n" +
-                             "You currently have:\n" +
-                             "- {used} of cache in use at {local_cache_path}\n" +
-                             "- {available} of available system storage").format(
-                                 used=utils._pretty_size(cache_size),
-                                 local_cache_path=self.artifactdir,
-                                 available=utils._pretty_size(available_space)))
-
-        # Place a slight headroom (2e9 (2GB) on the cache_quota) into
-        # cache_quota to try and avoid exceptions.
-        #
-        # Of course, we might still end up running out during a build
-        # if we end up writing more than 2G, but hey, this stuff is
-        # already really fuzzy.
-        #
-        self.cache_quota = cache_quota - headroom
-        self.cache_lower_threshold = self.cache_quota / 2
+        self.config_cache_quota = _yaml.node_get(cache, str, 'quota', default_value='infinity')
 
         # Load artifact share configuration
         self.artifact_cache_specs = ArtifactCache.specs_from_config_node(defaults)
diff --git a/buildstream/_frontend/app.py b/buildstream/_frontend/app.py
index 1550fbc..1e357f1 100644
--- a/buildstream/_frontend/app.py
+++ b/buildstream/_frontend/app.py
@@ -198,8 +198,10 @@ class App():
             option_value = self._main_options.get(cli_option)
             if option_value is not None:
                 setattr(self.context, context_attr, option_value)
-
-        Platform.create_instance(self.context)
+        try:
+            Platform.create_instance(self.context)
+        except BstError as e:
+            self._error_exit(e, "Error instantiating platform")
 
         # Create the logger right before setting the message handler
         self.logger = LogLine(self.context,
diff --git a/buildstream/_scheduler/queues/buildqueue.py b/buildstream/_scheduler/queues/buildqueue.py
index 5967fbf..2009fce 100644
--- a/buildstream/_scheduler/queues/buildqueue.py
+++ b/buildstream/_scheduler/queues/buildqueue.py
@@ -97,7 +97,7 @@ class BuildQueue(Queue):
             cache = element._get_artifact_cache()
             cache._add_artifact_size(artifact_size)
 
-            if cache.get_approximate_cache_size() > self._scheduler.context.cache_quota:
+            if cache.get_approximate_cache_size() > cache.cache_quota:
                 self._scheduler._check_cache_size_real()
 
     def done(self, job, element, result, success):
diff --git a/buildstream/_scheduler/scheduler.py b/buildstream/_scheduler/scheduler.py
index 3d1d79b..38d38be 100644
--- a/buildstream/_scheduler/scheduler.py
+++ b/buildstream/_scheduler/scheduler.py
@@ -29,6 +29,7 @@ from contextlib import contextmanager
 # Local imports
 from .resources import Resources, ResourceType
 from .jobs import CacheSizeJob, CleanupJob
+from .._platform import Platform
 
 
 # A decent return code for Scheduler.run()
@@ -316,7 +317,8 @@ class Scheduler():
         self._sched()
 
     def _run_cleanup(self, cache_size):
-        if cache_size and cache_size < self.context.cache_quota:
+        platform = Platform.get_platform()
+        if cache_size and cache_size < platform.artifactcache.cache_quota:
             return
 
         job = CleanupJob(self, 'cleanup', 'cleanup',
diff --git a/tests/testutils/artifactshare.py b/tests/testutils/artifactshare.py
index 76b729e..05e87a4 100644
--- a/tests/testutils/artifactshare.py
+++ b/tests/testutils/artifactshare.py
@@ -140,6 +140,7 @@ class ArtifactShare():
 
         return statvfs_result(f_blocks=self.total_space,
                               f_bfree=self.free_space - repo_size,
+                              f_bavail=self.free_space - repo_size,
                               f_bsize=1)
 
 
@@ -156,4 +157,4 @@ def create_artifact_share(directory, *, total_space=None, free_space=None):
         share.close()
 
 
-statvfs_result = namedtuple('statvfs_result', 'f_blocks f_bfree f_bsize')
+statvfs_result = namedtuple('statvfs_result', 'f_blocks f_bfree f_bsize f_bavail')