You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by no...@apache.org on 2020/12/29 12:42:39 UTC

[buildstream] branch jonathan/yamlcache-no-read created (now b67b8bb)

This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a change to branch jonathan/yamlcache-no-read
in repository https://gitbox.apache.org/repos/asf/buildstream.git.


      at b67b8bb  yamlcache: Lookup files in the cache without reading the file

This branch includes the following new commits:

     new b67b8bb  yamlcache: Lookup files in the cache without reading the file

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[buildstream] 01/01: yamlcache: Lookup files in the cache without reading the file

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch jonathan/yamlcache-no-read
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit b67b8bba1d49b555af9a651c6a618bdfc2aba228
Author: Jonathan Maw <jo...@codethink.co.uk>
AuthorDate: Thu Jan 31 16:30:55 2019 +0000

    yamlcache: Lookup files in the cache without reading the file
    
    For files that are not part of a junction (i.e. not checked-out into a
    temporary directory), use the mtime to see if the file has changed.
    
    For files that are part of a junction, use the junction element's cache key.
---
 buildstream/_yaml.py        |  8 ++++----
 buildstream/_yamlcache.py   | 26 +++++++++++++++++---------
 tests/frontend/yamlcache.py | 32 ++++++++++++--------------------
 3 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/buildstream/_yaml.py b/buildstream/_yaml.py
index 7e12183..f1d16e2 100644
--- a/buildstream/_yaml.py
+++ b/buildstream/_yaml.py
@@ -197,12 +197,12 @@ def load(filename, shortname=None, copy_tree=False, *, project=None, yaml_cache=
 
     try:
         data = None
-        with open(filename) as f:
-            contents = f.read()
-        if yaml_cache:
-            data, key = yaml_cache.get(project, filename, contents, copy_tree)
 
+        if yaml_cache:
+            data, key = yaml_cache.get(project, filename, copy_tree)
         if not data:
+            with open(filename) as f:
+                contents = f.read()
             data = load_data(contents, file, copy_tree=copy_tree)
             if yaml_cache:
                 yaml_cache.put_from_key(project, filename, key, data)
diff --git a/buildstream/_yamlcache.py b/buildstream/_yamlcache.py
index 8911700..07a1b8d 100644
--- a/buildstream/_yamlcache.py
+++ b/buildstream/_yamlcache.py
@@ -127,15 +127,14 @@ class YamlCache():
     # Args:
     #    project (Project) or None: The project this file is in, if it exists.
     #    filepath (str): The absolute path to the file.
-    #    contents (str): The contents of the file to be cached
     #    copy_tree (bool): Whether the data should make a copy when it's being generated
     #                      (i.e. exactly as when called in yaml)
     #
     # Returns:
     #    (decorated dict): The parsed yaml from the cache, or None if the file isn't in the cache.
     #    (str):            The key used to look up the parsed yaml in the cache
-    def get(self, project, filepath, contents, copy_tree):
-        key = self._calculate_key(contents, copy_tree)
+    def get(self, project, filepath, copy_tree):
+        key = self._calculate_key(project, filepath, copy_tree)
         data = self._get(project, filepath, key)
         return data, key
 
@@ -146,12 +145,11 @@ class YamlCache():
     # Args:
     #    project (Project): The project this file is in.
     #    filepath (str): The path to the file.
-    #    contents (str): The contents of the file that has been cached
     #    copy_tree (bool): Whether the data should make a copy when it's being generated
     #                      (i.e. exactly as when called in yaml)
     #    value (decorated dict): The data to put into the cache.
-    def put(self, project, filepath, contents, copy_tree, value):
-        key = self._calculate_key(contents, copy_tree)
+    def put(self, project, filepath, copy_tree, value):
+        key = self._calculate_key(project, filepath, copy_tree)
         self.put_from_key(project, filepath, key, value)
 
     # put_from_key():
@@ -213,13 +211,23 @@ class YamlCache():
     # Calculates a key for putting into the cache.
     #
     # Args:
-    #    (basic object)... : Any number of strictly-ordered basic objects
+    #    project (Project) or None: The project this file is in.
+    #    filepath (str): The path to the file.
+    #    copy_tree (bool): Whether the data should make a copy when it's being generated
+    #                      (i.e. exactly as when called in yaml)
     #
     # Returns:
     #   (str): A key made out of every arg passed in
     @staticmethod
-    def _calculate_key(*args):
-        string = pickle.dumps(args)
+    def _calculate_key(project, filepath, copy_tree):
+        if project and project.junction:
+            # files in a junction only change if the junction element changes
+            # NOTE: This may change when junction workspaces are revisited/fixed
+            content_key = project.junction._get_cache_key()
+        else:
+            content_key = os.stat(filepath).st_mtime
+        # Pickle a tuple: a second positional argument to dumps() is the protocol
+        string = pickle.dumps((content_key, copy_tree))
         return hashlib.sha1(string).hexdigest()
 
     # _get():
diff --git a/tests/frontend/yamlcache.py b/tests/frontend/yamlcache.py
index 99b5d71..5dc52d1 100644
--- a/tests/frontend/yamlcache.py
+++ b/tests/frontend/yamlcache.py
@@ -14,10 +14,10 @@ from contextlib import contextmanager
 
 
 def generate_project(tmpdir, ref_storage, with_junction, name="test"):
-    if with_junction == 'junction':
+    if with_junction:
         subproject_dir = generate_project(
             tmpdir, ref_storage,
-            'no-junction', name='test-subproject'
+            False, name='test-subproject'
         )
 
     project_dir = os.path.join(tmpdir, name)
@@ -33,7 +33,7 @@ def generate_project(tmpdir, ref_storage, with_junction, name="test"):
     _yaml.dump(project_conf, project_conf_path)
 
     # elements
-    if with_junction == 'junction':
+    if with_junction:
         junction_name = 'junction.bst'
         junction_dir = os.path.join(project_dir, elements_path)
         junction_path = os.path.join(project_dir, elements_path, junction_name)
@@ -58,12 +58,6 @@ def with_yamlcache(project_dir):
         yield yamlcache, project
 
 
-def yamlcache_key(yamlcache, in_file, copy_tree=False):
-    with open(in_file) as f:
-        key = yamlcache._calculate_key(f.read(), copy_tree)
-    return key
-
-
 def modified_file(input_file, tmpdir):
     with open(input_file) as f:
         data = f.read()
@@ -77,12 +71,13 @@ def modified_file(input_file, tmpdir):
 
 
 @pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
-@pytest.mark.parametrize('with_junction', ['no-junction', 'junction'])
-@pytest.mark.parametrize('move_project', ['move', 'no-move'])
-def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
+@pytest.mark.parametrize('with_junction', [True, False], ids=['junction', 'no-junction'])
+def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction):
     # Generate the project
     project = generate_project(str(tmpdir), ref_storage, with_junction)
-    if with_junction == 'junction':
+    element_path = os.path.join(project, 'elements', 'test.bst')
+    element_mtime = 0
+    if with_junction:
         result = cli.run(project=project, args=['source', 'fetch', '--track', 'junction.bst'])
         result.assert_success()
 
@@ -90,17 +85,14 @@ def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
     result = cli.run(project=project, args=['show', 'test.bst'])
     result.assert_success()
 
-    element_path = os.path.join(project, 'elements', 'test.bst')
     with with_yamlcache(project) as (yc, prj):
         # Check that it's in the cache
         assert yc.is_cached(prj, element_path)
 
-        # *Absolutely* horrible cache corruption to check it's being used
-        # Modifying the data from the cache is fraught with danger,
-        # so instead I'll load a modified version of the original file
+        # Modify files in the yaml cache to test whether it's being used
         temppath = modified_file(element_path, str(tmpdir))
         contents = _yaml.load(temppath, copy_tree=False, project=prj)
-        key = yamlcache_key(yc, element_path)
+        key = yc._calculate_key(prj, element_path, copy_tree=False)
         yc.put_from_key(prj, element_path, key, contents)
 
     # Show that a variable has been added
@@ -112,13 +104,13 @@ def test_yamlcache_used(cli, tmpdir, ref_storage, with_junction, move_project):
 
 
 @pytest.mark.parametrize('ref_storage', ['inline', 'project.refs'])
-@pytest.mark.parametrize('with_junction', ['junction', 'no-junction'])
+@pytest.mark.parametrize('with_junction', [True, False], ids=['junction', 'no-junction'])
 def test_yamlcache_changed_file(cli, tmpdir, ref_storage, with_junction):
     # i.e. a file is cached, the file is changed, loading the file (with cache) returns new data
     # inline and junction can only be changed by opening a workspace
     # Generate the project
     project = generate_project(str(tmpdir), ref_storage, with_junction)
-    if with_junction == 'junction':
+    if with_junction:
         result = cli.run(project=project, args=['source', 'fetch', '--track', 'junction.bst'])
         result.assert_success()