Posted to commits@buildstream.apache.org by no...@apache.org on 2020/12/29 12:54:09 UTC

[buildstream] branch juerg/public-data created (now 958ddeb)

This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a change to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git.


      at 958ddeb  _artifact.py: Use CASCache.contains_directory_and_blobs()

This branch includes the following new commits:

     new 08ac8eb  _artifact.py: don't consider an artifact cached if public data is missing
     new d1bdfc4  cascache.py: use buildbox-casd when checking whether a file is cached
     new 9f7fafa  _artifact.py: Do not use `Context.require_artifact_directories`
     new 4e68b02  _context.py: Drop `require_artifact_directories`
     new e8e7741  _artifactcache.py: Fix misleading log message when pulling artifacts
     new a791e0a  _artifactcache.py: Handle BlobNotFound error in pull()
     new 1298d0c  cascache.py: Add allow_partial parameter to fetch_blobs()
     new ffa36b4  tests/frontend/pull.py: Add test_pull_missing_blob_split_share
     new 2f9acf5  WIP: cascache.py: Add contains_directory_and_blobs() method
     new 958ddeb  _artifact.py: Use CASCache.contains_directory_and_blobs()

The 10 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.



[buildstream] 01/10: _artifact.py: don't consider an artifact cached if public data is missing

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 08ac8eb3bda78f2682085bf106ea16c638d93313
Author: Abderrahim Kitouni <ak...@gnome.org>
AuthorDate: Sun Mar 22 20:28:54 2020 +0100

    _artifact.py: don't consider an artifact cached if public data is missing
    
    Part of #1264
---
 src/buildstream/_artifact.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/buildstream/_artifact.py b/src/buildstream/_artifact.py
index a9cd56c..659facb 100644
--- a/src/buildstream/_artifact.py
+++ b/src/buildstream/_artifact.py
@@ -443,6 +443,11 @@ class Artifact:
             self._cached = False
             return False
 
+        # Check whether public data is available
+        if not self._cas.contains_file(artifact.public_data):
+            self._cached = False
+            return False
+
         self._cached = True
         return True
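
A sketch of the failure mode this check guards against: a caller that
reads public data straight from the local CAS would hit a missing object
whenever the blob has been pruned while the artifact proto survived. The
caller below is hypothetical, for illustration only; `objpath()` is an
existing CASCache helper.

    # Hypothetical caller, not part of this patch.
    def load_public_data(self):
        path = self._cas.objpath(self._get_proto().public_data)
        with open(path, "rb") as f:  # FileNotFoundError if the blob was pruned
            return f.read()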
 


[buildstream] 03/10: _artifact.py: Do not use `Context.require_artifact_directories`

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 9f7fafafbf90e81752d3205ebd062f3f47147907
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Tue Apr 7 17:51:48 2020 +0200

    _artifact.py: Do not use `Context.require_artifact_directories`
    
    It is always `True`.
---
 src/buildstream/_artifact.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/buildstream/_artifact.py b/src/buildstream/_artifact.py
index 659facb..0e1355f 100644
--- a/src/buildstream/_artifact.py
+++ b/src/buildstream/_artifact.py
@@ -429,17 +429,11 @@ class Artifact:
             self._cached = False
             return False
 
-        # Determine whether directories are required
-        require_directories = context.require_artifact_directories
         # Determine whether file contents are required as well
         require_files = context.require_artifact_files or self._element._artifact_files_required()
 
         # Check whether 'files' subdirectory is available, with or without file contents
-        if (
-            require_directories
-            and str(artifact.files)
-            and not self._cas.contains_directory(artifact.files, with_files=require_files)
-        ):
+        if str(artifact.files) and not self._cas.contains_directory(artifact.files, with_files=require_files):
             self._cached = False
             return False
 


[buildstream] 09/10: WIP: cascache.py: Add contains_directory_and_blobs() method

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 2f9acf50abe8f776988ff8d90e90be6e80f866d7
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Tue Apr 7 18:26:43 2020 +0200

    WIP: cascache.py: Add contains_directory_and_blobs() method
---
 src/buildstream/_cas/cascache.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 61a1a8f..22f5b7c 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -211,6 +211,41 @@ class CASCache:
                 raise CASCacheError("Unsupported buildbox-casd version: FetchTree unimplemented") from e
             raise
 
+    def contains_directory_and_blobs(self, directory, blobs, *, with_files):
+        cas = self.get_cas()
+        local_cas = self.get_local_cas()
+
+        if str(directory):
+            request = local_cas_pb2.FetchTreeRequest()
+            request.root_digest.CopyFrom(directory)
+            request.fetch_file_blobs = with_files
+            directory_future = local_cas.FetchTree.future(request)
+        else:
+            directory_future = None
+
+        if blobs:
+            request = remote_execution_pb2.FindMissingBlobsRequest()
+            request.blob_digests.extend(blobs)
+            blobs_future = cas.FindMissingBlobs.future(request)
+        else:
+            blobs_future = None
+
+        try:
+            if directory_future:
+                directory_future.result()
+
+            if blobs_future:
+                response = blobs_future.result()
+                return len(response.missing_blob_digests) == 0
+            else:
+                return True
+        except grpc.RpcError as e:
+            if e.code() == grpc.StatusCode.NOT_FOUND:
+                return False
+            if e.code() == grpc.StatusCode.UNIMPLEMENTED:
+                raise CASCacheError("Unsupported buildbox-casd version: FetchTree unimplemented") from e
+            raise
+
     # checkout():
     #
     # Checkout the specified directory digest.
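
Both RPCs above are issued as gRPC futures before either result is
awaited, so the FetchTree and FindMissingBlobs round trips to
buildbox-casd overlap instead of running back to back. A minimal sketch
of that pattern with placeholder names (the stubs and requests here are
generic, not real API):

    # .future() sends the request and returns immediately;
    # .result() blocks until the response (or error) arrives.
    future_a = stub_a.MethodA.future(request_a)
    future_b = stub_b.MethodB.future(request_b)
    response_a = future_a.result()  # both calls were already in flight
    response_b = future_b.result()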


[buildstream] 06/10: _artifactcache.py: Handle BlobNotFound error in pull()

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit a791e0a756684c2505f16f0d64537a59c9b90309
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Fri Mar 27 16:54:57 2020 +0100

    _artifactcache.py: Handle BlobNotFound error in pull()
---
 src/buildstream/_artifactcache.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py
index 4290e92..9b800ac 100644
--- a/src/buildstream/_artifactcache.py
+++ b/src/buildstream/_artifactcache.py
@@ -22,6 +22,7 @@ import os
 import grpc
 
 from ._basecache import BaseCache
+from ._cas.casremote import BlobNotFound
 from ._exceptions import ArtifactError, CASError, CacheError, CASRemoteError, RemoteError
 from ._protos.buildstream.v2 import buildstream_pb2, buildstream_pb2_grpc, artifact_pb2, artifact_pb2_grpc
 
@@ -310,6 +311,10 @@ class ArtifactCache(BaseCache):
                     return True
 
                 element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key))
+            except BlobNotFound as e:
+                # Not all blobs are available on this remote
+                element.info("Remote cas ({}) does not have blob {} cached".format(remote, e.blob))
+                continue
             except CASError as e:
                 element.warn("Could not pull from remote {}: {}".format(remote, e))
                 errors.append(e)
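
The new handler relies on `BlobNotFound` carrying the missing digest as
`e.blob`. A sketch of the assumed shape of that exception in
`_cas.casremote` (the actual definition may differ):

    class BlobNotFound(CASRemoteError):
        def __init__(self, blob, msg):
            self.blob = blob
            super().__init__(msg)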


[buildstream] 08/10: tests/frontend/pull.py: Add test_pull_missing_blob_split_share

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit ffa36b46e1218d1dea845b6208cd63fb11d0cf88
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Mon Mar 30 13:48:05 2020 +0200

    tests/frontend/pull.py: Add test_pull_missing_blob_split_share
    
    This is identical to `test_pull_missing_blob` except that it uses
    separate servers for artifact proto index and CAS.
---
 tests/frontend/pull.py | 92 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 63 insertions(+), 29 deletions(-)

diff --git a/tests/frontend/pull.py b/tests/frontend/pull.py
index 3ae394f..1845f32 100644
--- a/tests/frontend/pull.py
+++ b/tests/frontend/pull.py
@@ -8,7 +8,13 @@ import pytest
 from buildstream import utils, _yaml
 from buildstream.testing import cli  # pylint: disable=unused-import
 from buildstream.testing import create_repo
-from tests.testutils import create_artifact_share, generate_junction, assert_shared, assert_not_shared
+from tests.testutils import (
+    create_artifact_share,
+    create_split_share,
+    generate_junction,
+    assert_shared,
+    assert_not_shared,
+)
 
 
 # Project directory
@@ -227,46 +233,74 @@ def test_push_pull_cross_junction(cli, tmpdir, datafiles):
         assert cli.get_element_state(project, "junction.bst:import-etc.bst") == "cached"
 
 
+def _test_pull_missing_blob(cli, project, index, storage):
+    # First build the target element and push to the remote.
+    result = cli.run(project=project, args=["build", "target.bst"])
+    result.assert_success()
+    assert cli.get_element_state(project, "target.bst") == "cached"
+
+    # Assert that everything is now cached in the remote.
+    all_elements = ["target.bst", "import-bin.bst", "import-dev.bst", "compose-all.bst"]
+    for element_name in all_elements:
+        project_name = "test"
+        artifact_name = cli.get_artifact_name(project, project_name, element_name)
+        artifact_proto = index.get_artifact_proto(artifact_name)
+        assert artifact_proto
+        assert storage.get_cas_files(artifact_proto)
+
+    # Now we've pushed, delete the user's local artifact cache
+    # directory and try to redownload it from the share
+    #
+    casdir = os.path.join(cli.directory, "cas")
+    shutil.rmtree(casdir)
+    artifactdir = os.path.join(cli.directory, "artifacts")
+    shutil.rmtree(artifactdir)
+
+    # Assert that nothing is cached locally anymore
+    for element_name in all_elements:
+        assert cli.get_element_state(project, element_name) != "cached"
+
+    # Now delete blobs in the remote without deleting the artifact ref.
+    # This simulates scenarios with concurrent artifact expiry.
+    remote_objdir = os.path.join(storage.repodir, "cas", "objects")
+    shutil.rmtree(remote_objdir)
+
+    # Now try bst build
+    result = cli.run(project=project, args=["build", "target.bst"])
+    result.assert_success()
+
+    # Assert that no artifacts were pulled
+    assert not result.get_pulled_elements()
+
+
 @pytest.mark.datafiles(DATA_DIR)
 def test_pull_missing_blob(cli, tmpdir, datafiles):
     project = str(datafiles)
 
     with create_artifact_share(os.path.join(str(tmpdir), "artifactshare")) as share:
-
-        # First build the target element and push to the remote.
         cli.configure({"artifacts": {"url": share.repo, "push": True}})
-        result = cli.run(project=project, args=["build", "target.bst"])
-        result.assert_success()
-        assert cli.get_element_state(project, "target.bst") == "cached"
 
-        # Assert that everything is now cached in the remote.
-        all_elements = ["target.bst", "import-bin.bst", "import-dev.bst", "compose-all.bst"]
-        for element_name in all_elements:
-            assert_shared(cli, share, project, element_name)
+        _test_pull_missing_blob(cli, project, share, share)
 
-        # Now we've pushed, delete the user's local artifact cache
-        # directory and try to redownload it from the share
-        #
-        casdir = os.path.join(cli.directory, "cas")
-        shutil.rmtree(casdir)
-        artifactdir = os.path.join(cli.directory, "artifacts")
-        shutil.rmtree(artifactdir)
 
-        # Assert that nothing is cached locally anymore
-        for element_name in all_elements:
-            assert cli.get_element_state(project, element_name) != "cached"
+@pytest.mark.datafiles(DATA_DIR)
+def test_pull_missing_blob_split_share(cli, tmpdir, datafiles):
+    project = str(datafiles)
 
-        # Now delete blobs in the remote without deleting the artifact ref.
-        # This simulates scenarios with concurrent artifact expiry.
-        remote_objdir = os.path.join(share.repodir, "cas", "objects")
-        shutil.rmtree(remote_objdir)
+    indexshare = os.path.join(str(tmpdir), "indexshare")
+    storageshare = os.path.join(str(tmpdir), "storageshare")
 
-        # Now try bst build
-        result = cli.run(project=project, args=["build", "target.bst"])
-        result.assert_success()
+    with create_split_share(indexshare, storageshare) as (index, storage):
+        cli.configure(
+            {
+                "artifacts": [
+                    {"url": index.repo, "push": True, "type": "index"},
+                    {"url": storage.repo, "push": True, "type": "storage"},
+                ]
+            }
+        )
 
-        # Assert that no artifacts were pulled
-        assert not result.get_pulled_elements()
+        _test_pull_missing_blob(cli, project, index, storage)
 
 
 @pytest.mark.datafiles(DATA_DIR)


[buildstream] 07/10: cascache.py: Add allow_partial parameter to fetch_blobs()

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 1298d0c485ddc0ccd9aa91b79b70199056117b38
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Mon Mar 30 12:14:01 2020 +0200

    cascache.py: Add allow_partial parameter to fetch_blobs()
    
    This fixes handling of missing blobs in `ArtifactCache.pull()`.
---
 src/buildstream/_artifactcache.py         | 2 +-
 src/buildstream/_cas/cascache.py          | 9 ++++++---
 src/buildstream/_sourcecache.py           | 6 +-----
 src/buildstream/sandbox/_sandboxremote.py | 7 +------
 4 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py
index 9b800ac..9cebeb1 100644
--- a/src/buildstream/_artifactcache.py
+++ b/src/buildstream/_artifactcache.py
@@ -405,7 +405,7 @@ class ArtifactCache(BaseCache):
             remote.init()
 
             # fetch_blobs() will return the blobs that are still missing
-            missing_blobs = self.cas.fetch_blobs(remote, missing_blobs)
+            missing_blobs = self.cas.fetch_blobs(remote, missing_blobs, allow_partial=True)
 
         if missing_blobs:
             raise ArtifactError("Blobs not found on configured artifact servers")
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 03be75c..61a1a8f 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -647,16 +647,19 @@ class CASCache:
 
     # fetch_blobs():
     #
-    # Fetch blobs from remote CAS. Returns missing blobs that could not be fetched.
+    # Fetch blobs from remote CAS. Optionally returns missing blobs that could
+    # not be fetched.
     #
     # Args:
     #    remote (CASRemote): The remote repository to fetch from
     #    digests (list): The Digests of blobs to fetch
+    #    allow_partial (bool): True to return missing blobs, False to raise a
+    #                          BlobNotFound error if a blob is missing
     #
     # Returns: The Digests of the blobs that were not available on the remote CAS
     #
-    def fetch_blobs(self, remote, digests):
-        missing_blobs = []
+    def fetch_blobs(self, remote, digests, *, allow_partial=False):
+        missing_blobs = [] if allow_partial else None
 
         remote.init()
 
diff --git a/src/buildstream/_sourcecache.py b/src/buildstream/_sourcecache.py
index e485fbd..4533a25 100644
--- a/src/buildstream/_sourcecache.py
+++ b/src/buildstream/_sourcecache.py
@@ -242,11 +242,7 @@ class SourceCache(BaseCache):
                 self.cas._fetch_directory(remote, source_proto.files)
                 required_blobs = self.cas.required_blobs_for_directory(source_proto.files)
                 missing_blobs = self.cas.local_missing_blobs(required_blobs)
-                missing_blobs = self.cas.fetch_blobs(remote, missing_blobs)
-
-                if missing_blobs:
-                    source.info("Remote cas ({}) does not have source {} cached".format(remote, display_key))
-                    continue
+                self.cas.fetch_blobs(remote, missing_blobs)
 
                 source.info("Pulled source {} <- {}".format(display_key, remote))
                 return True
diff --git a/src/buildstream/sandbox/_sandboxremote.py b/src/buildstream/sandbox/_sandboxremote.py
index 3dcbb2c..5b03852 100644
--- a/src/buildstream/sandbox/_sandboxremote.py
+++ b/src/buildstream/sandbox/_sandboxremote.py
@@ -291,12 +291,7 @@ class SandboxRemote(SandboxREAPI):
                     blobs_to_fetch = artifactcache.find_missing_blobs(project, local_missing_blobs)
 
                 with CASRemote(self.storage_remote_spec, cascache) as casremote:
-                    remote_missing_blobs = cascache.fetch_blobs(casremote, blobs_to_fetch)
-
-                if remote_missing_blobs:
-                    raise SandboxError(
-                        "{} output files are missing on the CAS server".format(len(remote_missing_blobs))
-                    )
+                    cascache.fetch_blobs(casremote, blobs_to_fetch)
 
     def _execute_action(self, action, flags):
         stdout, stderr = self._get_output()
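
After this change `fetch_blobs()` has two calling conventions, per the
updated docstring. A sketch of both, with caller-side names assumed:

    # Partial fetch: collect what is available, inspect the rest.
    missing = cascache.fetch_blobs(remote, digests, allow_partial=True)
    if missing:
        ...  # e.g. try the next remote, as ArtifactCache.pull() does

    # Strict fetch: raises BlobNotFound if any blob is missing.
    cascache.fetch_blobs(remote, digests)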


[buildstream] 10/10: _artifact.py: Use CASCache.contains_directory_and_blobs()

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 958ddebf9057f0e16f396589f62ace66e6e67f53
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Tue Apr 7 18:26:57 2020 +0200

    _artifact.py: Use CASCache.contains_directory_and_blobs()
---
 src/buildstream/_artifact.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/buildstream/_artifact.py b/src/buildstream/_artifact.py
index 0e1355f..10885f7 100644
--- a/src/buildstream/_artifact.py
+++ b/src/buildstream/_artifact.py
@@ -433,12 +433,10 @@ class Artifact:
         require_files = context.require_artifact_files or self._element._artifact_files_required()
 
         # Check whether 'files' subdirectory is available, with or without file contents
-        if str(artifact.files) and not self._cas.contains_directory(artifact.files, with_files=require_files):
-            self._cached = False
-            return False
-
         # Check whether public data is available
-        if not self._cas.contains_file(artifact.public_data):
+        if not self._cas.contains_directory_and_blobs(
+            artifact.files, [artifact.public_data], with_files=require_files
+        ):
             self._cached = False
             return False
 


[buildstream] 04/10: _context.py: Drop `require_artifact_directories`

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 4e68b02d43d4c0c4e48b3a9a595808fe04a7395d
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Tue Apr 7 17:52:37 2020 +0200

    _context.py: Drop `require_artifact_directories`
    
    It is always `True`.
---
 src/buildstream/_context.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/buildstream/_context.py b/src/buildstream/_context.py
index 090b3e0..1419bf4 100644
--- a/src/buildstream/_context.py
+++ b/src/buildstream/_context.py
@@ -154,9 +154,6 @@ class Context:
         # Whether or not to cache build trees on artifact creation
         self.cache_buildtrees = None
 
-        # Whether directory trees are required for all artifacts in the local cache
-        self.require_artifact_directories = True
-
         # Whether file contents are required for all artifacts in the local cache
         self.require_artifact_files = True
 


[buildstream] 02/10: cascache.py: use buildbox-casd when checking whether a file is cached

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit d1bdfc4acaaa7934e7367c299df22b02eca5bddb
Author: Abderrahim Kitouni <ak...@gnome.org>
AuthorDate: Wed Mar 25 17:22:24 2020 +0100

    cascache.py: use buildbox-casd when checking whether a file is cached
    
    This lets buildbox-casd know the file is needed, so it is not
    deleted when pruning.
---
 src/buildstream/_cas/cascache.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 48b700c..03be75c 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -175,7 +175,13 @@ class CASCache:
     # Returns: True if the file is in the cache, False otherwise
     #
     def contains_file(self, digest):
-        return os.path.exists(self.objpath(digest))
+        cas = self.get_cas()
+
+        request = remote_execution_pb2.FindMissingBlobsRequest()
+        request.blob_digests.append(digest)
+
+        response = cas.FindMissingBlobs(request)
+        return len(response.missing_blob_digests) == 0
 
     # contains_directory():
     #
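
Routing the check through FindMissingBlobs does double duty: besides
answering whether the blob exists, it lets buildbox-casd register the
blob as in use, which is what protects it from pruning. The request also
batches naturally; a hypothetical extension (not part of this patch)
that checks several digests in one round trip:

    def contains_files(self, digests):
        request = remote_execution_pb2.FindMissingBlobsRequest()
        request.blob_digests.extend(digests)
        response = self.get_cas().FindMissingBlobs(request)
        return len(response.missing_blob_digests) == 0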


[buildstream] 05/10: _artifactcache.py: Fix misleading log message when pulling artifacts

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit e8e77415094b4a69cab3afe406c56b9202638c6a
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Mon Mar 30 13:17:16 2020 +0200

    _artifactcache.py: Fix misleading log message when pulling artifacts
    
    Do not print the "Pulled artifact" message when only the artifact
    proto has been pulled, without data. This matches the log messages
    in `SourceCache`.
---
 src/buildstream/_artifactcache.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py
index f1648e9..4290e92 100644
--- a/src/buildstream/_artifactcache.py
+++ b/src/buildstream/_artifactcache.py
@@ -281,7 +281,6 @@ class ArtifactCache(BaseCache):
                 element.status("Pulling artifact {} <- {}".format(display_key, remote))
                 artifact = self._pull_artifact_proto(element, key, remote)
                 if artifact:
-                    element.info("Pulled artifact {} <- {}".format(display_key, remote))
                     break
 
                 element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key))
@@ -307,7 +306,7 @@ class ArtifactCache(BaseCache):
                 element.status("Pulling data for artifact {} <- {}".format(display_key, remote))
 
                 if self._pull_artifact_storage(element, artifact, remote, pull_buildtrees=pull_buildtrees):
-                    element.info("Pulled data for artifact {} <- {}".format(display_key, remote))
+                    element.info("Pulled artifact {} <- {}".format(display_key, remote))
                     return True
 
                 element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key))