You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by no...@apache.org on 2020/12/29 12:54:09 UTC
[buildstream] branch juerg/public-data created (now 958ddeb)
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a change to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git.
at 958ddeb _artifact.py: Use CASCache.contains_directory_and_blobs()
This branch includes the following new commits:
new 08ac8eb _artifact.py: don't consider an artifact cached if public data is missing
new d1bdfc4 cascache.py: use buildbox-casd when checking whether a file is cached
new 9f7fafa _artifact.py: Do not use `Context.require_artifact_directories`
new 4e68b02 _context.py: Drop `require_artifact_directories`
new e8e7741 _artifactcache.py: Fix misleading log message when pulling artifacts
new a791e0a _artifactcache.py: Handle BlobNotFound error in pull()
new 1298d0c cascache.py: Add allow_partial parameter to fetch_blobs()
new ffa36b4 tests/frontend/pull.py: Add test_pull_missing_blob_split_share
new 2f9acf5 WIP: cascache.py: Add contains_directory_and_blobs() method
new 958ddeb _artifact.py: Use CASCache.contains_directory_and_blobs()
The 10 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
[buildstream] 01/10: _artifact.py: don't consider an artifact
cached if public data is missing
Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit 08ac8eb3bda78f2682085bf106ea16c638d93313
Author: Abderrahim Kitouni <ak...@gnome.org>
AuthorDate: Sun Mar 22 20:28:54 2020 +0100
_artifact.py: don't consider an artifact cached if public data is missing
Part of #1264
---
src/buildstream/_artifact.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/buildstream/_artifact.py b/src/buildstream/_artifact.py
index a9cd56c..659facb 100644
--- a/src/buildstream/_artifact.py
+++ b/src/buildstream/_artifact.py
@@ -443,6 +443,11 @@ class Artifact:
self._cached = False
return False
+ # Check whether public data is available
+ if not self._cas.contains_file(artifact.public_data):
+ self._cached = False
+ return False
+
self._cached = True
return True
[buildstream] 03/10: _artifact.py: Do not use
`Context.require_artifact_directories`
Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit 9f7fafafbf90e81752d3205ebd062f3f47147907
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Tue Apr 7 17:51:48 2020 +0200
_artifact.py: Do not use `Context.require_artifact_directories`
`Context.require_artifact_directories` is always `True`, so checking it is redundant.
---
src/buildstream/_artifact.py | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/src/buildstream/_artifact.py b/src/buildstream/_artifact.py
index 659facb..0e1355f 100644
--- a/src/buildstream/_artifact.py
+++ b/src/buildstream/_artifact.py
@@ -429,17 +429,11 @@ class Artifact:
self._cached = False
return False
- # Determine whether directories are required
- require_directories = context.require_artifact_directories
# Determine whether file contents are required as well
require_files = context.require_artifact_files or self._element._artifact_files_required()
# Check whether 'files' subdirectory is available, with or without file contents
- if (
- require_directories
- and str(artifact.files)
- and not self._cas.contains_directory(artifact.files, with_files=require_files)
- ):
+ if str(artifact.files) and not self._cas.contains_directory(artifact.files, with_files=require_files):
self._cached = False
return False
[buildstream] 09/10: WIP: cascache.py: Add
contains_directory_and_blobs() method
Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit 2f9acf50abe8f776988ff8d90e90be6e80f866d7
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Tue Apr 7 18:26:43 2020 +0200
WIP: cascache.py: Add contains_directory_and_blobs() method
---
src/buildstream/_cas/cascache.py | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 61a1a8f..22f5b7c 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -211,6 +211,41 @@ class CASCache:
raise CASCacheError("Unsupported buildbox-casd version: FetchTree unimplemented") from e
raise
+ def contains_directory_and_blobs(self, directory, blobs, *, with_files):
+ cas = self.get_cas()
+ local_cas = self.get_local_cas()
+
+ if str(directory):
+ request = local_cas_pb2.FetchTreeRequest()
+ request.root_digest.CopyFrom(directory)
+ request.fetch_file_blobs = with_files
+ directory_future = local_cas.FetchTree.future(request)
+ else:
+ directory_future = None
+
+ if blobs:
+ request = remote_execution_pb2.FindMissingBlobsRequest()
+ request.blob_digests.extend(blobs)
+ blobs_future = cas.FindMissingBlobs.future(request)
+ else:
+ blobs_future = None
+
+ try:
+ if directory_future:
+ directory_future.result()
+
+ if blobs_future:
+ response = blobs_future.result()
+ return len(response.missing_blob_digests) == 0
+ else:
+ return True
+ except grpc.RpcError as e:
+ if e.code() == grpc.StatusCode.NOT_FOUND:
+ return False
+ if e.code() == grpc.StatusCode.UNIMPLEMENTED:
+ raise CASCacheError("Unsupported buildbox-casd version: FetchTree unimplemented") from e
+ raise
+
# checkout():
#
# Checkout the specified directory digest.
[buildstream] 06/10: _artifactcache.py: Handle BlobNotFound error
in pull()
Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit a791e0a756684c2505f16f0d64537a59c9b90309
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Fri Mar 27 16:54:57 2020 +0100
_artifactcache.py: Handle BlobNotFound error in pull()
---
src/buildstream/_artifactcache.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py
index 4290e92..9b800ac 100644
--- a/src/buildstream/_artifactcache.py
+++ b/src/buildstream/_artifactcache.py
@@ -22,6 +22,7 @@ import os
import grpc
from ._basecache import BaseCache
+from ._cas.casremote import BlobNotFound
from ._exceptions import ArtifactError, CASError, CacheError, CASRemoteError, RemoteError
from ._protos.buildstream.v2 import buildstream_pb2, buildstream_pb2_grpc, artifact_pb2, artifact_pb2_grpc
@@ -310,6 +311,10 @@ class ArtifactCache(BaseCache):
return True
element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key))
+ except BlobNotFound as e:
+ # Not all blobs are available on this remote
+ element.info("Remote cas ({}) does not have blob {} cached".format(remote, e.blob))
+ continue
except CASError as e:
element.warn("Could not pull from remote {}: {}".format(remote, e))
errors.append(e)
[buildstream] 08/10: tests/frontend/pull.py: Add
test_pull_missing_blob_split_share
Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit ffa36b46e1218d1dea845b6208cd63fb11d0cf88
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Mon Mar 30 13:48:05 2020 +0200
tests/frontend/pull.py: Add test_pull_missing_blob_split_share
This is identical to `test_pull_missing_blob` except that it uses
separate servers for artifact proto index and CAS.
---
tests/frontend/pull.py | 92 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 63 insertions(+), 29 deletions(-)
diff --git a/tests/frontend/pull.py b/tests/frontend/pull.py
index 3ae394f..1845f32 100644
--- a/tests/frontend/pull.py
+++ b/tests/frontend/pull.py
@@ -8,7 +8,13 @@ import pytest
from buildstream import utils, _yaml
from buildstream.testing import cli # pylint: disable=unused-import
from buildstream.testing import create_repo
-from tests.testutils import create_artifact_share, generate_junction, assert_shared, assert_not_shared
+from tests.testutils import (
+ create_artifact_share,
+ create_split_share,
+ generate_junction,
+ assert_shared,
+ assert_not_shared,
+)
# Project directory
@@ -227,46 +233,74 @@ def test_push_pull_cross_junction(cli, tmpdir, datafiles):
assert cli.get_element_state(project, "junction.bst:import-etc.bst") == "cached"
+def _test_pull_missing_blob(cli, project, index, storage):
+ # First build the target element and push to the remote.
+ result = cli.run(project=project, args=["build", "target.bst"])
+ result.assert_success()
+ assert cli.get_element_state(project, "target.bst") == "cached"
+
+ # Assert that everything is now cached in the remote.
+ all_elements = ["target.bst", "import-bin.bst", "import-dev.bst", "compose-all.bst"]
+ for element_name in all_elements:
+ project_name = "test"
+ artifact_name = cli.get_artifact_name(project, project_name, element_name)
+ artifact_proto = index.get_artifact_proto(artifact_name)
+ assert artifact_proto
+ assert storage.get_cas_files(artifact_proto)
+
+ # Now we've pushed, delete the user's local artifact cache
+ # directory and try to redownload it from the share
+ #
+ casdir = os.path.join(cli.directory, "cas")
+ shutil.rmtree(casdir)
+ artifactdir = os.path.join(cli.directory, "artifacts")
+ shutil.rmtree(artifactdir)
+
+ # Assert that nothing is cached locally anymore
+ for element_name in all_elements:
+ assert cli.get_element_state(project, element_name) != "cached"
+
+ # Now delete blobs in the remote without deleting the artifact ref.
+ # This simulates scenarios with concurrent artifact expiry.
+ remote_objdir = os.path.join(storage.repodir, "cas", "objects")
+ shutil.rmtree(remote_objdir)
+
+ # Now try bst build
+ result = cli.run(project=project, args=["build", "target.bst"])
+ result.assert_success()
+
+ # Assert that no artifacts were pulled
+ assert not result.get_pulled_elements()
+
+
@pytest.mark.datafiles(DATA_DIR)
def test_pull_missing_blob(cli, tmpdir, datafiles):
project = str(datafiles)
with create_artifact_share(os.path.join(str(tmpdir), "artifactshare")) as share:
-
- # First build the target element and push to the remote.
cli.configure({"artifacts": {"url": share.repo, "push": True}})
- result = cli.run(project=project, args=["build", "target.bst"])
- result.assert_success()
- assert cli.get_element_state(project, "target.bst") == "cached"
- # Assert that everything is now cached in the remote.
- all_elements = ["target.bst", "import-bin.bst", "import-dev.bst", "compose-all.bst"]
- for element_name in all_elements:
- assert_shared(cli, share, project, element_name)
+ _test_pull_missing_blob(cli, project, share, share)
- # Now we've pushed, delete the user's local artifact cache
- # directory and try to redownload it from the share
- #
- casdir = os.path.join(cli.directory, "cas")
- shutil.rmtree(casdir)
- artifactdir = os.path.join(cli.directory, "artifacts")
- shutil.rmtree(artifactdir)
- # Assert that nothing is cached locally anymore
- for element_name in all_elements:
- assert cli.get_element_state(project, element_name) != "cached"
+@pytest.mark.datafiles(DATA_DIR)
+def test_pull_missing_blob_split_share(cli, tmpdir, datafiles):
+ project = str(datafiles)
- # Now delete blobs in the remote without deleting the artifact ref.
- # This simulates scenarios with concurrent artifact expiry.
- remote_objdir = os.path.join(share.repodir, "cas", "objects")
- shutil.rmtree(remote_objdir)
+ indexshare = os.path.join(str(tmpdir), "indexshare")
+ storageshare = os.path.join(str(tmpdir), "storageshare")
- # Now try bst build
- result = cli.run(project=project, args=["build", "target.bst"])
- result.assert_success()
+ with create_split_share(indexshare, storageshare) as (index, storage):
+ cli.configure(
+ {
+ "artifacts": [
+ {"url": index.repo, "push": True, "type": "index"},
+ {"url": storage.repo, "push": True, "type": "storage"},
+ ]
+ }
+ )
- # Assert that no artifacts were pulled
- assert not result.get_pulled_elements()
+ _test_pull_missing_blob(cli, project, index, storage)
@pytest.mark.datafiles(DATA_DIR)
[buildstream] 07/10: cascache.py: Add allow_partial parameter to
fetch_blobs()
Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit 1298d0c485ddc0ccd9aa91b79b70199056117b38
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Mon Mar 30 12:14:01 2020 +0200
cascache.py: Add allow_partial parameter to fetch_blobs()
This fixes handling of missing blobs in `ArtifactCache.pull()`.
---
src/buildstream/_artifactcache.py | 2 +-
src/buildstream/_cas/cascache.py | 9 ++++++---
src/buildstream/_sourcecache.py | 6 +-----
src/buildstream/sandbox/_sandboxremote.py | 7 +------
4 files changed, 9 insertions(+), 15 deletions(-)
diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py
index 9b800ac..9cebeb1 100644
--- a/src/buildstream/_artifactcache.py
+++ b/src/buildstream/_artifactcache.py
@@ -405,7 +405,7 @@ class ArtifactCache(BaseCache):
remote.init()
# fetch_blobs() will return the blobs that are still missing
- missing_blobs = self.cas.fetch_blobs(remote, missing_blobs)
+ missing_blobs = self.cas.fetch_blobs(remote, missing_blobs, allow_partial=True)
if missing_blobs:
raise ArtifactError("Blobs not found on configured artifact servers")
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 03be75c..61a1a8f 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -647,16 +647,19 @@ class CASCache:
# fetch_blobs():
#
- # Fetch blobs from remote CAS. Returns missing blobs that could not be fetched.
+ # Fetch blobs from remote CAS. Optionally returns missing blobs that could
+ # not be fetched.
#
# Args:
# remote (CASRemote): The remote repository to fetch from
# digests (list): The Digests of blobs to fetch
+ # allow_partial (bool): True to return missing blobs, False to raise a
+ # BlobNotFound error if a blob is missing
#
# Returns: The Digests of the blobs that were not available on the remote CAS
#
- def fetch_blobs(self, remote, digests):
- missing_blobs = []
+ def fetch_blobs(self, remote, digests, *, allow_partial=False):
+ missing_blobs = [] if allow_partial else None
remote.init()
diff --git a/src/buildstream/_sourcecache.py b/src/buildstream/_sourcecache.py
index e485fbd..4533a25 100644
--- a/src/buildstream/_sourcecache.py
+++ b/src/buildstream/_sourcecache.py
@@ -242,11 +242,7 @@ class SourceCache(BaseCache):
self.cas._fetch_directory(remote, source_proto.files)
required_blobs = self.cas.required_blobs_for_directory(source_proto.files)
missing_blobs = self.cas.local_missing_blobs(required_blobs)
- missing_blobs = self.cas.fetch_blobs(remote, missing_blobs)
-
- if missing_blobs:
- source.info("Remote cas ({}) does not have source {} cached".format(remote, display_key))
- continue
+ self.cas.fetch_blobs(remote, missing_blobs)
source.info("Pulled source {} <- {}".format(display_key, remote))
return True
diff --git a/src/buildstream/sandbox/_sandboxremote.py b/src/buildstream/sandbox/_sandboxremote.py
index 3dcbb2c..5b03852 100644
--- a/src/buildstream/sandbox/_sandboxremote.py
+++ b/src/buildstream/sandbox/_sandboxremote.py
@@ -291,12 +291,7 @@ class SandboxRemote(SandboxREAPI):
blobs_to_fetch = artifactcache.find_missing_blobs(project, local_missing_blobs)
with CASRemote(self.storage_remote_spec, cascache) as casremote:
- remote_missing_blobs = cascache.fetch_blobs(casremote, blobs_to_fetch)
-
- if remote_missing_blobs:
- raise SandboxError(
- "{} output files are missing on the CAS server".format(len(remote_missing_blobs))
- )
+ cascache.fetch_blobs(casremote, blobs_to_fetch)
def _execute_action(self, action, flags):
stdout, stderr = self._get_output()
[buildstream] 10/10: _artifact.py: Use
CASCache.contains_directory_and_blobs()
Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit 958ddebf9057f0e16f396589f62ace66e6e67f53
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Tue Apr 7 18:26:57 2020 +0200
_artifact.py: Use CASCache.contains_directory_and_blobs()
---
src/buildstream/_artifact.py | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/src/buildstream/_artifact.py b/src/buildstream/_artifact.py
index 0e1355f..10885f7 100644
--- a/src/buildstream/_artifact.py
+++ b/src/buildstream/_artifact.py
@@ -433,12 +433,10 @@ class Artifact:
require_files = context.require_artifact_files or self._element._artifact_files_required()
# Check whether 'files' subdirectory is available, with or without file contents
- if str(artifact.files) and not self._cas.contains_directory(artifact.files, with_files=require_files):
- self._cached = False
- return False
-
# Check whether public data is available
- if not self._cas.contains_file(artifact.public_data):
+ if not self._cas.contains_directory_and_blobs(
+ artifact.files, [artifact.public_data], with_files=require_files
+ ):
self._cached = False
return False
[buildstream] 04/10: _context.py: Drop
`require_artifact_directories`
Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit 4e68b02d43d4c0c4e48b3a9a595808fe04a7395d
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Tue Apr 7 17:52:37 2020 +0200
_context.py: Drop `require_artifact_directories`
`require_artifact_directories` is always `True`, so the attribute is no longer needed.
---
src/buildstream/_context.py | 3 ---
1 file changed, 3 deletions(-)
diff --git a/src/buildstream/_context.py b/src/buildstream/_context.py
index 090b3e0..1419bf4 100644
--- a/src/buildstream/_context.py
+++ b/src/buildstream/_context.py
@@ -154,9 +154,6 @@ class Context:
# Whether or not to cache build trees on artifact creation
self.cache_buildtrees = None
- # Whether directory trees are required for all artifacts in the local cache
- self.require_artifact_directories = True
-
# Whether file contents are required for all artifacts in the local cache
self.require_artifact_files = True
[buildstream] 02/10: cascache.py: use buildbox-casd when checking
whether a file is cached
Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit d1bdfc4acaaa7934e7367c299df22b02eca5bddb
Author: Abderrahim Kitouni <ak...@gnome.org>
AuthorDate: Wed Mar 25 17:22:24 2020 +0100
cascache.py: use buildbox-casd when checking whether a file is cached
This lets buildbox-casd know that the file is needed, so that it does not delete it when pruning.
---
src/buildstream/_cas/cascache.py | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 48b700c..03be75c 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -175,7 +175,13 @@ class CASCache:
# Returns: True if the file is in the cache, False otherwise
#
def contains_file(self, digest):
- return os.path.exists(self.objpath(digest))
+ cas = self.get_cas()
+
+ request = remote_execution_pb2.FindMissingBlobsRequest()
+ request.blob_digests.append(digest)
+
+ response = cas.FindMissingBlobs(request)
+ return len(response.missing_blob_digests) == 0
# contains_directory():
#
[buildstream] 05/10: _artifactcache.py: Fix misleading log message
when pulling artifacts
Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
not-in-ldap pushed a commit to branch juerg/public-data
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit e8e77415094b4a69cab3afe406c56b9202638c6a
Author: Jürg Billeter <j...@bitron.ch>
AuthorDate: Mon Mar 30 13:17:16 2020 +0200
_artifactcache.py: Fix misleading log message when pulling artifacts
Do not print message "Pulled artifact" when pulling the artifact proto
without data. This matches the log messages in `SourceCache`.
---
src/buildstream/_artifactcache.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/buildstream/_artifactcache.py b/src/buildstream/_artifactcache.py
index f1648e9..4290e92 100644
--- a/src/buildstream/_artifactcache.py
+++ b/src/buildstream/_artifactcache.py
@@ -281,7 +281,6 @@ class ArtifactCache(BaseCache):
element.status("Pulling artifact {} <- {}".format(display_key, remote))
artifact = self._pull_artifact_proto(element, key, remote)
if artifact:
- element.info("Pulled artifact {} <- {}".format(display_key, remote))
break
element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key))
@@ -307,7 +306,7 @@ class ArtifactCache(BaseCache):
element.status("Pulling data for artifact {} <- {}".format(display_key, remote))
if self._pull_artifact_storage(element, artifact, remote, pull_buildtrees=pull_buildtrees):
- element.info("Pulled data for artifact {} <- {}".format(display_key, remote))
+ element.info("Pulled artifact {} <- {}".format(display_key, remote))
return True
element.info("Remote ({}) does not have artifact {} cached".format(remote, display_key))