Posted to commits@buildstream.apache.org by ro...@apache.org on 2020/12/29 13:32:15 UTC
[buildstream] branch valentindavid/local-cache-exec-leak created (now 3c26f0c)
This is an automated email from the ASF dual-hosted git repository.
root pushed a change to branch valentindavid/local-cache-exec-leak
in repository https://gitbox.apache.org/repos/asf/buildstream.git.
at 3c26f0c Deduplicate files in local cache with or without exec rights
This branch includes the following new commits:
new 3c26f0c Deduplicate files in local cache with or without exec rights
The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email. Revisions
listed as "add" were already present in the repository and have only
been added to this reference.
[buildstream] 01/01: Deduplicate files in local cache with or without exec rights
Posted by ro...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
root pushed a commit to branch valentindavid/local-cache-exec-leak
in repository https://gitbox.apache.org/repos/asf/buildstream.git
commit 3c26f0c88ff2b0477a0d28457cf15a65cb753171
Author: Valentin David <va...@codethink.co.uk>
AuthorDate: Tue Feb 12 18:38:55 2019 +0100
Deduplicate files in local cache with or without exec rights
If we introduce an object with execution rights whose content is
identical to an existing file without execution rights, the existing
file must not suddenly gain execution rights: the local cache
deduplicates by content digest and hardlinks objects into checkouts,
so a mode change on the shared object leaks to every file that shares
it. This breaks reproducibility and is easy to trigger. For example,
install an empty file with execution rights, or copy a file from
another artifact and `chmod +x` it.
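
To see the failure mode concretely, here is a minimal standalone sketch
(hypothetical code, not BuildStream's) of a store that keys objects by
content digest alone and hardlinks them into checkouts; requesting the
exec bit on one checkout leaks it to the other:

    import hashlib
    import os
    import tempfile

    root = tempfile.mkdtemp()

    def objpath(data):
        # Objects are keyed by content digest only; the file mode is not
        # part of the key, so equal content always maps to one object file.
        return os.path.join(root, hashlib.sha256(data).hexdigest())

    def checkout(data, dest, executable=False):
        path = objpath(data)
        if not os.path.exists(path):
            with open(path, 'wb') as f:
                f.write(data)
        os.link(path, dest)        # hardlink the shared object into place
        if executable:
            os.chmod(dest, 0o755)  # chmod acts on the shared inode

    a = os.path.join(root, 'hello')
    b = os.path.join(root, 'hello-2')
    checkout(b'', a)                   # plain, non-executable file
    checkout(b'', b, executable=True)  # same (empty) content, exec requested

    print(oct(os.stat(a).st_mode & 0o777))  # 0o755: the exec bit leaked

Both names are hardlinks to the same object, so the chmod made for the
second checkout retroactively changes the first.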
---
src/buildstream/_cas/cascache.py | 109 +++++++++++++++----------
src/buildstream/_cas/casremote.py | 9 +-
src/buildstream/_cas/casserver.py | 2 +-
src/buildstream/storage/_casbaseddirectory.py | 2 +-
src/buildstream/storage/_filebaseddirectory.py | 5 +-
tests/frontend/buildcheckout.py | 33 ++++++++
tests/integration/shell.py | 2 +-
tests/testutils/artifactshare.py | 2 +-
8 files changed, 110 insertions(+), 54 deletions(-)
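
The core of the fix below is to fold the exec bit into the object's
location rather than its mode: executable content is stored under a
separate '.exec' object path, so the two variants never share an inode.
A hypothetical condensed sketch of the naming scheme that the patched
objpath() uses, with casdir and the digest as plain arguments:

    import os

    def objpath(casdir, digest_hash, *, is_exec=False, disable_exec=False):
        # Executable and non-executable variants of identical content get
        # distinct object files, so a chmod on one can never leak to the
        # other. Pure blob servers pass disable_exec=True to keep a single
        # copy per digest, as casserver.py and artifactshare.py do below.
        filename = digest_hash[2:]
        if is_exec and not disable_exec:
            filename = '{}.exec'.format(filename)
        return os.path.join(casdir, 'objects', digest_hash[:2], filename)

    h = 'af' * 32  # hypothetical sha256 digest
    print(objpath('/cache/cas', h))                # .../objects/af/afaf...af
    print(objpath('/cache/cas', h, is_exec=True))  # .../objects/af/af...af.exec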
diff --git a/src/buildstream/_cas/cascache.py b/src/buildstream/_cas/cascache.py
index 49a5e88..1008eef 100644
--- a/src/buildstream/_cas/cascache.py
+++ b/src/buildstream/_cas/cascache.py
@@ -81,9 +81,10 @@ class CASCacheUsage():
#
class CASCache():
- def __init__(self, path):
+ def __init__(self, path, *, disable_exec=False):
self.casdir = os.path.join(path, 'cas')
self.tmpdir = os.path.join(path, 'tmp')
+ self._disable_exec = disable_exec
os.makedirs(os.path.join(self.casdir, 'refs', 'heads'), exist_ok=True)
os.makedirs(os.path.join(self.casdir, 'objects'), exist_ok=True)
os.makedirs(self.tmpdir, exist_ok=True)
@@ -136,7 +137,7 @@ class CASCache():
# Optionally check presence of files
if with_files:
for filenode in directory.files:
- if not os.path.exists(self.objpath(filenode.digest)):
+ if not os.path.exists(self.objpath(filenode.digest, is_exec=filenode.is_executable)):
return False
# Check subdirectories
@@ -169,13 +170,9 @@ class CASCache():
# regular file, create hardlink
fullpath = os.path.join(dest, filenode.name)
if can_link:
- utils.safe_link(self.objpath(filenode.digest), fullpath)
+ utils.safe_link(self.objpath(filenode.digest, is_exec=filenode.is_executable), fullpath)
else:
- utils.safe_copy(self.objpath(filenode.digest), fullpath)
-
- if filenode.is_executable:
- os.chmod(fullpath, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
- stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
+ utils.safe_copy(self.objpath(filenode.digest, is_exec=filenode.is_executable), fullpath)
for dirnode in directory.directories:
fullpath = os.path.join(dest, dirnode.name)
@@ -332,8 +329,12 @@ class CASCache():
# Returns:
# (str): The path of the object
#
- def objpath(self, digest):
- return os.path.join(self.casdir, 'objects', digest.hash[:2], digest.hash[2:])
+ def objpath(self, digest, *, is_exec=False):
+ if is_exec and not self._disable_exec:
+ filename = '{}.exec'.format(digest.hash[2:])
+ else:
+ filename = digest.hash[2:]
+ return os.path.join(self.casdir, 'objects', digest.hash[:2], filename)
# add_object():
#
@@ -350,7 +351,7 @@ class CASCache():
#
# Either `path` or `buffer` must be passed, but not both.
#
- def add_object(self, *, digest=None, path=None, buffer=None, link_directly=False):
+ def add_object(self, *, digest=None, path=None, buffer=None, link_directly=False, is_exec=False):
# Exactly one of the two parameters has to be specified
assert (path is None) != (buffer is None)
@@ -369,8 +370,7 @@ class CASCache():
for chunk in iter(lambda: tmp.read(_BUFFER_SIZE), b""):
h.update(chunk)
else:
- tmp = stack.enter_context(self._temporary_object())
-
+ tmp = stack.enter_context(self._temporary_object(is_exec=is_exec))
if path:
with open(path, 'rb') as f:
for chunk in iter(lambda: f.read(_BUFFER_SIZE), b""):
@@ -386,7 +386,7 @@ class CASCache():
digest.size_bytes = os.fstat(tmp.fileno()).st_size
# Place file at final location
- objpath = self.objpath(digest)
+ objpath = self.objpath(digest, is_exec=is_exec)
os.makedirs(os.path.dirname(objpath), exist_ok=True)
os.link(tmp.name, objpath)
@@ -592,19 +592,25 @@ class CASCache():
#
def remote_missing_blobs(self, remote, blobs):
missing_blobs = dict()
+ executable = {}
+
# Limit size of FindMissingBlobs request
for required_blobs_group in _grouper(iter(blobs), 512):
request = remote_execution_pb2.FindMissingBlobsRequest(instance_name=remote.spec.instance_name)
- for required_digest in required_blobs_group:
+ for is_exec, required_digest in required_blobs_group:
d = request.blob_digests.add()
d.CopyFrom(required_digest)
+ if required_digest.hash not in executable:
+ executable[required_digest.hash] = set()
+ executable[required_digest.hash].add(is_exec)
response = remote.cas.FindMissingBlobs(request)
for missing_digest in response.missing_blob_digests:
d = remote_execution_pb2.Digest()
d.CopyFrom(missing_digest)
- missing_blobs[d.hash] = d
+ for is_exec in executable[missing_digest.hash]:
+ missing_blobs[(is_exec, d.hash)] = (is_exec, d)
return missing_blobs.values()
@@ -619,10 +625,10 @@ class CASCache():
#
def local_missing_blobs(self, digests):
missing_blobs = []
- for digest in digests:
- objpath = self.objpath(digest)
+ for is_exec, digest in digests:
+ objpath = self.objpath(digest, is_exec=is_exec)
if not os.path.exists(objpath):
- missing_blobs.append(digest)
+ missing_blobs.append((is_exec, digest))
return missing_blobs
# required_blobs_for_directory():
@@ -636,7 +642,7 @@ class CASCache():
# parse directory, and recursively add blobs
- yield directory_digest
+ yield False, directory_digest
directory = remote_execution_pb2.Directory()
@@ -644,7 +650,7 @@ class CASCache():
directory.ParseFromString(f.read())
for filenode in directory.files:
- yield filenode.digest
+ yield filenode.is_executable, filenode.digest
for dirnode in directory.directories:
if dirnode.name not in excluded_subdirs:
@@ -797,9 +803,9 @@ class CASCache():
for filenode in directory.files:
if update_mtime:
- os.utime(self.objpath(filenode.digest))
+ os.utime(self.objpath(filenode.digest, is_exec=filenode.is_executable))
if check_exists:
- if not os.path.exists(self.objpath(filenode.digest)):
+ if not os.path.exists(self.objpath(filenode.digest, is_exec=filenode.is_executable)):
raise FileNotFoundError
reachable.add(filenode.digest.hash)
@@ -813,10 +819,12 @@ class CASCache():
#
# Create a named temporary file with 0o0644 access rights.
@contextlib.contextmanager
- def _temporary_object(self):
+ def _temporary_object(self, *, is_exec=False):
with utils._tempnamedfile(dir=self.tmpdir) as f:
- os.chmod(f.name,
- stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
+ access = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
+ if is_exec and not self._disable_exec:
+ access |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
+ os.chmod(f.name, access)
yield f
# _ensure_blob():
@@ -830,27 +838,27 @@ class CASCache():
# Returns:
# (str): The path of the object
#
- def _ensure_blob(self, remote, digest):
- objpath = self.objpath(digest)
+ def _ensure_blob(self, remote, digest, is_exec=False):
+ objpath = self.objpath(digest, is_exec=is_exec)
if os.path.exists(objpath):
# already in local repository
return objpath
- with self._temporary_object() as f:
+ with self._temporary_object(is_exec=is_exec) as f:
remote._fetch_blob(digest, f)
- added_digest = self.add_object(path=f.name, link_directly=True)
+ added_digest = self.add_object(path=f.name, link_directly=True, is_exec=is_exec)
assert added_digest.hash == digest.hash
return objpath
def _batch_download_complete(self, batch, *, missing_blobs=None):
- for digest, data in batch.send(missing_blobs=missing_blobs):
- with self._temporary_object() as f:
+ for digest, data, is_exec in batch.send(missing_blobs=missing_blobs):
+ with self._temporary_object(is_exec=is_exec) as f:
f.write(data)
f.flush()
- added_digest = self.add_object(path=f.name, link_directly=True)
+ added_digest = self.add_object(path=f.name, link_directly=True, is_exec=is_exec)
assert added_digest.hash == digest.hash
# Helper function for _fetch_directory().
@@ -864,8 +872,9 @@ class CASCache():
return _CASBatchRead(remote)
# Helper function for _fetch_directory().
- def _fetch_directory_node(self, remote, digest, batch, fetch_queue, fetch_next_queue, *, recursive=False):
- in_local_cache = os.path.exists(self.objpath(digest))
+ def _fetch_directory_node(self, remote, digest, batch, fetch_queue, fetch_next_queue,
+ *, recursive=False, is_exec=False):
+ in_local_cache = os.path.exists(self.objpath(digest, is_exec=is_exec))
if in_local_cache:
# Skip download, already in local cache.
@@ -873,14 +882,14 @@ class CASCache():
elif (digest.size_bytes >= remote.max_batch_total_size_bytes or
not remote.batch_read_supported):
# Too large for batch request, download in independent request.
- self._ensure_blob(remote, digest)
+ self._ensure_blob(remote, digest, is_exec=is_exec)
in_local_cache = True
else:
- if not batch.add(digest):
+ if not batch.add(digest, is_exec=is_exec):
# Not enough space left in batch request.
# Complete pending batch first.
batch = self._fetch_directory_batch(remote, batch, fetch_queue, fetch_next_queue)
- batch.add(digest)
+ batch.add(digest, is_exec=is_exec)
if recursive:
if in_local_cache:
@@ -963,25 +972,31 @@ class CASCache():
batch = _CASBatchRead(remote)
- for digest in digests:
+ for d in digests:
+ try:
+ is_exec, digest = d
+ except TypeError:
+ digest = d
+ is_exec = False
+
if (digest.size_bytes >= remote.max_batch_total_size_bytes or
not remote.batch_read_supported):
# Too large for batch request, download in independent request.
try:
- self._ensure_blob(remote, digest)
+ self._ensure_blob(remote, digest, is_exec=is_exec)
except grpc.RpcError as e:
if e.code() == grpc.StatusCode.NOT_FOUND:
missing_blobs.append(digest)
else:
raise CASCacheError("Failed to fetch blob: {}".format(e)) from e
else:
- if not batch.add(digest):
+ if not batch.add(digest, is_exec=is_exec):
# Not enough space left in batch request.
# Complete pending batch first.
self._batch_download_complete(batch, missing_blobs=missing_blobs)
batch = _CASBatchRead(remote)
- batch.add(digest)
+ batch.add(digest, is_exec=is_exec)
# Complete last pending batch
self._batch_download_complete(batch, missing_blobs=missing_blobs)
@@ -999,8 +1014,14 @@ class CASCache():
def send_blobs(self, remote, digests, u_uid=uuid.uuid4()):
batch = _CASBatchUpdate(remote)
- for digest in digests:
- with open(self.objpath(digest), 'rb') as f:
+ for d in digests:
+ try:
+ is_exec, digest = d
+ except TypeError:
+ digest = d
+ is_exec = False
+
+ with open(self.objpath(digest, is_exec=is_exec), 'rb') as f:
assert os.fstat(f.fileno()).st_size == digest.size_bytes
if (digest.size_bytes >= remote.max_batch_total_size_bytes or
diff --git a/src/buildstream/_cas/casremote.py b/src/buildstream/_cas/casremote.py
index cd46e9c..b461d4e 100644
--- a/src/buildstream/_cas/casremote.py
+++ b/src/buildstream/_cas/casremote.py
@@ -310,8 +310,9 @@ class _CASBatchRead():
self._request.instance_name = remote.instance_name
self._size = 0
self._sent = False
+ self._is_exec = {}
- def add(self, digest):
+ def add(self, digest, *, is_exec=False):
assert not self._sent
new_batch_size = self._size + digest.size_bytes
@@ -323,6 +324,9 @@ class _CASBatchRead():
request_digest.hash = digest.hash
request_digest.size_bytes = digest.size_bytes
self._size = new_batch_size
+ if digest.hash not in self._is_exec:
+ self._is_exec[digest.hash] = set()
+ self._is_exec[digest.hash].add(is_exec)
return True
def send(self, *, missing_blobs=None):
@@ -349,7 +353,8 @@ class _CASBatchRead():
raise CASRemoteError("Failed to download blob {}: expected {} bytes, received {} bytes".format(
response.digest.hash, response.digest.size_bytes, len(response.data)))
- yield (response.digest, response.data)
+ for is_exec in self._is_exec[response.digest.hash]:
+ yield (response.digest, response.data, is_exec)
# Represents a batch of blobs queued for upload.
diff --git a/src/buildstream/_cas/casserver.py b/src/buildstream/_cas/casserver.py
index 9606c26..582ac2a 100644
--- a/src/buildstream/_cas/casserver.py
+++ b/src/buildstream/_cas/casserver.py
@@ -64,7 +64,7 @@ class ArtifactTooLargeException(Exception):
def create_server(repo, *, enable_push,
max_head_size=int(10e9),
min_head_size=int(2e9)):
- cas = CASCache(os.path.abspath(repo))
+ cas = CASCache(os.path.abspath(repo), disable_exec=True)
artifactdir = os.path.join(os.path.abspath(repo), 'artifacts', 'refs')
sourcedir = os.path.join(os.path.abspath(repo), 'source_protos')
diff --git a/src/buildstream/storage/_casbaseddirectory.py b/src/buildstream/storage/_casbaseddirectory.py
index 2c5d751..b040755 100644
--- a/src/buildstream/storage/_casbaseddirectory.py
+++ b/src/buildstream/storage/_casbaseddirectory.py
@@ -136,8 +136,8 @@ class CasBasedDirectory(Directory):
entry = IndexEntry(filename, _FileType.REGULAR_FILE,
modified=modified or filename in self.index)
path = os.path.join(basename, filename)
- entry.digest = self.cas_cache.add_object(path=path, link_directly=can_link)
entry.is_executable = os.access(path, os.X_OK)
+ entry.digest = self.cas_cache.add_object(path=path, link_directly=can_link, is_exec=entry.is_executable)
self.index[filename] = entry
self.__invalidate_digest()
diff --git a/src/buildstream/storage/_filebaseddirectory.py b/src/buildstream/storage/_filebaseddirectory.py
index 8c55819..4f93737 100644
--- a/src/buildstream/storage/_filebaseddirectory.py
+++ b/src/buildstream/storage/_filebaseddirectory.py
@@ -272,11 +272,8 @@ class FileBasedDirectory(Directory):
continue
if entry.type == _FileType.REGULAR_FILE:
- src_path = source_directory.cas_cache.objpath(entry.digest)
+ src_path = source_directory.cas_cache.objpath(entry.digest, is_exec=entry.is_executable)
actionfunc(src_path, dest_path, result=result)
- if entry.is_executable:
- os.chmod(dest_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
- stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
else:
assert entry.type == _FileType.SYMLINK
os.symlink(entry.target, dest_path)
diff --git a/tests/frontend/buildcheckout.py b/tests/frontend/buildcheckout.py
index 97bce91..ccd284c 100644
--- a/tests/frontend/buildcheckout.py
+++ b/tests/frontend/buildcheckout.py
@@ -4,6 +4,8 @@
import os
import tarfile
import hashlib
+import shutil
+import stat
import subprocess
import re
@@ -885,3 +887,34 @@ def test_partial_checkout_fail(tmpdir, datafiles, cli):
checkout_dir])
res.assert_main_error(ErrorDomain.STREAM, 'uncached-checkout-attempt')
assert re.findall(r'Remote \((\S+)\) does not have artifact (\S+) cached', res.stderr)
+
+
+@pytest.mark.datafiles(DATA_DIR)
+def test_access_rights(datafiles, cli):
+ project = str(datafiles)
+ checkout = os.path.join(cli.directory, 'checkout')
+
+ shutil.copyfile(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello'),
+ os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello-2'))
+ os.chmod(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello'),
+ 0o0755)
+ os.chmod(os.path.join(project, 'files', 'bin-files', 'usr', 'bin', 'hello-2'),
+ 0o0644)
+
+ result = cli.run(project=project, args=['build', 'target.bst'])
+ result.assert_success()
+
+ checkout_args = ['artifact', 'checkout', 'target.bst',
+ '--directory', checkout]
+
+ # Now check it out
+ result = cli.run(project=project, args=checkout_args)
+ result.assert_success()
+
+ st = os.lstat(os.path.join(checkout, 'usr', 'bin', 'hello'))
+ assert stat.S_ISREG(st.st_mode)
+ assert stat.S_IMODE(st.st_mode) == 0o0755
+
+ st = os.lstat(os.path.join(checkout, 'usr', 'bin', 'hello-2'))
+ assert stat.S_ISREG(st.st_mode)
+ assert stat.S_IMODE(st.st_mode) == 0o0644
diff --git a/tests/integration/shell.py b/tests/integration/shell.py
index f7de3e4..b55eb7b 100644
--- a/tests/integration/shell.py
+++ b/tests/integration/shell.py
@@ -430,7 +430,7 @@ def test_integration_partial_artifact(cli, datafiles, tmpdir, integration_cache)
# Remove the binary from the CAS
cachedir = cli.config['cachedir']
- objpath = os.path.join(cachedir, 'cas', 'objects', digest[:2], digest[2:])
+ objpath = os.path.join(cachedir, 'cas', 'objects', digest[:2], '{}.exec'.format(digest[2:]))
os.unlink(objpath)
# check shell doesn't work
diff --git a/tests/testutils/artifactshare.py b/tests/testutils/artifactshare.py
index a5522c8..965a4a9 100644
--- a/tests/testutils/artifactshare.py
+++ b/tests/testutils/artifactshare.py
@@ -46,7 +46,7 @@ class ArtifactShare():
self.artifactdir = os.path.join(self.repodir, 'artifacts', 'refs')
os.makedirs(self.artifactdir)
- self.cas = CASCache(self.repodir)
+ self.cas = CASCache(self.repodir, disable_exec=True)
self.total_space = total_space
self.free_space = free_space