You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by gi...@apache.org on 2020/12/29 13:05:07 UTC

[buildstream] 03/12: Add the virtual directory class 'Directory' and one implementation.

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch jmac/virtual_directories_pre_rebase
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 227bc2042b4c4edfd853a995ebca8967307e3e1a
Author: Jim MacArthur <ji...@codethink.co.uk>
AuthorDate: Tue May 8 16:08:12 2018 +0100

    Add the virtual directory class 'Directory' and one implementation.
    
    buildstream/storage/directory.py: New file.
    buildstream/storage/_filebaseddirectory.py: New file.
    buildstream/_exceptions.py: New VIRTUAL_FS exception source.
---
 buildstream/_exceptions.py                 |   1 +
 buildstream/element.py                     |   1 -
 buildstream/storage/_filebaseddirectory.py | 226 +++++++++++++++++++++++++++++
 buildstream/storage/directory.py           | 155 ++++++++++++++++++++
 4 files changed, 382 insertions(+), 1 deletion(-)

diff --git a/buildstream/_exceptions.py b/buildstream/_exceptions.py
index 8baf167..b46bba9 100644
--- a/buildstream/_exceptions.py
+++ b/buildstream/_exceptions.py
@@ -88,6 +88,7 @@ class ErrorDomain(Enum):
     ELEMENT = 11
     APP = 12
     STREAM = 13
+    VIRTUAL_FS = 14
 
 
 # BstError is an internal base exception class for BuildSream
diff --git a/buildstream/element.py b/buildstream/element.py
index 1f985ae..7964db6 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -97,7 +97,6 @@ from . import _site
 from ._platform import Platform
 from .sandbox._config import SandboxConfig
 
-
 # _KeyStrength():
 #
 # Strength of cache key
diff --git a/buildstream/storage/_filebaseddirectory.py b/buildstream/storage/_filebaseddirectory.py
new file mode 100644
index 0000000..57de1ef
--- /dev/null
+++ b/buildstream/storage/_filebaseddirectory.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python3
+#
+#  Copyright (C) 2018 Codethink Limited
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2 of the License, or (at your option) any later version.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+#  Authors:
+#        Jim MacArthur <ji...@codethink.co.uk>
+
+"""
+FileBasedDirectory
+==================
+
+Implementation of the Directory class which backs onto a normal POSIX filing system.
+
+See also: :ref:`sandboxing`.
+"""
+
+from collections import OrderedDict
+
+import calendar
+import os
+import time
+from .._exceptions import BstError, ErrorDomain
+from .directory import Directory
+from ..utils import link_files, copy_files, list_relative_paths
+from ..utils import _set_deterministic_user, _set_deterministic_mtime
+
+
+class VirtualDirectoryError(BstError):
+    """Raised by Directory functions when system calls fail.
+    This will be handled internally by the BuildStream core,
+    if you need to handle this error, then it should be reraised,
+    or either of the :class:`.ElementError` or :class:`.SourceError`
+    exceptions should be raised from this error.
+    """
+    def __init__(self, message, reason=None):
+        super().__init__(message, domain=ErrorDomain.VIRTUAL_FS, reason=reason)
+
+
+# Like os.path.getmtime(), but doesn't explode on symlinks
+# Copy/pasted from compose.py
+def getmtime(path):
+    stat = os.lstat(path)
+    return stat.st_mtime
+
+# FileBasedDirectory intentionally doesn't call its superclass constructor,
+# which is meant to be unimplemented.
+# pylint: disable=super-init-not-called
+
+
+class _FileObject():
+    """A description of a file in a virtual directory. The contents of
+    this class are never used, but there needs to be something present
+    for files so is_empty() works correctly.
+
+    """
+    def __init__(self, virtual_directory: Directory, filename: str):
+        self.directory = virtual_directory
+        self.filename = filename
+
+
+class FileBasedDirectory(Directory):
+    def __init__(self, external_directory=None):
+        self.external_directory = external_directory
+        self.index = OrderedDict()
+        self._directory_read = False
+
+    def _populate_index(self):
+        if self._directory_read:
+            return
+        for entry in os.listdir(self.external_directory):
+            if os.path.isdir(os.path.join(self.external_directory, entry)):
+                self.index[entry] = FileBasedDirectory(os.path.join(self.external_directory, entry))
+            else:
+                self.index[entry] = _FileObject(self, entry)
+        self._directory_read = True
+
+    def descend(self, subdirectory_spec, create=False):
+        """ See superclass Directory for arguments """
+        # It's very common to send a directory name instead of a list and this causes
+        # bizarre errors, so check for it here
+        if not isinstance(subdirectory_spec, list):
+            subdirectory_spec = [subdirectory_spec]
+        if not subdirectory_spec:
+            return self
+
+        # Because of the way split works, it's common to get a list which begins with
+        # an empty string. Detect these and remove them, then start again.
+        if subdirectory_spec[0] == "":
+            return self.descend(subdirectory_spec[1:], create)
+
+        self._populate_index()
+        if subdirectory_spec[0] in self.index:
+            entry = self.index[subdirectory_spec[0]]
+            if isinstance(entry, FileBasedDirectory):
+                new_path = os.path.join(self.external_directory, subdirectory_spec[0])
+                return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
+            else:
+                error = "Cannot descend into {}, which is a '{}' in the directory {}"
+                raise VirtualDirectoryError(error.format(subdirectory_spec[0],
+                                                         type(entry).__name__,
+                                                         self.external_directory))
+        else:
+            if create:
+                new_path = os.path.join(self.external_directory, subdirectory_spec[0])
+                os.makedirs(new_path, exist_ok=True)
+                return FileBasedDirectory(new_path).descend(subdirectory_spec[1:], create)
+            else:
+                error = "No entry called '{}' found in the directory rooted at {}"
+                raise VirtualDirectoryError(error.format(subdirectory_spec[0], self.external_directory))
+
+    def import_files(self, external_pathspec, *, files=None,
+                     report_written=True, update_utimes=False,
+                     can_link=False):
+        """ See superclass Directory for arguments """
+
+        if isinstance(external_pathspec, Directory):
+            source_directory = external_pathspec.external_directory
+        else:
+            source_directory = external_pathspec
+
+        if can_link and not update_utimes:
+            import_result = link_files(source_directory, self.external_directory, files=files,
+                                       ignore_missing=False, report_written=report_written)
+        else:
+            import_result = copy_files(source_directory, self.external_directory, files=files,
+                                       ignore_missing=False, report_written=report_written)
+        if update_utimes:
+            cur_time = time.time()
+
+            for f in import_result.files_written:
+                os.utime(os.path.join(self.external_directory, f), times=(cur_time, cur_time))
+        return import_result
+
+    def set_deterministic_mtime(self):
+        _set_deterministic_mtime(self.external_directory)
+
+    def set_deterministic_user(self):
+        _set_deterministic_user(self.external_directory)
+
+    def export_files(self, to_directory, *, can_link=False, can_destroy=False):
+        if can_destroy:
+            # Try a simple rename of the sandbox root; if that
+            # doesn't cut it, then do the regular link files code path
+            try:
+                os.rename(self.external_directory, to_directory)
+                return
+            except OSError:
+                # Proceed using normal link/copy
+                pass
+
+        os.makedirs(to_directory, exist_ok=True)
+        if can_link:
+            link_files(self.external_directory, to_directory)
+        else:
+            copy_files(self.external_directory, to_directory)
+
+    # Add a directory entry deterministically to a tar file
+    #
+    # This function takes extra steps to ensure the output is deterministic.
+    # First, it sorts the results of os.listdir() to ensure the ordering of
+    # the files in the archive is the same.  Second, it sets a fixed
+    # timestamp for each entry. See also https://bugs.python.org/issue24465.
+    def export_to_tar(self, tf, dir_arcname, mtime=0):
+        # We need directories here, including non-empty ones,
+        # so list_relative_paths is not used.
+        for filename in sorted(os.listdir(self.external_directory)):
+            source_name = os.path.join(self.external_directory, filename)
+            arcname = os.path.join(dir_arcname, filename)
+            tarinfo = tf.gettarinfo(source_name, arcname)
+            tarinfo.mtime = mtime
+
+            if tarinfo.isreg():
+                with open(source_name, "rb") as f:
+                    tf.addfile(tarinfo, f)
+            elif tarinfo.isdir():
+                tf.addfile(tarinfo)
+                self.descend(filename.split(os.path.sep)).export_to_tar(tf, arcname, mtime)
+            else:
+                tf.addfile(tarinfo)
+
+    def is_empty(self):
+        self._populate_index()
+        return len(self.index) == 0
+
+    def mark_unmodified(self):
+        """ Marks all files in this directory (recursively) as unmodified.
+        """
+        _set_deterministic_mtime(self.external_directory)
+
+    def list_modified_paths(self):
+        """Provide a list of relative paths which have been modified since the
+        last call to mark_unmodified.
+
+        Return value: List(str) - list of modified paths
+        """
+        magic_timestamp = calendar.timegm([2011, 11, 11, 11, 11, 11])
+
+        return [f for f in list_relative_paths(self.external_directory)
+                if getmtime(os.path.join(self.external_directory, f)) != magic_timestamp]
+
+    def list_relative_paths(self):
+        """Provide a list of all relative paths.
+
+        Return value: List(str) - list of all paths
+        """
+
+        return list_relative_paths(self.external_directory)
+
+    def __str__(self):
+        # This returns the whole path (since we don't know where the directory started)
+        # which exposes the sandbox directory; we will have to assume for the time being
+        # that people will not abuse __str__.
+        return self.external_directory
diff --git a/buildstream/storage/directory.py b/buildstream/storage/directory.py
new file mode 100644
index 0000000..f70863f
--- /dev/null
+++ b/buildstream/storage/directory.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+#
+#  Copyright (C) 2018 Codethink Limited
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2 of the License, or (at your option) any later version.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
+#
+#  Authors:
+#        Jim MacArthur <ji...@codethink.co.uk>
+
+"""
+Directory
+=========
+
+This is a virtual Directory class to isolate the rest of BuildStream
+from the backing store implementation.  Sandboxes are allowed to read
+from and write to the underlying storage, but all others must use this
+Directory class to access files and directories in the sandbox.
+
+See also: :ref:`sandboxing`.
+
+"""
+
+
+class Directory():
+    def __init__(self, external_directory=None):
+        raise NotImplementedError()
+
+    def descend(self, subdirectory_spec, create=False):
+        """Descend one or more levels of directory hierarchy and return a new
+        Directory object for that directory.
+
+        Args:
+          subdirectory_spec (list of str): A list of strings which are all directory
+            names.
+          create (boolean): If this is true, the directories will be created if
+            they don't already exist.
+
+        Returns:
+          A Directory object representing the found directory.
+
+        Raises:
+          VirtualDirectoryError: if any of the components in subdirectory_spec
+            cannot be found, or are files, or symlinks to files.
+
+        """
+        raise NotImplementedError()
+
+    # Import and export of files and links
+    def import_files(self, external_pathspec, *, files=None,
+                     report_written=True, update_utimes=False,
+                     can_link=False):
+        """Imports some or all files from external_pathspec into this directory.
+
+        Args:
+          external_pathspec: Either a string containing a pathname, or a
+            Directory object, to use as the source.
+          files (list of str): A list of all the files relative to
+            the external_pathspec to copy. If 'None' is supplied, all
+            files are copied.
+          report_written (bool): Return the full list of files
+            written. Defaults to true. If false, only a list of
+            overwritten files is returned.
+          update_utimes (bool): Update the access and modification time
+            of each file copied to the current time.
+          can_link (bool): Whether it's OK to create a hard link to the
+            original content, meaning the stored copy will change when the
+            original files change. Setting this doesn't guarantee hard
+            links will be made. can_link will never be used if
+            update_utimes is set.
+
+        Returns:
+          (FileListResult) - A report of files imported and overwritten.
+
+        """
+
+        raise NotImplementedError()
+
+    def export_files(self, to_directory, *, can_link=False, can_destroy=False):
+        """Copies everything from this into to_directory.
+
+        Args:
+          to_directory (string): a path outside this directory object
+            where the contents will be copied to.
+          can_link (bool): Whether we can create hard links in to_directory
+            instead of copying. Setting this does not guarantee hard links will be used.
+          can_destroy (bool): Can we destroy the data already in this
+            directory when exporting? If set, this may allow data to be
+            moved rather than copied which will be quicker.
+        """
+
+        raise NotImplementedError()
+
+    def export_to_tar(self, tarfile, destination_dir, mtime=0):
+        """ Exports this directory into the given tar file.
+
+        Args:
+          tarfile (TarFile): A Python TarFile object to export into.
+          destination_dir (str): The prefix for all filenames inside the archive.
+          mtime (int): mtimes of all files in the archive are set to this.
+        """
+        raise NotImplementedError()
+
+    # Convenience functions
+    def is_empty(self):
+        """ Return true if this directory has no files, subdirectories or links in it.
+        """
+        raise NotImplementedError()
+
+    def set_deterministic_mtime(self):
+        """ Sets a static modification time for all regular files in this directory.
+        The magic number for timestamps is 2011-11-11 11:11:11.
+        """
+        raise NotImplementedError()
+
+    def set_deterministic_user(self):
+        """ Sets all files in this directory to the current user's euid/egid.
+        """
+        raise NotImplementedError()
+
+    def mark_unmodified(self):
+        """ Marks all files in this directory (recursively) as unmodified.
+        """
+        raise NotImplementedError()
+
+    def list_modified_paths(self):
+        """Provide a list of relative paths which have been modified since the
+        last call to mark_unmodified. Includes directories only if
+        they are empty.
+
+        Returns:
+          (List(str)) - list of all modified files with relative paths.
+
+        """
+        raise NotImplementedError()
+
+    def list_relative_paths(self):
+        """Provide a list of all relative paths in this directory. Includes
+        directories only if they are empty.
+
+        Yields:
+          (List(str)) - list of all files with relative paths.
+
+        """
+        raise NotImplementedError()