You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by tv...@apache.org on 2021/02/04 07:12:15 UTC

[buildstream] 02/03: Log details of artifact splitting when building 'compose' elements

This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch sam/compose-log-splits
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit f508685a3f5f77ee551cb27554186c679bd7911f
Author: Sam Thursfield <sa...@codethink.co.uk>
AuthorDate: Tue Nov 7 13:12:53 2017 +0000

    Log details of artifact splitting when building 'compose' elements
    
    Artifact split rules are complicated to reason about. There needs to be
    some way of seeing why a 'compose' artifact included a given file.
    
    The log file now contains a long message like this, listing every
    artifact, domain and file:
    
        [--:--:--] INFO    [initramfs/initramfs.bst]: Composed 2369 files
    
            Integration
              - var/cache/ldconfig/aux-cache
    
            gnu-toolchain/binutils.bst runtime
              - usr/x86_64-unknown-linux-gnu
              - usr/x86_64-unknown-linux-gnu/bin/ar
              - usr/x86_64-unknown-linux-gnu/bin/as
              - usr/x86_64-unknown-linux-gnu/bin/ld
    
            gnu-toolchain/make.bst (no domain)
              - usr
              - usr/lib
              - usr/share
    
            ...
    
    Note that a file can be in multiple domains, in which case it is listed
    once under each of them.
    
    The size of the log files is a concern, so in future we may 'compress'
    some of these entries: e.g. if all files in a given directory come from
    one artifact, we only need to list the directory rather than every
    filename.
---
 buildstream/element.py                  | 50 +++++++++++++++++++++++++++----
 buildstream/plugins/elements/compose.py | 52 +++++++++++++++++++++++++++++++--
 2 files changed, 94 insertions(+), 8 deletions(-)

diff --git a/buildstream/element.py b/buildstream/element.py
index f0df03b..f92b241 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -102,6 +102,20 @@ class ElementError(BstError):
         super().__init__(message, detail=detail, domain=ErrorDomain.ELEMENT, reason=reason)
 
 
+class StagingResult():
+    """Result of a staging operation."""
+
+    def __init__(self, file_list_result=None, manifest=None):
+        if file_list_result:
+            # This class extends utils.FileListResult; but Python has no magic
+            # way of casting an object to a subclass, so we make a new object
+            # and copy all the attributes across from the old one.
+            self.__dict__.update(file_list_result.__dict__)
+
+        self.manifest = manifest or {}
+        """Map from each file to the artifact and split rule which staged it."""
+
+
 class Element(Plugin):
     """Element()
 
@@ -430,9 +444,10 @@ class Element(Plugin):
                 if path is None \
                 else os.path.join(basedir, path.lstrip(os.sep))
 
-            files = self.__compute_splits(include, exclude, orphans)
-            result = utils.link_files(artifact, stagedir, files=files,
+            manifest = self.__compute_splits(include, exclude, orphans)
+            result = utils.link_files(artifact, stagedir, files=manifest.keys(),
                                       report_written=True)
+            result = StagingResult(result, manifest)
 
         return result
 
@@ -453,6 +468,10 @@ class Element(Plugin):
            exclude (list): An optional list of domains to exclude files from
            orphans (bool): Whether to include files not spoken for by split domains
 
+        Returns:
+           (dict): A mapping from each file to the artifact and split-rules domain
+                   that produced it.
+
         Raises:
            (:class:`.ElementError`): If any of the dependencies in `scope` have not
                                      yet produced artifacts, or if forbidden overlaps
@@ -461,7 +480,7 @@ class Element(Plugin):
         ignored = {}
         overlaps = OrderedDict()
         files_written = {}
-
+        manifest = {}
         for dep in self.dependencies(scope):
             result = dep.stage_artifact(sandbox,
                                         path=path,
@@ -483,6 +502,10 @@ class Element(Plugin):
 
             if result.ignored:
                 ignored[dep.name] = result.ignored
+                for f in result.ignored:
+                    del result.manifest[f]
+
+            manifest.update(result.manifest)
 
         if overlaps:
             overlap_error = overlap_warning = False
@@ -512,6 +535,13 @@ class Element(Plugin):
                 raise ElementError("Non-whitelisted overlaps detected and fail-on-overlaps is set",
                                    detail=error_detail, reason="overlap-error")
 
+        if overwrites:
+            detail = "Staged files overwrite existing files in staging area:\n"
+            for key, value in overwrites.items():
+                detail += "\nFrom {}:\n".format(key)
+                detail += "  " + "  ".join(["/" + f + "\n" for f in value])
+            self.warn("Overlapping files", detail=detail)
+
         if ignored:
             detail = "Not staging files which would replace non-empty directories:\n"
             for key, value in ignored.items():
@@ -519,6 +549,8 @@ class Element(Plugin):
                 detail += "  " + "  ".join(["/" + f + "\n" for f in value])
             self.warn("Ignored files", detail=detail)
 
+        return manifest
+
     def integrate(self, sandbox):
         """Integrate currently staged filesystem against this artifact.
 
@@ -1782,12 +1814,13 @@ class Element(Plugin):
 
     def __compute_splits(self, include=None, exclude=None, orphans=True):
         basedir = os.path.join(self.__artifacts.extract(self), 'files')
+        manifest = {}
 
         # No splitting requested, just report complete artifact
         if orphans and not (include or exclude):
             for filename in utils.list_relative_paths(basedir):
-                yield filename
-            return
+                manifest[filename] = {'artifact': self}
+            return manifest
 
         if not self.__splits:
             self.__init_splits()
@@ -1816,12 +1849,14 @@ class Element(Plugin):
             include_file = False
             exclude_file = False
             claimed_file = False
+            included_by_domains = []
 
             for domain in element_domains:
                 if self.__splits[domain].match(filename):
                     claimed_file = True
                     if domain in include:
                         include_file = True
+                        included_by_domains.append(domain)
                     if domain in exclude:
                         exclude_file = True
 
@@ -1829,7 +1864,10 @@ class Element(Plugin):
                 include_file = True
 
             if include_file and not exclude_file:
-                yield filename.lstrip(os.sep)
+                manifest_entry = {'artifact': self}
+                manifest_entry['domains'] = included_by_domains
+                manifest[filename.lstrip(os.sep)] = manifest_entry
+        return manifest
 
     def _load_public_data(self):
         self._assert_cached()
diff --git a/buildstream/plugins/elements/compose.py b/buildstream/plugins/elements/compose.py
index 29e289a..7b6f99c 100644
--- a/buildstream/plugins/elements/compose.py
+++ b/buildstream/plugins/elements/compose.py
@@ -33,6 +33,7 @@ The default configuration and possible options are as such:
      :language: yaml
 """
 
+import collections
 import os
 from buildstream import utils
 from buildstream import Element, ElementError, Scope
@@ -112,6 +113,7 @@ class ComposeElement(Element):
             f: getmtime(os.path.join(basedir, f))
             for f in utils.list_relative_paths(basedir)
         }
+
         modified_files = []
         removed_files = []
         added_files = []
@@ -181,12 +183,58 @@ class ComposeElement(Element):
         detail = "\n".join(lines)
 
         with self.timed_activity("Creating composition", detail=detail, silent_nested=True):
-            self.info("Composing {} files".format(len(manifest)))
-            utils.link_files(basedir, installdir, files=manifest)
+            manifest = self.stage_dependency_artifacts(sandbox, Scope.BUILD,
+                                                       path=stagedir,
+                                                       include=self.include,
+                                                       exclude=self.exclude,
+                                                       orphans=self.include_orphans)
+
+            if self.integration:
+                self.status("Moving {} integration files".format(len(integration_files)))
+                utils.move_files(basedir, installdir, integration_files)
+
+                for filename in integration_files:
+                    manifest[filename] = manifest.get(filename, {})
+                    manifest[filename]['integration'] = True
+
+        total_files = len(manifest)
+        detail = self._readable_manifest(manifest)
+        self.log("Composed {} files".format(total_files), detail=detail)
 
         # And we're done
         return os.path.join(os.sep, 'buildstream', 'install')
 
+    # Show a list of files that made it into the artifact, grouped by the
+    # artifact and split-rules domains that resulted in each one being there.
+    def _readable_manifest(self, manifest):
+        domains = collections.defaultdict(list)
+
+        # Convert the filename->domain mapping into a domain->filename mapping.
+        for filename, entry in manifest.items():
+            if filename == '.':
+                continue
+
+            if 'artifact' in entry:
+                domains_for_file = entry.get('domains') or ["(no domain)"]
+                for domain in domains_for_file:
+                    full_domain_name = entry['artifact'].name + " " + domain
+                    if entry.get('integration', False) is True:
+                        full_domain_name += " (modified during integration)"
+
+                    domains[full_domain_name].append(filename)
+            else:
+                domains["Integration"].append(filename)
+
+        # Display the mapping neatly for the user.
+        lines = []
+        for domain in sorted(domains):
+            lines.extend(["", domain])
+
+            contents = sorted(domains[domain])
+            lines.extend("  - " + filename for filename in contents)
+
+        return "\n".join(lines)
+
 
 # Like os.path.getmtime(), but doesnt explode on symlinks
 #