You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by tv...@apache.org on 2021/10/10 09:19:36 UTC

[buildstream] branch tristan/disambiguate-artifact-globs created (now 19a15be)

This is an automated email from the ASF dual-hosted git repository.

tvb pushed a change to branch tristan/disambiguate-artifact-globs
in repository https://gitbox.apache.org/repos/asf/buildstream.git.


      at 19a15be  Revert to expecting '.bst' on the command line to disambiguate elements and artifacts

This branch includes the following new commits:

     new 19a15be  Revert to expecting '.bst' on the command line to disambiguate elements and artifacts

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[buildstream] 01/01: Revert to expecting '.bst' on the command line to disambiguate elements and artifacts

Posted by tv...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/disambiguate-artifact-globs
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 19a15be6ef41619ace5dec1b488ac6c8a5f36c7f
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Sun Oct 10 18:05:08 2021 +0900

    Revert to expecting '.bst' on the command line to disambiguate elements and artifacts
    
    Summary of changes:
    
      * _stream.py: Treat targets with `.bst` suffixes as elements and other
        targets as artifacts, and improve error reporting around this.
    
        Also use a new machine readable error string to denote when we fail
        to create a directory when creating a workspace.
    
      * _loader/loader.py: Remove handling of LoadErrorReason.LOADING_DIRECTORY
    
        This additional handling was only to suggest that maybe the user
        meant to specify "<directory_name>.bst" in the case a directory is
        encountered, but now we will bail out earlier if an element target
        is specified without a ".bst" suffix anyway.
    
        The only way to reach this error is to load a directory which itself
        already has a ".bst" suffix, in which case the additional suggestion
        is no longer useful.
    
      * tests/format/elementnames.py: Expect different error for target elements
    
      * tests/frontend/artifact_show.py: Removed test that is no longer relevant,
        it is now impossible to glob for both elements and artifacts with the
        same glob expression.
    
      * tests/frontend/show.py: Removed some parameters of the glob test which
        were expecting to get element results without specifying a ".bst" suffix
    
      * tests/frontend/workspace.py: Specify a newly added machine readable reason
        for the error of failing to create a directory while opening a workspace
    
      * tests/internals/loader.py: Trigger the LoadErrorReason.LOADING_DIRECTORY
        error by creating a directory named "element.bst", so that it gets by
        the initial element name suffix checks.
    
    Fixes #1411
---
 src/buildstream/_loader/loader.py |  12 ----
 src/buildstream/_stream.py        | 142 +++++++++++++++++++++-----------------
 tests/format/elementnames.py      |  28 ++++++--
 tests/frontend/artifact_show.py   |  24 -------
 tests/frontend/show.py            |  10 +--
 tests/frontend/workspace.py       |   2 +-
 tests/internals/loader.py         |   3 +-
 7 files changed, 106 insertions(+), 115 deletions(-)

diff --git a/src/buildstream/_loader/loader.py b/src/buildstream/_loader/loader.py
index ea73afd..71fff51 100644
--- a/src/buildstream/_loader/loader.py
+++ b/src/buildstream/_loader/loader.py
@@ -298,18 +298,6 @@ class Loader:
 
                 raise LoadError(message, LoadErrorReason.MISSING_FILE, detail=detail) from e
 
-            if e.reason == LoadErrorReason.LOADING_DIRECTORY:
-                # If a <directory>.bst file exists in the element path,
-                # let's suggest this as a plausible alternative.
-                message = str(e)
-                if provenance_node:
-                    message = "{}: {}".format(provenance_node.get_provenance(), message)
-                detail = None
-                if os.path.exists(os.path.join(self._basedir, filename + ".bst")):
-                    element_name = filename + ".bst"
-                    detail = "Did you mean '{}'?\n".format(element_name)
-                raise LoadError(message, LoadErrorReason.LOADING_DIRECTORY, detail=detail) from e
-
             # Otherwise, we don't know the reason, so just raise
             raise
 
diff --git a/src/buildstream/_stream.py b/src/buildstream/_stream.py
index 6fcfd12..ad7d596 100644
--- a/src/buildstream/_stream.py
+++ b/src/buildstream/_stream.py
@@ -1040,7 +1040,11 @@ class Stream:
                 if todo_elements:
                     # This output should make creating the remaining workspaces as easy as possible.
                     todo_elements = "\nDid not try to create workspaces for " + todo_elements
-                raise StreamError("Failed to create workspace directory: {}".format(e) + todo_elements) from e
+                raise StreamError(
+                    "Failed to create workspace directory: {}".format(e),
+                    reason="workspace-directory-failure",
+                    detail=todo_elements,
+                ) from e
 
             workspaces.create_workspace(target, directory, checkout=not no_checkout)
             self._context.messenger.info("Created a workspace for element: {}".format(target._get_full_name()))
@@ -1999,80 +2003,94 @@ class Stream:
     def _expand_and_classify_targets(
         self, targets: Iterable[str], valid_artifact_names: bool = False
     ) -> Tuple[List[str], List[str]]:
-        initial_targets = []
-        element_targets = []
-        artifact_names = []
-        globs = {}  # Count whether a glob matched elements and artifacts
+        #
+        # We use dicts here instead of sets, in order to deduplicate any possibly duplicate
+        # entries, while also retaining the original order of element specification/discovery,
+        # (which we cannot do with sets).
+        #
+        element_names = {}
+        artifact_names = {}
+        element_globs = {}
+        artifact_globs = {}
 
-        # First extract the globs
+        # First sort out globs and targets
         for target in targets:
             if any(c in "*?[" for c in target):
-                globs[target] = 0
+                if target.endswith(".bst"):
+                    element_globs[target]
+                else:
+                    artifact_globs[target]
+            elif target.endswith(".bst"):
+                element_names[target] = True
             else:
-                initial_targets.append(target)
+                artifact_names[target] = True
 
-        # Filter out any targets which are found to be artifact names
-        if valid_artifact_names:
-            for target in initial_targets:
-                try:
-                    verify_artifact_ref(target)
-                except ArtifactElementError:
-                    element_targets.append(target)
-                else:
-                    artifact_names.append(target)
-        else:
-            element_targets = initial_targets
+        # Bail out in commands which don't support artifacts if any of the targets
+        # or globs did not end with the expected '.bst' suffix.
+        #
+        if (artifact_names or artifact_globs) and not valid_artifact_names:
+            raise StreamError(
+                "Invalid element names or element glob patterns were specified: {}".format(
+                    ", ".join(list(artifact_names) + list(artifact_globs))
+                ),
+                reason="invalid-element-names",
+                detail="Element names and element glob expressions must end in '.bst'",
+            )
+
+        # Verify targets which were not classified as elements
+        for artifact_name in artifact_names.keys():
+            try:
+                verify_artifact_ref(artifact_name)
+            except ArtifactElementError as e:
+                raise StreamError(
+                    "Specified target does not appear to be an artifact or element name: ".format(artifact_name),
+                    reason="unrecognized-target-format",
+                    detail="Element names and element glob expressions must end in '.bst'",
+                ) from e
 
         # Expand globs for elements
-        if self._project:
+        if element_globs:
+
+            # Bail out if an element glob is specified without providing a project directory
+            if not self._project:
+                raise StreamError(
+                    "Element glob expressions were specified without any project directory: {}".format(
+                        ", ".join(element_globs)
+                    ),
+                    reason="glob-elements-without-project",
+                )
+
+            # Collect a list of `all_elements` in the project, stripping out the leading
+            # project directory and element path prefix, to produce only element names.
+            #
             all_elements = []
             element_path_length = len(self._project.element_path) + 1
             for dirpath, _, filenames in os.walk(self._project.element_path):
                 for filename in filenames:
                     if filename.endswith(".bst"):
-                        element_path = os.path.join(dirpath, filename)
-                        element_path = element_path[element_path_length:]  # Strip out the element_path
-                        all_elements.append(element_path)
-
-            for glob in globs:
-                matched = False
-                for element_path in utils.glob(all_elements, glob):
-                    element_targets.append(element_path)
-                    matched = True
-                if matched:
-                    globs[glob] = globs[glob] + 1
-
-        # Expand globs for artifact names
-        if valid_artifact_names:
-            for glob in globs:
-                matches = self._artifacts.list_artifacts(glob=glob)
-                if matches:
-                    artifact_names.extend(matches)
-                    globs[glob] = globs[glob] + 1
-
-        # Issue warnings and errors
-        unmatched = [glob for glob, glob_count in globs.items() if glob_count == 0]
-        doubly_matched = [glob for glob, glob_count in globs.items() if glob_count > 1]
-
-        # Warn the user if any of the provided globs did not match anything
-        if unmatched:
-            if valid_artifact_names:
-                message = "No elements or artifacts matched the following glob expression(s): {}".format(
-                    ", ".join(unmatched)
-                )
-            else:
-                message = "No elements matched the following glob expression(s): {}".format(", ".join(unmatched))
-            self._context.messenger.warn(message)
+                        element_name = os.path.join(dirpath, filename)
+                        element_name = element_name[element_path_length:]
+                        all_elements.append(element_name)
 
-        if doubly_matched:
-            raise StreamError(
-                "The provided glob expression(s) matched both element names and artifact names: {}".format(
-                    ", ".join(doubly_matched)
-                ),
-                reason="glob-elements-and-artifacts",
-            )
-
-        return element_targets, artifact_names
+            # Glob the elements and add the results to the set
+            #
+            for glob in element_globs.keys():
+                glob_results = list(utils.glob(all_elements, glob))
+                for element_name in glob_results:
+                    element_names[glob_results] = True
+                if not glob_results:
+                    self._context.messenger.warn("No elements matched the glob expression: {}".format(glob))
+
+        # Glob the artifact names and add the results to the set
+        #
+        for glob in artifact_globs.keys():
+            glob_results = self._artifacts.list_artifacts(glob=glob)
+            for artifact_name in glob_results:
+                artifact_names[glob_results] = True
+            if not glob_results:
+                self._context.messenger.warn("No artifact names matched the glob expression: {}".format(glob))
+
+        return list(element_names), list(artifact_names)
 
 
 # _handle_compression()
diff --git a/tests/format/elementnames.py b/tests/format/elementnames.py
index f059d54..2e34952 100644
--- a/tests/format/elementnames.py
+++ b/tests/format/elementnames.py
@@ -11,19 +11,33 @@ DATA_DIR = os.path.dirname(os.path.realpath(__file__))
 
 
 @pytest.mark.parametrize(
-    "target,reason,provenance",
+    "target,domain,reason,provenance",
     [
-        ("farm.pony", LoadErrorReason.BAD_ELEMENT_SUFFIX, None),
-        ('The "quoted" pony.bst', LoadErrorReason.BAD_CHARACTERS_IN_NAME, None),
-        ("bad-suffix-dep.bst", LoadErrorReason.BAD_ELEMENT_SUFFIX, "bad-suffix-dep.bst [line 3 column 2]"),
-        ("bad-chars-dep.bst", LoadErrorReason.BAD_CHARACTERS_IN_NAME, "bad-chars-dep.bst [line 3 column 2]"),
+        # When specifying a bad suffix on the command line we get a different error, we
+        # catch this error earlier on in the load sequence while sorting out element and
+        # artifact names and glob expressions.
+        #
+        ("farm.pony", ErrorDomain.STREAM, "invalid-element-names", None),
+        ('The "quoted" pony.bst', ErrorDomain.LOAD, LoadErrorReason.BAD_CHARACTERS_IN_NAME, None),
+        (
+            "bad-suffix-dep.bst",
+            ErrorDomain.LOAD,
+            LoadErrorReason.BAD_ELEMENT_SUFFIX,
+            "bad-suffix-dep.bst [line 3 column 2]",
+        ),
+        (
+            "bad-chars-dep.bst",
+            ErrorDomain.LOAD,
+            LoadErrorReason.BAD_CHARACTERS_IN_NAME,
+            "bad-chars-dep.bst [line 3 column 2]",
+        ),
     ],
     ids=["toplevel-bad-suffix", "toplevel-bad-chars", "dependency-bad-suffix", "dependency-bad-chars"],
 )
 @pytest.mark.datafiles(DATA_DIR)
-def test_invalid_element_names(cli, datafiles, target, reason, provenance):
+def test_invalid_element_names(cli, datafiles, target, domain, reason, provenance):
     project = os.path.join(str(datafiles), "elementnames")
     result = cli.run(project=project, silent=True, args=["show", target])
-    result.assert_main_error(ErrorDomain.LOAD, reason)
+    result.assert_main_error(domain, reason)
     if provenance:
         assert provenance in result.stderr
diff --git a/tests/frontend/artifact_show.py b/tests/frontend/artifact_show.py
index 652adfb..6810847 100644
--- a/tests/frontend/artifact_show.py
+++ b/tests/frontend/artifact_show.py
@@ -150,30 +150,6 @@ def test_artifact_show_glob(cli, tmpdir, datafiles, pattern, expected_prefixes):
         assert found, "Expected result {} not found".format(expected_prefix)
 
 
-# Test artifact show glob behaviors
-@pytest.mark.datafiles(SIMPLE_DIR)
-@pytest.mark.parametrize(
-    "pattern",
-    [
-        # Catch all glob will match everything, that is an error since the glob matches
-        # both elements and artifacts
-        #
-        "**",
-        # This glob is more selective but will also match both artifacts and elements
-        #
-        "**import-bin**",
-    ],
-)
-def test_artifact_show_doubly_matched_glob_error(cli, tmpdir, datafiles, pattern):
-    project = str(datafiles)
-
-    result = cli.run(project=project, args=["build", "target.bst"])
-    result.assert_success()
-
-    result = cli.run(project=project, args=["artifact", "show", pattern])
-    result.assert_main_error(ErrorDomain.STREAM, "glob-elements-and-artifacts")
-
-
 # Test artifact show artifact in remote
 @pytest.mark.datafiles(DATA_DIR)
 def test_artifact_show_element_available_remotely(cli, tmpdir, datafiles):
diff --git a/tests/frontend/show.py b/tests/frontend/show.py
index 456123b..9ed5b65 100644
--- a/tests/frontend/show.py
+++ b/tests/frontend/show.py
@@ -55,9 +55,6 @@ def test_show_fail(cli, datafiles):
 @pytest.mark.parametrize(
     "pattern,expected_elements",
     [
-        # Use catch all glob. This should report all elements.
-        #
-        ("**", ["import-bin.bst", "import-dev.bst", "compose-all.bst", "target.bst", "subdir/target.bst"]),
         # Only bst files, same as "**" for `bst show`
         #
         ("**.bst", ["import-bin.bst", "import-dev.bst", "compose-all.bst", "target.bst", "subdir/target.bst"]),
@@ -67,16 +64,13 @@ def test_show_fail(cli, datafiles):
         ("*.bst", ["import-bin.bst", "import-dev.bst", "compose-all.bst", "target.bst"]),
         # Report only targets in the subdirectory
         #
-        ("subdir/*", ["subdir/target.bst"]),
+        ("subdir/*.bst", ["subdir/target.bst"]),
         # Report both targets which end in "target.bst"
         #
         ("**target.bst", ["target.bst", "subdir/target.bst"]),
         # All elements starting with the prefix "import"
         #
-        ("import*", ["import-bin.bst", "import-dev.bst"]),
-        # Glob would match artifact refs, but `bst show` does not accept these as input.
-        #
-        ("test/**", []),
+        ("import*.bst", ["import-bin.bst", "import-dev.bst"]),
     ],
     ids=["**", "**.bst", "*.bst", "subdir/*", "**target.bst", "import*", "test/**"],
 )
diff --git a/tests/frontend/workspace.py b/tests/frontend/workspace.py
index 26c0a99..bd64a24 100644
--- a/tests/frontend/workspace.py
+++ b/tests/frontend/workspace.py
@@ -215,7 +215,7 @@ def test_open_multi_unwritable(cli, tmpdir, datafiles):
         # Using this finally to make sure we always put thing back how they should be.
         os.chmod(workspace_object.workspace_cmd, cwdstat.st_mode)
 
-    result.assert_main_error(ErrorDomain.STREAM, None)
+    result.assert_main_error(ErrorDomain.STREAM, "workspace-directory-failure")
     # Normally we avoid checking stderr in favour of using the mechine readable result.assert_main_error
     # But Tristan was very keen that the names of the elements left needing workspaces were present in the out put
     assert " ".join([element_name for element_name, workspace_dir_suffix in element_tuples[1:]]) in result.stderr
diff --git a/tests/internals/loader.py b/tests/internals/loader.py
index 2da0172..fd4a357 100644
--- a/tests/internals/loader.py
+++ b/tests/internals/loader.py
@@ -90,7 +90,8 @@ def test_invalid_key(datafiles):
 def test_invalid_directory_load(datafiles):
 
     basedir = str(datafiles)
+    os.makedirs(os.path.join(basedir, "element.bst"))
     with make_loader(basedir) as loader, pytest.raises(LoadError) as exc:
-        loader.load(["elements/"])
+        loader.load(["element.bst"])
 
     assert exc.value.reason == LoadErrorReason.LOADING_DIRECTORY