You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by tv...@apache.org on 2021/12/07 10:44:02 UTC

[buildstream] 02/04: Add fetch and track source URL policy

This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tristan/mirror-policy
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 62dac2b6d77973edc1569161704ed7f05b21a9ef
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Nov 12 17:12:31 2021 +0900

    Add fetch and track source URL policy
    
    This adds a new feature which allows the user configuration to specify
    which URIs are allowed to be accessed while fetching and tracking sources.
---
 src/buildstream/_context.py          | 20 +++++++++++++++++++-
 src/buildstream/_project.py          | 33 ++++++++++++++++++++++-----------
 src/buildstream/data/userconfig.yaml | 22 ++++++++++++++++++++++
 src/buildstream/source.py            |  6 +++---
 src/buildstream/types.py             | 23 +++++++++++++++++++++++
 5 files changed, 89 insertions(+), 15 deletions(-)

diff --git a/src/buildstream/_context.py b/src/buildstream/_context.py
index bdc780b..911d278 100644
--- a/src/buildstream/_context.py
+++ b/src/buildstream/_context.py
@@ -34,7 +34,7 @@ from ._elementsourcescache import ElementSourcesCache
 from ._remotespec import RemoteSpec, RemoteExecutionSpec
 from ._sourcecache import SourceCache
 from ._cas import CASCache, CASLogLevel
-from .types import _CacheBuildTrees, _PipelineSelection, _SchedulerErrorAction
+from .types import _CacheBuildTrees, _PipelineSelection, _SchedulerErrorAction, _SourceUriPolicy
 from ._workspaces import Workspaces, WorkspaceProjectCache
 from .node import Node, MappingNode
 
@@ -169,6 +169,12 @@ class Context:
         # Control which dependencies to build
         self.build_dependencies: Optional[_PipelineSelection] = None
 
+        # Policy controlling which URIs can be accessed when fetching sources
+        self.fetch_source: Optional[_SourceUriPolicy] = None
+
+        # Policy controlling which URIs can be accessed when tracking sources
+        self.track_source: Optional[_SourceUriPolicy] = None
+
         # Size of the artifact cache in bytes
         self.config_cache_quota: Optional[int] = None
 
@@ -298,6 +304,8 @@ class Context:
                 "logdir",
                 "scheduler",
                 "build",
+                "fetch",
+                "track",
                 "artifacts",
                 "source-caches",
                 "logging",
@@ -431,6 +439,16 @@ class Context:
             )
         self.build_dependencies = _PipelineSelection(dependencies)
 
+        # Load fetch config
+        fetch = defaults.get_mapping("fetch")
+        fetch.validate_keys(["source"])
+        self.fetch_source = fetch.get_enum("source", _SourceUriPolicy)
+
+        # Load track config
+        track = defaults.get_mapping("track")
+        track.validate_keys(["source"])
+        self.track_source = track.get_enum("source", _SourceUriPolicy)
+
         # Load per-projects overrides
         self._project_overrides = defaults.get_mapping("projects", default={})
 
diff --git a/src/buildstream/_project.py b/src/buildstream/_project.py
index 24783ea..c699f87 100644
--- a/src/buildstream/_project.py
+++ b/src/buildstream/_project.py
@@ -37,7 +37,7 @@ from .exceptions import LoadErrorReason
 from ._options import OptionPool
 from .node import ScalarNode, MappingNode, ProvenanceInformation, _assert_symbol_name
 from ._pluginfactory import ElementFactory, SourceFactory, load_plugin_origin
-from .types import CoreWarnings, _HostMount, _SourceMirror
+from .types import CoreWarnings, _HostMount, _SourceMirror, _SourceUriPolicy
 from ._projectrefs import ProjectRefs, ProjectRefStorage
 from ._loader import Loader, LoadContext
 from .element import Element
@@ -141,6 +141,7 @@ class Project:
         self._shell_command: List[str] = []  # The default interactive shell command
         self._shell_environment: Dict[str, str] = {}  # Statically set environment vars
         self._shell_host_files: List[_HostMount] = []  # A list of HostMount objects
+        self._mirror_override: bool = False  # Whether mirrors have been declared in user configuration
 
         # This is a lookup table of lists indexed by project,
         # the child dictionaries are lists of ScalarNodes indicating
@@ -393,9 +394,10 @@ class Project:
     # Args:
     #    alias (str): The alias.
     #    first_pass (bool): Whether to use first pass configuration (for junctions)
+    #    tracking (bool): Whether we want the aliases for tracking (otherwise assume fetching)
     #
     # Returns a list of every URI to replace an alias with
-    def get_alias_uris(self, alias, *, first_pass=False):
+    def get_alias_uris(self, alias, *, first_pass=False, tracking=False):
         if first_pass:
             config = self.first_pass_config
         else:
@@ -404,16 +406,23 @@ class Project:
         if not alias or alias not in config._aliases:  # pylint: disable=unsupported-membership-test
             return [None]
 
-        mirror_list = []
-        for mirror_name, mirror in config.mirrors.items():
-            if alias in mirror.aliases:
-                if mirror_name == config.default_mirror:
-                    mirror_list = mirror.aliases[alias] + mirror_list
-                else:
-                    mirror_list += mirror.aliases[alias]
+        uri_list = []
+        policy = self._context.track_source if tracking else self._context.fetch_source
 
-        mirror_list.append(config._aliases.get_str(alias))
-        return mirror_list
+        if policy in (_SourceUriPolicy.ALL, _SourceUriPolicy.MIRRORS) or (
+            policy == _SourceUriPolicy.USER and self._mirror_override
+        ):
+            for mirror_name, mirror in config.mirrors.items():
+                if alias in mirror.aliases:
+                    if mirror_name == config.default_mirror:
+                        uri_list = mirror.aliases[alias] + uri_list
+                    else:
+                        uri_list += mirror.aliases[alias]
+
+        if policy in (_SourceUriPolicy.ALL, _SourceUriPolicy.ALIASES):
+            uri_list.append(config._aliases.get_str(alias))
+
+        return uri_list
 
     # load_elements()
     #
@@ -1002,6 +1011,8 @@ class Project:
         mirrors_node = overrides.get_sequence("mirrors", default=None)
         if mirrors_node is None:
             mirrors_node = config.get_sequence("mirrors", default=[])
+        else:
+            self._mirror_override = True
 
         # Perform variable substitutions in source mirror definitions,
         # even if the mirrors are specified in user configuration.
diff --git a/src/buildstream/data/userconfig.yaml b/src/buildstream/data/userconfig.yaml
index 3232de3..3fc29f7 100644
--- a/src/buildstream/data/userconfig.yaml
+++ b/src/buildstream/data/userconfig.yaml
@@ -72,6 +72,28 @@ build:
 
 
 #
+# Source fetch related configuration
+#
+fetch:
+
+  #
+  # Which URIs are allowed to be accessed: all, aliases, mirrors or user
+  #
+  source: all
+
+
+#
+# Source track related configuration
+#
+track:
+
+  #
+  # Which URIs are allowed to be accessed: all, aliases, mirrors or user
+  #
+  source: all
+
+
+#
 #    Logging
 #
 logging:
diff --git a/src/buildstream/source.py b/src/buildstream/source.py
index afc2092..b2dfd60 100644
--- a/src/buildstream/source.py
+++ b/src/buildstream/source.py
@@ -1256,7 +1256,7 @@ class Source(Plugin):
                         break
 
                 alias = fetcher._get_alias()
-                for uri in project.get_alias_uris(alias, first_pass=self.__first_pass):
+                for uri in project.get_alias_uris(alias, first_pass=self.__first_pass, tracking=False):
                     try:
                         fetcher.fetch(uri)
                     # FIXME: Need to consider temporary vs. permanent failures,
@@ -1284,7 +1284,7 @@ class Source(Plugin):
                 self.fetch(**kwargs)
                 return
 
-            for uri in project.get_alias_uris(alias, first_pass=self.__first_pass):
+            for uri in project.get_alias_uris(alias, first_pass=self.__first_pass, tracking=False):
                 new_source = self.__clone_for_uri(uri)
                 try:
                     new_source.fetch(**kwargs)
@@ -1314,7 +1314,7 @@ class Source(Plugin):
 
         # NOTE: We are assuming here that tracking only requires substituting the
         #       first alias used
-        for uri in reversed(project.get_alias_uris(alias, first_pass=self.__first_pass)):
+        for uri in reversed(project.get_alias_uris(alias, first_pass=self.__first_pass, tracking=True)):
             new_source = self.__clone_for_uri(uri)
             try:
                 ref = new_source.track(**kwargs)  # pylint: disable=assignment-from-none
diff --git a/src/buildstream/types.py b/src/buildstream/types.py
index 6fa4fe8..f278820 100644
--- a/src/buildstream/types.py
+++ b/src/buildstream/types.py
@@ -255,6 +255,29 @@ class _CacheBuildTrees(FastEnum):
     NEVER = "never"
 
 
+# _SourceUriPolicy()
+#
+# A policy for which URIs to access when fetching and tracking sources
+#
+class _SourceUriPolicy(FastEnum):
+
+    # Use all URIs from default aliases and mirrors
+    ALL = "all"
+
+    # Use only the base source aliases defined in project configuration,
+    # never the URIs provided by source mirrors
+    ALIASES = "aliases"
+
+    # Use only URIs from source mirrors (whether they are found
+    # in project configuration or user configuration)
+    MIRRORS = "mirrors"
+
+    # Use only URIs from user configuration, intentionally causing
+    # a failure if we try to access a source for which the user
+    # configuration has not provided a mirror
+    USER = "user"
+
+
 # _PipelineSelection()
 #
 # Defines the kind of pipeline selection to make when the pipeline