You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by tv...@apache.org on 2022/04/05 10:04:18 UTC

[buildstream-plugins] 03/49: Initially adding cargo source

This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/buildstream-plugins.git

commit fee253c97b0dfb785826c29ecae64c84dfa52e99
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Mar 18 16:28:06 2022 +0900

    Initially adding cargo source
    
    From bst-plugins-experimental
---
 src/buildstream_plugins/sources/cargo.py | 442 +++++++++++++++++++++++++++++++
 1 file changed, 442 insertions(+)

diff --git a/src/buildstream_plugins/sources/cargo.py b/src/buildstream_plugins/sources/cargo.py
new file mode 100644
index 0000000..bebc305
--- /dev/null
+++ b/src/buildstream_plugins/sources/cargo.py
@@ -0,0 +1,442 @@
+#
+#  Copyright (C) 2019 Codethink Limited
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+#  Authors:
+#        Tristan Van Berkom <tr...@codethink.co.uk>
+
+"""
+cargo - Automatically stage crate dependencies
+==============================================
+A convenience Source element for vendoring rust project dependencies.
+
+Placing this source in the source list, after a source which stages a
+Cargo.lock file, will allow this source to read the Cargo.lock file and
+obtain the crates automatically into the directory configured with ``vendor-dir``.
+
+**Usage:**
+
+.. code:: yaml
+
+   # Specify the cargo source kind
+   kind: cargo
+
+   # URL of the crates repository to download from (default: https://static.crates.io/crates)
+   url: https://static.crates.io/crates
+
+   # Internal source reference, this is a list of dictionaries which store
+   # the crate names, versions and (once tracked) their sha256 checksums as `sha`.
+   #
+   # This will be automatically updated with `bst track`
+   ref:
+   - name: packagename
+     version: 1.2.1
+   - name: packagename
+     version: 1.3.0
+
+   # Specify a directory for the vendored crates (defaults to ./crates)
+   vendor-dir: crates
+
+   # Optionally specify the name of the lock file to use (defaults to Cargo.lock)
+   cargo-lock: Cargo.lock
+
+
+See `built-in functionality documentation
+<https://docs.buildstream.build/master/buildstream.source.html#core-source-builtins>`_ for
+details on common configuration options for sources.
+"""
+
+import contextlib
+import json
+import os.path
+import shutil
+import tarfile
+import urllib.error
+import urllib.request
+
+import toml
+from buildstream import Source, SourceFetcher, SourceError
+from buildstream import utils
+
+
+# This automatically goes into .cargo/config
+#
+# Template with two placeholders, {vendorurl} and {vendordir}, which are
+# filled in with str.format() when the vendor configuration is staged.
+_default_vendor_config_template = (
+    "[source.crates-io]\n"
+    'registry = "{vendorurl}"\n'
+    'replace-with = "vendored-sources"\n'
+    "[source.vendored-sources]\n"
+    'directory = "{vendordir}"\n'
+)
+
+
+# Crate()
+#
+# Use a SourceFetcher class to be the per crate helper
+#
+# Args:
+#    cargo (Cargo): The main Source implementation
+#    name (str): The name of the crate to depend on
+#    version (str): The version of the crate to depend on
+#    sha (str|None): The sha256 checksum of the downloaded crate
+#
+class Crate(SourceFetcher):
+    def __init__(self, cargo, name, version, sha=None):
+        super().__init__()
+
+        self.cargo = cargo
+        self.name = name
+        # Always keep the version as a string, whatever type was passed in
+        self.version = str(version)
+        self.sha = sha
+        self.mark_download_url(self._get_url())
+
+    ########################################################
+    #     SourceFetcher API method implementations         #
+    ########################################################
+
+    # fetch()
+    #
+    # Downloads this crate into the local mirror and, when a sha is
+    # known for this crate, verifies the downloaded file against it.
+    #
+    # Args:
+    #    alias_override (str|None): The URL alias override to download with, if any
+    #
+    # Raises:
+    #    (SourceError): If the download fails or the checksum does not match
+    #
+    def fetch(self, alias_override=None, **kwargs):
+
+        # Just a defensive check, it is impossible for the
+        # file to be already cached because Source.fetch() will
+        # not be called if the source is already cached.
+        #
+        # is_cached() also copes with a crate which has no sha yet,
+        # in which case there is no mirror file name to look for.
+        #
+        if self.is_cached():
+            return  # pragma: nocover
+
+        # Download the crate
+        crate_url = self._get_url(alias_override)
+        with self.cargo.timed_activity("Downloading: {}".format(crate_url), silent_nested=True):
+            sha256 = self._download(crate_url)
+            if self.sha is not None and sha256 != self.sha:
+                raise SourceError(
+                    "File downloaded from {} has sha256sum '{}', not '{}'!".format(crate_url, sha256, self.sha)
+                )
+
+    ########################################################
+    #        Helper APIs for the Cargo Source to use       #
+    ########################################################
+
+    # stage()
+    #
+    # A delegate method to do the work for a single crate
+    # in Source.stage().
+    #
+    # The crate archive is extracted into the vendor directory, and a
+    # `.cargo-checksum.json` file recording the crate's sha is written
+    # into the directory named by the first path component of the
+    # archive's first member.
+    #
+    # Args:
+    #    (directory): The vendor subdirectory to stage to
+    #
+    # Raises:
+    #    (SourceError): If the archive cannot be read or written, or if it
+    #                   contains members which would land outside of `directory`
+    #
+    def stage(self, directory):
+        try:
+            mirror_file = self._get_mirror_file()
+            with tarfile.open(mirror_file) as tar:
+                members = tar.getmembers()
+
+                # The archive was downloaded from the network, never let
+                # it write anything outside of the vendor directory.
+                self._validate_members(members, directory)
+                tar.extractall(path=directory, members=members)
+
+            if members:
+                dirname = members[0].name.split("/")[0]
+                package_dir = os.path.join(directory, dirname)
+                checksum_file = os.path.join(package_dir, ".cargo-checksum.json")
+                with open(checksum_file, "w", encoding="utf-8") as f:
+                    checksum_data = {"package": self.sha, "files": {}}
+                    json.dump(checksum_data, f)
+
+        except (tarfile.TarError, OSError) as e:
+            raise SourceError("{}: Error staging source: {}".format(self, e)) from e
+
+    # is_cached()
+    #
+    # Get whether we have a local cached version of the source
+    #
+    # Returns:
+    #   (bool): Whether we are cached or not
+    #
+    def is_cached(self):
+        # Without a sha there is no mirror file name to look up
+        if self.sha is None:
+            return False
+        return os.path.isfile(self._get_mirror_file())
+
+    # is_resolved()
+    #
+    # Get whether the current crate is resolved
+    #
+    # Returns:
+    #   (bool): Whether we have a sha or not
+    #
+    def is_resolved(self):
+        return self.sha is not None
+
+    ########################################################
+    #                   Private helpers                    #
+    ########################################################
+
+    # _validate_members()
+    #
+    # Ensures that extracting the given archive members into `directory`
+    # cannot create or overwrite anything outside of `directory`, either
+    # through the member names or through symbolic/hard link targets.
+    #
+    # Args:
+    #    members (list): The tarfile.TarInfo objects about to be extracted
+    #    directory (str): The directory the archive will be extracted to
+    #
+    # Raises:
+    #    (SourceError): If any member would escape `directory`
+    #
+    def _validate_members(self, members, directory):
+        base = os.path.realpath(directory)
+
+        for member in members:
+            targets = [os.path.join(base, member.name)]
+            if member.issym():
+                # Symbolic link targets are relative to the link's own directory
+                targets.append(os.path.join(base, os.path.dirname(member.name), member.linkname))
+            elif member.islnk():
+                # Hard link targets are names of other members of the archive
+                targets.append(os.path.join(base, member.linkname))
+
+            for target in targets:
+                if os.path.commonpath([base, os.path.realpath(target)]) != base:
+                    raise SourceError(
+                        "Crate {}-{}: Refusing to stage '{}', it points outside of the vendor directory".format(
+                            self.name, self.version, member.name
+                        )
+                    )
+
+    # _download()
+    #
+    # Downloads the crate from the url and caches it.
+    #
+    # Args:
+    #    url (str): The url to download from
+    #
+    # Returns:
+    #    (str): The sha256 checksum of the downloaded crate
+    #
+    # Raises:
+    #    (SourceError): A temporary error, if the download or the local
+    #                   file handling fails
+    #
+    def _download(self, url):
+
+        try:
+            with self.cargo.tempdir() as td:
+                default_name = os.path.basename(url)
+                request = urllib.request.Request(url)
+                request.add_header("Accept", "*/*")
+                request.add_header("User-Agent", "BuildStream/2")
+
+                # We do not use etag in case what we have in cache is
+                # not matching ref in order to be able to recover from
+                # corrupted download.
+                if self.sha:
+                    etag = self._get_etag(self.sha)
+                    if etag and self.is_cached():
+                        request.add_header("If-None-Match", etag)
+
+                with contextlib.closing(urllib.request.urlopen(request)) as response:
+                    info = response.info()
+
+                    # None when the server did not send an ETag header
+                    etag = info.get("ETag")
+
+                    # Only ever keep the base name, so that a server provided
+                    # file name cannot place the file outside of the tempdir
+                    filename = info.get_filename(default_name)
+                    filename = os.path.basename(filename)
+                    local_file = os.path.join(td, filename)
+                    with open(local_file, "wb") as dest:
+                        shutil.copyfileobj(response, dest)
+
+                # Make sure url-specific mirror dir exists.
+                os.makedirs(self._get_mirror_dir(), exist_ok=True)
+
+                # Store by sha256sum
+                sha256 = utils.sha256sum(local_file)
+                # Even if the file already exists, move the new file over.
+                # In case the old file was corrupted somehow.
+                os.rename(local_file, self._get_mirror_file(sha256))
+
+                if etag:
+                    self._store_etag(sha256, etag)
+                return sha256
+
+        except urllib.error.HTTPError as e:
+            if e.code == 304:
+                # 304 Not Modified.
+                # Because we use etag only for matching sha, currently specified sha is what
+                # we would have downloaded.
+                return self.sha
+            raise SourceError("{}: Error mirroring {}: {}".format(self, url, e), temporary=True,) from e
+
+        except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError,) as e:
+            raise SourceError("{}: Error mirroring {}: {}".format(self, url, e), temporary=True,) from e
+
+    # _get_url()
+    #
+    # Fetches the URL to download this crate from
+    #
+    # Args:
+    #    alias (str|None): The URL alias to apply, if any
+    #
+    # Returns:
+    #    (str): The URL for this crate
+    #
+    def _get_url(self, alias=None):
+        url = self.cargo.translate_url(self.cargo.url, alias_override=alias)
+        return "{url}/{name}/{name}-{version}.crate".format(url=url, name=self.name, version=self.version)
+
+    # _get_etag()
+    #
+    # Fetches the locally stored ETag information for this
+    # crate's download.
+    #
+    # Args:
+    #    sha (str): The sha256 checksum of the downloaded crate
+    #
+    # Returns:
+    #    (str|None): The ETag to use for requests, or None if nothing is
+    #                locally downloaded
+    #
+    def _get_etag(self, sha):
+        etagfilename = os.path.join(self._get_mirror_dir(), "{}.etag".format(sha))
+
+        # Just try to read it, rather than checking for existence first
+        # and racing against anything else touching the mirror directory.
+        try:
+            with open(etagfilename, "r", encoding="utf-8") as etagfile:
+                return etagfile.read()
+        except FileNotFoundError:
+            return None
+
+    # _store_etag()
+    #
+    # Stores the locally cached ETag information for this crate.
+    #
+    # Args:
+    #    sha (str): The sha256 checksum of the downloaded crate
+    #    etag (str): The ETag to use for requests of this crate
+    #
+    def _store_etag(self, sha, etag):
+        etagfilename = os.path.join(self._get_mirror_dir(), "{}.etag".format(sha))
+        with utils.save_file_atomic(etagfilename) as etagfile:
+            etagfile.write(etag)
+
+    # _get_mirror_dir()
+    #
+    # Gets the local mirror directory for this upstream cargo repository
+    #
+    # Returns:
+    #    (str): A directory specific to the repository url, crate name and version
+    #
+    def _get_mirror_dir(self):
+        return os.path.join(
+            self.cargo.get_mirror_directory(), utils.url_directory_name(self.cargo.url), self.name, self.version,
+        )
+
+    # _get_mirror_file()
+    #
+    # Gets the local mirror filename for this crate
+    #
+    # Args:
+    #    sha (str|None): The sha256 checksum of the downloaded crate,
+    #                    defaults to the sha this crate was created with
+    #
+    # Returns:
+    #    (str): The path of the mirrored crate, which is named after its sha
+    #
+    # Raises:
+    #    (SourceError): If no sha was given and this crate has none either
+    #
+    def _get_mirror_file(self, sha=None):
+        sha = sha or self.sha
+        if sha is None:
+            raise SourceError(
+                "Crate {}-{} has no sha256 checksum, cannot locate its mirror file".format(self.name, self.version)
+            )
+        return os.path.join(self._get_mirror_dir(), sha)
+
+
+class CargoSource(Source):
+    BST_MIN_VERSION = "2.0"
+
+    # We need the Cargo.lock file to construct our ref at track time
+    BST_REQUIRES_PREVIOUS_SOURCES_TRACK = True
+
+    ########################################################
+    #       Plugin/Source API method implementations       #
+    ########################################################
+
+    # configure()
+    #
+    # Loads the url, ref, cargo-lock and vendor-dir configuration
+    # and creates the Crate objects described by the ref.
+    #
+    def configure(self, node):
+
+        # The url before any aliasing
+        #
+        self.url = node.get_str("url", "https://static.crates.io/crates")
+        self.ref = self._read_ref(node)
+        self.cargo_lock = node.get_str("cargo-lock", "Cargo.lock")
+        self.vendor_dir = node.get_str("vendor-dir", "crates")
+
+        node.validate_keys(Source.COMMON_CONFIG_KEYS + ["url", "ref", "cargo-lock", "vendor-dir"])
+
+        self.crates = self._parse_crates(self.ref)
+
+    def preflight(self):
+        return
+
+    def get_unique_key(self):
+        return [self.url, self.cargo_lock, self.vendor_dir, self.ref]
+
+    # A ref without crates is resolved, a ref with crates is only
+    # resolved once every crate has its sha.
+    def is_resolved(self):
+        return (self.ref is not None) and all(crate.is_resolved() for crate in self.crates)
+
+    def is_cached(self):
+        return all(crate.is_cached() for crate in self.crates)
+
+    # load_ref()
+    #
+    # Loads the ref from the given node, in the same plain
+    # data form which configure() stores it in.
+    #
+    def load_ref(self, node):
+        self.ref = self._read_ref(node)
+        self.crates = self._parse_crates(self.ref)
+
+    def get_ref(self):
+        return self.ref
+
+    def set_ref(self, ref, node):
+        node["ref"] = self.ref = ref
+        self.crates = self._parse_crates(self.ref)
+
+    # track()
+    #
+    # Builds a new ref from the Cargo.lock file found in the previously
+    # staged sources, downloading every crate to obtain its sha.
+    #
+    # Args:
+    #    previous_sources_dir (str): The directory in which the lock file is looked up
+    #
+    # Returns:
+    #    (list): Dictionaries with the "name", "version" and "sha" of each
+    #            crate, sorted by name and version
+    #
+    # Raises:
+    #    (SourceError): If the lock file is missing or malformed, or if
+    #                   a crate cannot be downloaded
+    #
+    def track(self, *, previous_sources_dir):
+        lockfile = os.path.join(previous_sources_dir, self.cargo_lock)
+
+        try:
+            with open(lockfile, "r", encoding="utf-8") as f:
+                try:
+                    lock = toml.load(f)
+                except toml.TomlDecodeError as e:
+                    raise SourceError(
+                        "Malformed Cargo.lock file at: {}".format(self.cargo_lock), detail="{}".format(e),
+                    ) from e
+        except FileNotFoundError as e:
+            raise SourceError(
+                "Failed to find Cargo.lock file at: {}".format(self.cargo_lock),
+                detail="The cargo plugin expects to find a Cargo.lock file in\n"
+                + "the sources staged before it in the source list, but none was found.",
+            ) from e
+
+        # A lock file without any package simply results in an empty ref
+        packages = lock.get("package", [])
+        if not isinstance(packages, list):
+            raise self._malformed_lock("The 'package' key is expected to be a list of tables")
+
+        new_ref = []
+        for package in packages:
+            if not isinstance(package, dict):
+                raise self._malformed_lock("Every 'package' entry is expected to be a table")
+
+            # Packages without a source are skipped, presumably these are
+            # local packages for which there is nothing to download.
+            if "source" not in package:
+                continue
+
+            try:
+                new_ref.append({"name": package["name"], "version": str(package["version"])})
+            except KeyError as e:
+                raise self._malformed_lock("A 'package' entry is missing the '{}' key".format(e.args[0])) from e
+
+        # Make sure the order we set it at track time is deterministic
+        new_ref.sort(key=lambda c: (c["name"], c["version"]))
+
+        # Download the crates and get their shas
+        for crate_obj in new_ref:
+            crate = Crate(self, crate_obj["name"], crate_obj["version"])
+
+            crate_url = crate._get_url()
+            with self.timed_activity("Downloading: {}".format(crate_url), silent_nested=True):
+                crate_obj["sha"] = crate._download(crate_url)
+
+        return new_ref
+
+    # stage()
+    #
+    # Stages every crate into the vendor directory and writes the
+    # `.cargo/config` file which redirects crates-io to that directory.
+    #
+    # Args:
+    #    directory (str): The directory to stage into
+    #
+    def stage(self, directory):
+
+        # Stage the crates into the vendor directory
+        vendor_dir = os.path.join(directory, self.vendor_dir)
+        for crate in self.crates:
+            crate.stage(vendor_dir)
+
+        # Stage our vendor config
+        vendor_config = _default_vendor_config_template.format(
+            vendorurl=self.translate_url(self.url), vendordir=self.vendor_dir
+        )
+        conf_dir = os.path.join(directory, ".cargo")
+        conf_file = os.path.join(conf_dir, "config")
+        os.makedirs(conf_dir, exist_ok=True)
+        with open(conf_file, "w", encoding="utf-8") as f:
+            f.write(vendor_config)
+
+    def get_source_fetchers(self):
+        return self.crates
+
+    ########################################################
+    #                   Private helpers                    #
+    ########################################################
+
+    # _read_ref():
+    #
+    # Reads the "ref" sequence from a node and strips the node
+    # information from it, so that _parse_crates() and
+    # get_unique_key() always deal with the same kind of data.
+    #
+    # Args:
+    #    node: The node to read the "ref" key from
+    #
+    # Returns:
+    #    The stripped ref, or None if the node has no ref
+    #
+    def _read_ref(self, node):
+        ref = node.get_sequence("ref", None)
+        if ref is not None:
+            ref = ref.strip_node_info()
+        return ref
+
+    # _malformed_lock():
+    #
+    # Creates the error to raise for a lock file which could be
+    # parsed, but does not have the expected structure.
+    #
+    # Args:
+    #    detail (str): What is wrong with the lock file
+    #
+    # Returns:
+    #    (SourceError): The error, for the caller to raise
+    #
+    def _malformed_lock(self, detail):
+        return SourceError("Malformed Cargo.lock file at: {}".format(self.cargo_lock), detail=detail)
+
+    # _parse_crates():
+    #
+    # Generates a list of crates based on the passed ref
+    #
+    # Args:
+    #    (list|None) refs: The list of name/version dictionaries,
+    #                      each optionally holding a "sha" as well
+    #
+    # Returns:
+    #    (list): A list of Crate objects
+    #
+    def _parse_crates(self, refs):
+
+        # Return an empty list for no ref
+        if refs is None:
+            return []
+
+        return [Crate(self, crate["name"], crate["version"], sha=crate.get("sha", None),) for crate in refs]
+
+
+# setup()
+#
+# Returns:
+#    (type): The Source implementation provided by this module
+#
+def setup():
+    source_type = CargoSource
+    return source_type