You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by tv...@apache.org on 2022/04/05 10:04:18 UTC
[buildstream-plugins] 03/49: Initially adding cargo source
This is an automated email from the ASF dual-hosted git repository.
tvb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/buildstream-plugins.git
commit fee253c97b0dfb785826c29ecae64c84dfa52e99
Author: Tristan van Berkom <tr...@codethink.co.uk>
AuthorDate: Fri Mar 18 16:28:06 2022 +0900
Initially adding cargo source
From bst-plugins-experimental
---
src/buildstream_plugins/sources/cargo.py | 442 +++++++++++++++++++++++++++++++
1 file changed, 442 insertions(+)
diff --git a/src/buildstream_plugins/sources/cargo.py b/src/buildstream_plugins/sources/cargo.py
new file mode 100644
index 0000000..bebc305
--- /dev/null
+++ b/src/buildstream_plugins/sources/cargo.py
@@ -0,0 +1,442 @@
+#
+# Copyright (C) 2019 Codethink Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Authors:
+# Tristan Van Berkom <tr...@codethink.co.uk>
+
+"""
+cargo - Automatically stage crate dependencies
+==============================================
+A convenience Source element for vendoring rust project dependencies.
+
+Placing this source in the source list, after a source which stages a
+Cargo.lock file, will allow this source to read the Cargo.lock file and
+obtain the crates automatically into %{vendordir}.
+
+**Usage:**
+
+.. code:: yaml
+
+ # Specify the cargo source kind
+ kind: cargo
+
+ # Url of the crates repository to download from (default: https://static.crates.io/crates)
+ url: https://static.crates.io/crates
+
+ # Internal source reference, this is a list of dictionaries
+ # which store the crate names and versions.
+ #
+ # This will be automatically updated with `bst track`
+ ref:
+ - name: packagename
+ version: 1.2.1
+ - name: packagename
+ version: 1.3.0
+
+ # Specify a directory for the vendored crates (defaults to ./crates)
+ vendor-dir: crates
+
+ # Optionally specify the name of the lock file to use (defaults to Cargo.lock)
+ cargo-lock: Cargo.lock
+
+
+See `built-in functionality doumentation
+<https://docs.buildstream.build/master/buildstream.source.html#core-source-builtins>`_ for
+details on common configuration options for sources.
+"""
+
+import contextlib
+import json
+import os.path
+import shutil
+import tarfile
+import urllib.error
+import urllib.request
+
+import toml
+from buildstream import Source, SourceFetcher, SourceError
+from buildstream import utils
+
+
+# This automatically goes into .cargo/config
+#
+_default_vendor_config_template = (
+ "[source.crates-io]\n"
+ + 'registry = "{vendorurl}"\n'
+ + 'replace-with = "vendored-sources"\n'
+ + "[source.vendored-sources]\n"
+ + 'directory = "{vendordir}"\n'
+)
+
+
+# Crate()
+#
+# Use a SourceFetcher class to be the per crate helper
+#
+# Args:
+# cargo (Cargo): The main Source implementation
+# name (str): The name of the crate to depend on
+# version (str): The version of the crate to depend on
+# sha (str|None): The sha256 checksum of the downloaded crate
+#
+class Crate(SourceFetcher):
+ def __init__(self, cargo, name, version, sha=None):
+ super().__init__()
+
+ self.cargo = cargo
+ self.name = name
+ self.version = str(version)
+ self.sha = sha
+ self.mark_download_url(self._get_url())
+
+ ########################################################
+ # SourceFetcher API method implementations #
+ ########################################################
+
+ def fetch(self, alias_override=None, **kwargs):
+
+ # Just a defensive check, it is impossible for the
+ # file to be already cached because Source.fetch() will
+ # not be called if the source is already cached.
+ #
+ if os.path.isfile(self._get_mirror_file()):
+ return # pragma: nocover
+
+ # Download the crate
+ crate_url = self._get_url(alias_override)
+ with self.cargo.timed_activity("Downloading: {}".format(crate_url), silent_nested=True):
+ sha256 = self._download(crate_url)
+ if self.sha is not None and sha256 != self.sha:
+ raise SourceError(
+ "File downloaded from {} has sha256sum '{}', not '{}'!".format(crate_url, sha256, self.sha)
+ )
+
+ ########################################################
+ # Helper APIs for the Cargo Source to use #
+ ########################################################
+
+ # stage()
+ #
+ # A delegate method to do the work for a single crate
+ # in Source.stage().
+ #
+ # Args:
+ # (directory): The vendor subdirectory to stage to
+ #
+ def stage(self, directory):
+ try:
+ mirror_file = self._get_mirror_file()
+ with tarfile.open(mirror_file) as tar:
+ tar.extractall(path=directory)
+ members = tar.getmembers()
+
+ if members:
+ dirname = members[0].name.split("/")[0]
+ package_dir = os.path.join(directory, dirname)
+ checksum_file = os.path.join(package_dir, ".cargo-checksum.json")
+ with open(checksum_file, "w", encoding="utf-8") as f:
+ checksum_data = {"package": self.sha, "files": {}}
+ json.dump(checksum_data, f)
+
+ except (tarfile.TarError, OSError) as e:
+ raise SourceError("{}: Error staging source: {}".format(self, e)) from e
+
+ # is_cached()
+ #
+ # Get whether we have a local cached version of the source
+ #
+ # Returns:
+ # (bool): Whether we are cached or not
+ #
+ def is_cached(self):
+ return os.path.isfile(self._get_mirror_file())
+
+ # is_resolved()
+ #
+ # Get whether the current crate is resolved
+ #
+ # Returns:
+ # (bool): Whether we have a sha or not
+ #
+ def is_resolved(self):
+ return self.sha is not None
+
+ ########################################################
+ # Private helpers #
+ ########################################################
+
+ # _download()
+ #
+ # Downloads the crate from the url and caches it.
+ #
+ # Args:
+ # url (str): The url to download from
+ #
+ # Returns:
+ # (str): The sha256 checksum of the downloaded crate
+ #
+ def _download(self, url):
+
+ try:
+ with self.cargo.tempdir() as td:
+ default_name = os.path.basename(url)
+ request = urllib.request.Request(url)
+ request.add_header("Accept", "*/*")
+ request.add_header("User-Agent", "BuildStream/2")
+
+ # We do not use etag in case what we have in cache is
+ # not matching ref in order to be able to recover from
+ # corrupted download.
+ if self.sha:
+ etag = self._get_etag(self.sha)
+ if etag and self.is_cached():
+ request.add_header("If-None-Match", etag)
+
+ with contextlib.closing(urllib.request.urlopen(request)) as response:
+ info = response.info()
+
+ etag = info["ETag"] if "ETag" in info else None
+
+ filename = info.get_filename(default_name)
+ filename = os.path.basename(filename)
+ local_file = os.path.join(td, filename)
+ with open(local_file, "wb") as dest:
+ shutil.copyfileobj(response, dest)
+
+ # Make sure url-specific mirror dir exists.
+ os.makedirs(self._get_mirror_dir(), exist_ok=True)
+
+ # Store by sha256sum
+ sha256 = utils.sha256sum(local_file)
+ # Even if the file already exists, move the new file over.
+ # In case the old file was corrupted somehow.
+ os.rename(local_file, self._get_mirror_file(sha256))
+
+ if etag:
+ self._store_etag(sha256, etag)
+ return sha256
+
+ except urllib.error.HTTPError as e:
+ if e.code == 304:
+ # 304 Not Modified.
+ # Because we use etag only for matching sha, currently specified sha is what
+ # we would have downloaded.
+ return self.sha
+ raise SourceError("{}: Error mirroring {}: {}".format(self, url, e), temporary=True,) from e
+
+ except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError,) as e:
+ raise SourceError("{}: Error mirroring {}: {}".format(self, url, e), temporary=True,) from e
+
+ # _get_url()
+ #
+ # Fetches the URL to download this crate from
+ #
+ # Args:
+ # alias (str|None): The URL alias to apply, if any
+ #
+ # Returns:
+ # (str): The URL for this crate
+ #
+ def _get_url(self, alias=None):
+ url = self.cargo.translate_url(self.cargo.url, alias_override=alias)
+ return "{url}/{name}/{name}-{version}.crate".format(url=url, name=self.name, version=self.version)
+
+ # _get_etag()
+ #
+ # Fetches the locally stored ETag information for this
+ # crate's download.
+ #
+ # Args:
+ # sha (str): The sha256 checksum of the downloaded crate
+ #
+ # Returns:
+ # (str|None): The ETag to use for requests, or None if nothing is
+ # locally downloaded
+ #
+ def _get_etag(self, sha):
+ etagfilename = os.path.join(self._get_mirror_dir(), "{}.etag".format(sha))
+ if os.path.exists(etagfilename):
+ with open(etagfilename, "r", encoding="utf-8") as etagfile:
+ return etagfile.read()
+
+ return None
+
+ # _store_etag()
+ #
+ # Stores the locally cached ETag information for this crate.
+ #
+ # Args:
+ # sha (str): The sha256 checksum of the downloaded crate
+ # etag (str): The ETag to use for requests of this crate
+ #
+ def _store_etag(self, sha, etag):
+ etagfilename = os.path.join(self._get_mirror_dir(), "{}.etag".format(sha))
+ with utils.save_file_atomic(etagfilename) as etagfile:
+ etagfile.write(etag)
+
+ # _get_mirror_dir()
+ #
+ # Gets the local mirror directory for this upstream cargo repository
+ #
+ def _get_mirror_dir(self):
+ return os.path.join(
+ self.cargo.get_mirror_directory(), utils.url_directory_name(self.cargo.url), self.name, self.version,
+ )
+
+ # _get_mirror_file()
+ #
+ # Gets the local mirror filename for this crate
+ #
+ # Args:
+ # sha (str|None): The sha256 checksum of the downloaded crate
+ #
+ def _get_mirror_file(self, sha=None):
+ return os.path.join(self._get_mirror_dir(), sha or self.sha)
+
+
+class CargoSource(Source):
+ BST_MIN_VERSION = "2.0"
+
+ # We need the Cargo.lock file to construct our ref at track time
+ BST_REQUIRES_PREVIOUS_SOURCES_TRACK = True
+
+ ########################################################
+ # Plugin/Source API method implementations #
+ ########################################################
+ def configure(self, node):
+
+ # The url before any aliasing
+ #
+ self.url = node.get_str("url", "https://static.crates.io/crates")
+ # XXX: should we use get_sequence here?
+ self.ref = node.get_sequence("ref", None)
+ if self.ref is not None:
+ self.ref = self.ref.strip_node_info()
+ self.cargo_lock = node.get_str("cargo-lock", "Cargo.lock")
+ self.vendor_dir = node.get_str("vendor-dir", "crates")
+
+ node.validate_keys(Source.COMMON_CONFIG_KEYS + ["url", "ref", "cargo-lock", "vendor-dir"])
+
+ self.crates = self._parse_crates(self.ref)
+
+ def preflight(self):
+ return
+
+ def get_unique_key(self):
+ return [self.url, self.cargo_lock, self.vendor_dir, self.ref]
+
+ def is_resolved(self):
+ return (self.ref is not None) and all(crate.is_resolved() for crate in self.crates)
+
+ def is_cached(self):
+ return all(crate.is_cached() for crate in self.crates)
+
+ def load_ref(self, node):
+ # XXX: this should be get_sequence, and parse_crate should expect nodes
+ self.ref = node.get_sequence("ref", None)
+ self.crates = self._parse_crates(self.ref)
+
+ def get_ref(self):
+ return self.ref
+
+ def set_ref(self, ref, node):
+ node["ref"] = self.ref = ref
+ self.crates = self._parse_crates(self.ref)
+
+ def track(self, *, previous_sources_dir):
+ new_ref = []
+ lockfile = os.path.join(previous_sources_dir, self.cargo_lock)
+
+ try:
+ with open(lockfile, "r", encoding="utf-8") as f:
+ try:
+ lock = toml.load(f)
+ except toml.TomlDecodeError as e:
+ raise SourceError(
+ "Malformed Cargo.lock file at: {}".format(self.cargo_lock), detail="{}".format(e),
+ ) from e
+ except FileNotFoundError as e:
+ raise SourceError(
+ "Failed to find Cargo.lock file at: {}".format(self.cargo_lock),
+ detail="The cargo plugin expects to find a Cargo.lock file in\n"
+ + "the sources staged before it in the source list, but none was found.",
+ ) from e
+
+ # FIXME: Better validation would be good here, so we can raise more
+ # useful error messages in the case of a malformed Cargo.lock file.
+ #
+ for package in lock["package"]:
+ if "source" not in package:
+ continue
+ new_ref += [{"name": package["name"], "version": str(package["version"])}]
+
+ # Make sure the order we set it at track time is deterministic
+ new_ref = sorted(new_ref, key=lambda c: (c["name"], c["version"]))
+
+ # Download the crates and get their shas
+ for crate_obj in new_ref:
+ crate = Crate(self, crate_obj["name"], crate_obj["version"])
+
+ crate_url = crate._get_url()
+ with self.timed_activity("Downloading: {}".format(crate_url), silent_nested=True):
+ crate_obj["sha"] = crate._download(crate_url)
+
+ return new_ref
+
+ def stage(self, directory):
+
+ # Stage the crates into the vendor directory
+ vendor_dir = os.path.join(directory, self.vendor_dir)
+ for crate in self.crates:
+ crate.stage(vendor_dir)
+
+ # Stage our vendor config
+ vendor_config = _default_vendor_config_template.format(
+ vendorurl=self.translate_url(self.url), vendordir=self.vendor_dir
+ )
+ conf_dir = os.path.join(directory, ".cargo")
+ conf_file = os.path.join(conf_dir, "config")
+ os.makedirs(conf_dir, exist_ok=True)
+ with open(conf_file, "w", encoding="utf-8") as f:
+ f.write(vendor_config)
+
+ def get_source_fetchers(self):
+ return self.crates
+
+ ########################################################
+ # Private helpers #
+ ########################################################
+
+ # _parse_crates():
+ #
+ # Generates a list of crates based on the passed ref
+ #
+ # Args:
+ # (list|None) refs: The list of name/version dictionaries
+ #
+ # Returns:
+ # (list): A list of Crate objects
+ #
+ def _parse_crates(self, refs):
+
+ # Return an empty list for no ref
+ if refs is None:
+ return []
+
+ return [Crate(self, crate["name"], crate["version"], sha=crate.get("sha", None),) for crate in refs]
+
+
+def setup():
+ return CargoSource