You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by ro...@apache.org on 2020/12/29 13:36:01 UTC

[buildstream] 02/02: Make DownloadableFileSource clearly define public and private fields

This is an automated email from the ASF dual-hosted git repository.

root pushed a commit to branch jonathan/expose-downloadablefilesource
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 2c0d7a614de63eacda257fbfc8898bcdb2a79492
Author: Jonathan Maw <jo...@codethink.co.uk>
AuthorDate: Tue Aug 28 16:12:45 2018 +0100

    Make DownloadableFileSource clearly define public and private fields
    
    TarSource and RemoteSource now parse url from the config themselves,
    because they need it, but the url is private.
---
 buildstream/downloadablefilesource.py | 146 +++++++++++++++++++++-------------
 buildstream/plugins/sources/deb.py    |   2 +-
 buildstream/plugins/sources/remote.py |  10 ++-
 buildstream/plugins/sources/tar.py    |   8 +-
 buildstream/plugins/sources/zip.py    |   2 +-
 5 files changed, 107 insertions(+), 61 deletions(-)

diff --git a/buildstream/downloadablefilesource.py b/buildstream/downloadablefilesource.py
index 11981c4..593db89 100644
--- a/buildstream/downloadablefilesource.py
+++ b/buildstream/downloadablefilesource.py
@@ -2,6 +2,13 @@
 DownloadableFileSource - Abstract class for downloading files
 =============================================================
 A base abstract class for source implementations which download a file.
+
+Derived classes must write their own stage() implementation, using the
+public APIs exposed in this class.
+
+Derived classes must also chain up to the parent method in their get_unique_key()
+implementations.
+
 """
 
 import os
@@ -19,50 +26,54 @@ class DownloadableFileSource(Source):
 
     COMMON_CONFIG_KEYS = Source.COMMON_CONFIG_KEYS + ['url', 'ref', 'etag']
 
+    #####################################
+    # Implementations of abstract methods
+    #####################################
+
     def configure(self, node):
-        self.original_url = self.node_get_member(node, str, 'url')
-        self.ref = self.node_get_member(node, str, 'ref', None)
-        self.url = self.translate_url(self.original_url)
-        self._warn_deprecated_etag(node)
+        self.__original_url = self.node_get_member(node, str, 'url')
+        self.__ref = self.node_get_member(node, str, 'ref', None)
+        self.__url = self.translate_url(self.__original_url)
+        self.__warn_deprecated_etag(node)
 
     def preflight(self):
         return
 
     def get_unique_key(self):
-        return [self.original_url, self.ref]
+        return [self.__original_url, self.__ref]
 
     def get_consistency(self):
-        if self.ref is None:
+        if self.__ref is None:
             return Consistency.INCONSISTENT
 
-        if os.path.isfile(self._get_mirror_file()):
+        if os.path.isfile(self.get_mirror_file()):
             return Consistency.CACHED
 
         else:
             return Consistency.RESOLVED
 
     def load_ref(self, node):
-        self.ref = self.node_get_member(node, str, 'ref', None)
-        self._warn_deprecated_etag(node)
+        self.__ref = self.node_get_member(node, str, 'ref', None)
+        self.__warn_deprecated_etag(node)
 
     def get_ref(self):
-        return self.ref
+        return self.__ref
 
     def set_ref(self, ref, node):
-        node['ref'] = self.ref = ref
+        node['ref'] = self.__ref = ref
 
     def track(self):
         # there is no 'track' field in the source to determine what/whether
         # or not to update refs, because tracking a ref is always a conscious
         # decision by the user.
-        with self.timed_activity("Tracking {}".format(self.url),
+        with self.timed_activity("Tracking {}".format(self.__url),
                                  silent_nested=True):
-            new_ref = self._ensure_mirror()
+            new_ref = self.ensure_mirror()
 
-            if self.ref and self.ref != new_ref:
+            if self.__ref and self.__ref != new_ref:
                 detail = "When tracking, new ref differs from current ref:\n" \
-                    + "  Tracked URL: {}\n".format(self.url) \
-                    + "  Current ref: {}\n".format(self.ref) \
+                    + "  Tracked URL: {}\n".format(self.__url) \
+                    + "  Current ref: {}\n".format(self.__ref) \
                     + "  New ref: {}\n".format(new_ref)
                 self.warn("Potential man-in-the-middle attack!", detail=detail)
 
@@ -74,49 +85,40 @@ class DownloadableFileSource(Source):
         # file to be already cached because Source.fetch() will
         # not be called if the source is already Consistency.CACHED.
         #
-        if os.path.isfile(self._get_mirror_file()):
+        if os.path.isfile(self.get_mirror_file()):
             return  # pragma: nocover
 
         # Download the file, raise hell if the sha256sums don't match,
         # and mirror the file otherwise.
-        with self.timed_activity("Fetching {}".format(self.url), silent_nested=True):
-            sha256 = self._ensure_mirror()
-            if sha256 != self.ref:
+        with self.timed_activity("Fetching {}".format(self.__url), silent_nested=True):
+            sha256 = self.ensure_mirror()
+            if sha256 != self.__ref:
                 raise SourceError("File downloaded from {} has sha256sum '{}', not '{}'!"
-                                  .format(self.url, sha256, self.ref))
+                                  .format(self.__url, sha256, self.__ref))
+    ################
+    # Public methods
+    ################
 
-    def _warn_deprecated_etag(self, node):
-        etag = self.node_get_member(node, str, 'etag', None)
-        if etag:
-            provenance = self.node_provenance(node, member_name='etag')
-            self.warn('{} "etag" is deprecated and ignored.'.format(provenance))
+    def ensure_mirror(self):
+        """Downloads from the url and caches it according to its sha256sum
 
-    def _get_etag(self, ref):
-        etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref))
-        if os.path.exists(etagfilename):
-            with open(etagfilename, 'r') as etagfile:
-                return etagfile.read()
-
-        return None
+        Returns:
+           (str): The sha256sum of the mirrored file
 
-    def _store_etag(self, ref, etag):
-        etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref))
-        with utils.save_file_atomic(etagfilename) as etagfile:
-            etagfile.write(etag)
-
-    def _ensure_mirror(self):
-        # Downloads from the url and caches it according to its sha256sum.
+        Raises:
+           :class:`.SourceError`
+        """
         try:
             with self.tempdir() as td:
-                default_name = os.path.basename(self.url)
-                request = urllib.request.Request(self.url)
+                default_name = os.path.basename(self.__url)
+                request = urllib.request.Request(self.__url)
                 request.add_header('Accept', '*/*')
 
                 # We do not use etag in case what we have in cache is
                 # not matching ref in order to be able to recover from
                 # corrupted download.
-                if self.ref:
-                    etag = self._get_etag(self.ref)
+                if self.__ref:
+                    etag = self.__get_etag(self.__ref)
 
                     # Do not re-download the file if the ETag matches.
                     if etag and self.get_consistency() == Consistency.CACHED:
@@ -134,17 +136,17 @@ class DownloadableFileSource(Source):
                         shutil.copyfileobj(response, dest)
 
                 # Make sure url-specific mirror dir exists.
-                if not os.path.isdir(self._get_mirror_dir()):
-                    os.makedirs(self._get_mirror_dir())
+                if not os.path.isdir(self.__get_mirror_dir()):
+                    os.makedirs(self.__get_mirror_dir())
 
                 # Store by sha256sum
                 sha256 = utils.sha256sum(local_file)
                 # Even if the file already exists, move the new file over.
                 # In case the old file was corrupted somehow.
-                os.rename(local_file, self._get_mirror_file(sha256))
+                os.rename(local_file, self.get_mirror_file(sha=sha256))
 
                 if etag:
-                    self._store_etag(sha256, etag)
+                    self.__store_etag(sha256, etag)
                 return sha256
 
         except urllib.error.HTTPError as e:
@@ -152,19 +154,53 @@ class DownloadableFileSource(Source):
                 # 304 Not Modified.
                 # Because we use etag only for matching ref, currently specified ref is what
                 # we would have downloaded.
-                return self.ref
+                return self.__ref
             raise SourceError("{}: Error mirroring {}: {}"
-                              .format(self, self.url, e), temporary=True) from e
+                              .format(self, self.__url, e), temporary=True) from e
 
         except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError, ValueError) as e:
             # Note that urllib.request.Request in the try block may throw a
             # ValueError for unknown url types, so we handle it here.
             raise SourceError("{}: Error mirroring {}: {}"
-                              .format(self, self.url, e), temporary=True) from e
+                              .format(self, self.__url, e), temporary=True) from e
+
+    def get_mirror_file(self, *, sha=None):
+        """Calculates the path to where this source stores the downloaded file
 
-    def _get_mirror_dir(self):
+        Users are expected to read the file this points to when staging their source.
+
+        Returns:
+           (str): A path to the file the source should be cached at
+        """
+        return os.path.join(self.__get_mirror_dir(), sha or self.__ref)
+
+    #######################
+    # Local Private methods
+    #######################
+
+    def __get_mirror_dir(self):
+        # Get the directory this source should store things in, for a given URL
         return os.path.join(self.get_mirror_directory(),
-                            utils.url_directory_name(self.original_url))
+                            utils.url_directory_name(self.__original_url))
+
+    def __warn_deprecated_etag(self, node):
+        # Warn the user if the 'etag' field is being used
+        etag = self.node_get_member(node, str, 'etag', None)
+        if etag:
+            provenance = self.node_provenance(node, member_name='etag')
+            self.warn('{} "etag" is deprecated and ignored.'.format(provenance))
 
-    def _get_mirror_file(self, sha=None):
-        return os.path.join(self._get_mirror_dir(), sha or self.ref)
+    def __get_etag(self, ref):
+        # Retrieve the etag's data from disk
+        etagfilename = os.path.join(self.__get_mirror_dir(), '{}.etag'.format(ref))
+        if os.path.exists(etagfilename):
+            with open(etagfilename, 'r') as etagfile:
+                return etagfile.read()
+
+        return None
+
+    def __store_etag(self, ref, etag):
+        # Write the etag's data to disk
+        etagfilename = os.path.join(self.__get_mirror_dir(), '{}.etag'.format(ref))
+        with utils.save_file_atomic(etagfilename) as etagfile:
+            etagfile.write(etag)
diff --git a/buildstream/plugins/sources/deb.py b/buildstream/plugins/sources/deb.py
index 1b7dafd..b62a976 100644
--- a/buildstream/plugins/sources/deb.py
+++ b/buildstream/plugins/sources/deb.py
@@ -68,7 +68,7 @@ class DebSource(TarSource):
 
     @contextmanager
     def _get_tar(self):
-        with open(self._get_mirror_file(), 'rb') as deb_file:
+        with open(self.get_mirror_file(), 'rb') as deb_file:
             arpy_archive = arpy.Archive(fileobj=deb_file)
             arpy_archive.read_all_headers()
             data_tar_arpy = [v for k, v in arpy_archive.archived_files.items() if b"data.tar" in k][0]
diff --git a/buildstream/plugins/sources/remote.py b/buildstream/plugins/sources/remote.py
index c296d31..fbfff90 100644
--- a/buildstream/plugins/sources/remote.py
+++ b/buildstream/plugins/sources/remote.py
@@ -61,7 +61,13 @@ class RemoteSource(DownloadableFileSource):
     def configure(self, node):
         super().configure(node)
 
-        self.filename = self.node_get_member(node, str, 'filename', os.path.basename(self.url))
+        self.filename = self.node_get_member(node, str, 'filename', None)
+        if not self.filename:
+            # url in DownloadableFileSource is private, so we read it again
+            original_url = self.node_get_member(node, str, 'url')
+            url = self.translate_url(original_url)
+            self.filename = os.path.basename(url)
+
         self.executable = self.node_get_member(node, bool, 'executable', False)
 
         if os.sep in self.filename:
@@ -78,7 +84,7 @@ class RemoteSource(DownloadableFileSource):
         dest = os.path.join(directory, self.filename)
         with self.timed_activity("Staging remote file to {}".format(dest)):
 
-            utils.safe_copy(self._get_mirror_file(), dest)
+            utils.safe_copy(self.get_mirror_file(), dest)
 
             # To prevent user's umask introducing variability here, explicitly set
             # file modes.
diff --git a/buildstream/plugins/sources/tar.py b/buildstream/plugins/sources/tar.py
index 75219dc..667b283 100644
--- a/buildstream/plugins/sources/tar.py
+++ b/buildstream/plugins/sources/tar.py
@@ -71,6 +71,10 @@ class TarSource(DownloadableFileSource):
     def configure(self, node):
         super().configure(node)
 
+        # url in DownloadableFileSource is private, so we read it again
+        original_url = self.node_get_member(node, str, 'url')
+        self.url = self.translate_url(original_url)
+
         self.base_dir = self.node_get_member(node, str, 'base-dir', '*') or None
 
         self.node_validate(node, DownloadableFileSource.COMMON_CONFIG_KEYS + ['base-dir'])
@@ -87,7 +91,7 @@ class TarSource(DownloadableFileSource):
     def _run_lzip(self):
         assert self.host_lzip
         with TemporaryFile() as lzip_stdout:
-            with open(self._get_mirror_file(), 'r') as lzip_file:
+            with open(self.get_mirror_file(), 'r') as lzip_file:
                 self.call([self.host_lzip, '-d'],
                           stdin=lzip_file,
                           stdout=lzip_stdout)
@@ -102,7 +106,7 @@ class TarSource(DownloadableFileSource):
                 with tarfile.open(fileobj=lzip_dec, mode='r:') as tar:
                     yield tar
         else:
-            with tarfile.open(self._get_mirror_file()) as tar:
+            with tarfile.open(self.get_mirror_file()) as tar:
                 yield tar
 
     def stage(self, directory):
diff --git a/buildstream/plugins/sources/zip.py b/buildstream/plugins/sources/zip.py
index dafd6f5..5773e0d 100644
--- a/buildstream/plugins/sources/zip.py
+++ b/buildstream/plugins/sources/zip.py
@@ -83,7 +83,7 @@ class ZipSource(DownloadableFileSource):
         noexec_rights = exec_rights & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
 
         try:
-            with zipfile.ZipFile(self._get_mirror_file()) as archive:
+            with zipfile.ZipFile(self.get_mirror_file()) as archive:
                 base_dir = None
                 if self.base_dir:
                     base_dir = self._find_base_dir(archive, self.base_dir)