You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by ro...@apache.org on 2020/12/29 13:35:59 UTC

[buildstream] branch jonathan/expose-downloadablefilesource created (now 2c0d7a6)

This is an automated email from the ASF dual-hosted git repository.

root pushed a change to branch jonathan/expose-downloadablefilesource
in repository https://gitbox.apache.org/repos/asf/buildstream.git.


      at 2c0d7a6  Make DownloadableFileSource clearly define public and private fields

This branch includes the following new commits:

     new 7f8d0b9  Make DownloadableFileSource publically accessible
     new 2c0d7a6  Make DownloadableFileSource clearly define public and private fields

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[buildstream] 01/02: Make DownloadableFileSource publically accessible

Posted by ro...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

root pushed a commit to branch jonathan/expose-downloadablefilesource
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 7f8d0b9cac97b830b718b00ed3db9096c06d525c
Author: Jonathan Maw <jo...@codethink.co.uk>
AuthorDate: Tue Aug 28 14:12:37 2018 +0100

    Make DownloadableFileSource publically accessible
---
 buildstream/__init__.py                                             | 1 +
 .../_downloadablefilesource.py => downloadablefilesource.py}        | 6 +++++-
 buildstream/plugins/sources/remote.py                               | 3 +--
 buildstream/plugins/sources/tar.py                                  | 3 +--
 buildstream/plugins/sources/zip.py                                  | 3 +--
 5 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/buildstream/__init__.py b/buildstream/__init__.py
index af2122e..5a6612e 100644
--- a/buildstream/__init__.py
+++ b/buildstream/__init__.py
@@ -34,3 +34,4 @@ if "_BST_COMPLETION" not in os.environ:
     from .element import Element, ElementError
     from .buildelement import BuildElement
     from .scriptelement import ScriptElement
+    from .downloadablefilesource import DownloadableFileSource
diff --git a/buildstream/plugins/sources/_downloadablefilesource.py b/buildstream/downloadablefilesource.py
similarity index 96%
rename from buildstream/plugins/sources/_downloadablefilesource.py
rename to buildstream/downloadablefilesource.py
index 7d1fc07..11981c4 100644
--- a/buildstream/plugins/sources/_downloadablefilesource.py
+++ b/buildstream/downloadablefilesource.py
@@ -1,4 +1,8 @@
-"""A base abstract class for source implementations which download a file"""
+"""
+DownloadableFileSource - Abstract class for downloading files
+=============================================================
+A base abstract class for source implementations which download a file.
+"""
 
 import os
 import urllib.request
diff --git a/buildstream/plugins/sources/remote.py b/buildstream/plugins/sources/remote.py
index a6b02fd..c296d31 100644
--- a/buildstream/plugins/sources/remote.py
+++ b/buildstream/plugins/sources/remote.py
@@ -52,8 +52,7 @@ details on common configuration options for sources.
    The ``remote`` plugin is available since :ref:`format version 10 <project_format_version>`
 """
 import os
-from buildstream import SourceError, utils
-from ._downloadablefilesource import DownloadableFileSource
+from buildstream import SourceError, utils, DownloadableFileSource
 
 
 class RemoteSource(DownloadableFileSource):
diff --git a/buildstream/plugins/sources/tar.py b/buildstream/plugins/sources/tar.py
index 195c059..75219dc 100644
--- a/buildstream/plugins/sources/tar.py
+++ b/buildstream/plugins/sources/tar.py
@@ -62,8 +62,7 @@ from tempfile import TemporaryFile
 
 from buildstream import SourceError
 from buildstream import utils
-
-from ._downloadablefilesource import DownloadableFileSource
+from buildstream import DownloadableFileSource
 
 
 class TarSource(DownloadableFileSource):
diff --git a/buildstream/plugins/sources/zip.py b/buildstream/plugins/sources/zip.py
index f5fac3a..dafd6f5 100644
--- a/buildstream/plugins/sources/zip.py
+++ b/buildstream/plugins/sources/zip.py
@@ -62,8 +62,7 @@ import stat
 
 from buildstream import SourceError
 from buildstream import utils
-
-from ._downloadablefilesource import DownloadableFileSource
+from buildstream import DownloadableFileSource
 
 
 class ZipSource(DownloadableFileSource):


[buildstream] 02/02: Make DownloadableFileSource clearly define public and private fields

Posted by ro...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

root pushed a commit to branch jonathan/expose-downloadablefilesource
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit 2c0d7a614de63eacda257fbfc8898bcdb2a79492
Author: Jonathan Maw <jo...@codethink.co.uk>
AuthorDate: Tue Aug 28 16:12:45 2018 +0100

    Make DownloadableFileSource clearly define public and private fields
    
    TarSource and RemoteSource now parse url from the config themselves,
    because they need it, but the url is private.
---
 buildstream/downloadablefilesource.py | 146 +++++++++++++++++++++-------------
 buildstream/plugins/sources/deb.py    |   2 +-
 buildstream/plugins/sources/remote.py |  10 ++-
 buildstream/plugins/sources/tar.py    |   8 +-
 buildstream/plugins/sources/zip.py    |   2 +-
 5 files changed, 107 insertions(+), 61 deletions(-)

diff --git a/buildstream/downloadablefilesource.py b/buildstream/downloadablefilesource.py
index 11981c4..593db89 100644
--- a/buildstream/downloadablefilesource.py
+++ b/buildstream/downloadablefilesource.py
@@ -2,6 +2,13 @@
 DownloadableFileSource - Abstract class for downloading files
 =============================================================
 A base abstract class for source implementations which download a file.
+
+Derived classes must write their own stage() implementation, using the
+public APIs exposed in this class.
+
+Derived classes must also chain up to the parent method in their get_unique_key()
+implementations.
+
 """
 
 import os
@@ -19,50 +26,54 @@ class DownloadableFileSource(Source):
 
     COMMON_CONFIG_KEYS = Source.COMMON_CONFIG_KEYS + ['url', 'ref', 'etag']
 
+    #####################################
+    # Implementations of abstract methods
+    #####################################
+
     def configure(self, node):
-        self.original_url = self.node_get_member(node, str, 'url')
-        self.ref = self.node_get_member(node, str, 'ref', None)
-        self.url = self.translate_url(self.original_url)
-        self._warn_deprecated_etag(node)
+        self.__original_url = self.node_get_member(node, str, 'url')
+        self.__ref = self.node_get_member(node, str, 'ref', None)
+        self.__url = self.translate_url(self.__original_url)
+        self.__warn_deprecated_etag(node)
 
     def preflight(self):
         return
 
     def get_unique_key(self):
-        return [self.original_url, self.ref]
+        return [self.__original_url, self.__ref]
 
     def get_consistency(self):
-        if self.ref is None:
+        if self.__ref is None:
             return Consistency.INCONSISTENT
 
-        if os.path.isfile(self._get_mirror_file()):
+        if os.path.isfile(self.get_mirror_file()):
             return Consistency.CACHED
 
         else:
             return Consistency.RESOLVED
 
     def load_ref(self, node):
-        self.ref = self.node_get_member(node, str, 'ref', None)
-        self._warn_deprecated_etag(node)
+        self.__ref = self.node_get_member(node, str, 'ref', None)
+        self.__warn_deprecated_etag(node)
 
     def get_ref(self):
-        return self.ref
+        return self.__ref
 
     def set_ref(self, ref, node):
-        node['ref'] = self.ref = ref
+        node['ref'] = self.__ref = ref
 
     def track(self):
         # there is no 'track' field in the source to determine what/whether
         # or not to update refs, because tracking a ref is always a conscious
         # decision by the user.
-        with self.timed_activity("Tracking {}".format(self.url),
+        with self.timed_activity("Tracking {}".format(self.__url),
                                  silent_nested=True):
-            new_ref = self._ensure_mirror()
+            new_ref = self.ensure_mirror()
 
-            if self.ref and self.ref != new_ref:
+            if self.__ref and self.__ref != new_ref:
                 detail = "When tracking, new ref differs from current ref:\n" \
-                    + "  Tracked URL: {}\n".format(self.url) \
-                    + "  Current ref: {}\n".format(self.ref) \
+                    + "  Tracked URL: {}\n".format(self.__url) \
+                    + "  Current ref: {}\n".format(self.__ref) \
                     + "  New ref: {}\n".format(new_ref)
                 self.warn("Potential man-in-the-middle attack!", detail=detail)
 
@@ -74,49 +85,40 @@ class DownloadableFileSource(Source):
         # file to be already cached because Source.fetch() will
         # not be called if the source is already Consistency.CACHED.
         #
-        if os.path.isfile(self._get_mirror_file()):
+        if os.path.isfile(self.get_mirror_file()):
             return  # pragma: nocover
 
         # Download the file, raise hell if the sha256sums don't match,
         # and mirror the file otherwise.
-        with self.timed_activity("Fetching {}".format(self.url), silent_nested=True):
-            sha256 = self._ensure_mirror()
-            if sha256 != self.ref:
+        with self.timed_activity("Fetching {}".format(self.__url), silent_nested=True):
+            sha256 = self.ensure_mirror()
+            if sha256 != self.__ref:
                 raise SourceError("File downloaded from {} has sha256sum '{}', not '{}'!"
-                                  .format(self.url, sha256, self.ref))
+                                  .format(self.__url, sha256, self.__ref))
+    ################
+    # Public methods
+    ################
 
-    def _warn_deprecated_etag(self, node):
-        etag = self.node_get_member(node, str, 'etag', None)
-        if etag:
-            provenance = self.node_provenance(node, member_name='etag')
-            self.warn('{} "etag" is deprecated and ignored.'.format(provenance))
+    def ensure_mirror(self):
+        """Downloads from the url and caches it according to its sha256sum
 
-    def _get_etag(self, ref):
-        etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref))
-        if os.path.exists(etagfilename):
-            with open(etagfilename, 'r') as etagfile:
-                return etagfile.read()
-
-        return None
+        Returns:
+           (str): The sha256sum of the mirrored file
 
-    def _store_etag(self, ref, etag):
-        etagfilename = os.path.join(self._get_mirror_dir(), '{}.etag'.format(ref))
-        with utils.save_file_atomic(etagfilename) as etagfile:
-            etagfile.write(etag)
-
-    def _ensure_mirror(self):
-        # Downloads from the url and caches it according to its sha256sum.
+        Raises:
+           :class:`.SourceError`
+        """
         try:
             with self.tempdir() as td:
-                default_name = os.path.basename(self.url)
-                request = urllib.request.Request(self.url)
+                default_name = os.path.basename(self.__url)
+                request = urllib.request.Request(self.__url)
                 request.add_header('Accept', '*/*')
 
                 # We do not use etag in case what we have in cache is
                 # not matching ref in order to be able to recover from
                 # corrupted download.
-                if self.ref:
-                    etag = self._get_etag(self.ref)
+                if self.__ref:
+                    etag = self.__get_etag(self.__ref)
 
                     # Do not re-download the file if the ETag matches.
                     if etag and self.get_consistency() == Consistency.CACHED:
@@ -134,17 +136,17 @@ class DownloadableFileSource(Source):
                         shutil.copyfileobj(response, dest)
 
                 # Make sure url-specific mirror dir exists.
-                if not os.path.isdir(self._get_mirror_dir()):
-                    os.makedirs(self._get_mirror_dir())
+                if not os.path.isdir(self.__get_mirror_dir()):
+                    os.makedirs(self.__get_mirror_dir())
 
                 # Store by sha256sum
                 sha256 = utils.sha256sum(local_file)
                 # Even if the file already exists, move the new file over.
                 # In case the old file was corrupted somehow.
-                os.rename(local_file, self._get_mirror_file(sha256))
+                os.rename(local_file, self.get_mirror_file(sha=sha256))
 
                 if etag:
-                    self._store_etag(sha256, etag)
+                    self.__store_etag(sha256, etag)
                 return sha256
 
         except urllib.error.HTTPError as e:
@@ -152,19 +154,53 @@ class DownloadableFileSource(Source):
                 # 304 Not Modified.
                 # Because we use etag only for matching ref, currently specified ref is what
                 # we would have downloaded.
-                return self.ref
+                return self.__ref
             raise SourceError("{}: Error mirroring {}: {}"
-                              .format(self, self.url, e), temporary=True) from e
+                              .format(self, self.__url, e), temporary=True) from e
 
         except (urllib.error.URLError, urllib.error.ContentTooShortError, OSError, ValueError) as e:
             # Note that urllib.request.Request in the try block may throw a
             # ValueError for unknown url types, so we handle it here.
             raise SourceError("{}: Error mirroring {}: {}"
-                              .format(self, self.url, e), temporary=True) from e
+                              .format(self, self.__url, e), temporary=True) from e
+
+    def get_mirror_file(self, *, sha=None):
+        """Calculates the path to where this source stores the downloaded file
 
-    def _get_mirror_dir(self):
+        Users are expected to read the file this points to when staging their source.
+
+        Returns:
+           (str): A path to the file the source should be cached at
+        """
+        return os.path.join(self.__get_mirror_dir(), sha or self.__ref)
+
+    #######################
+    # Local Private methods
+    #######################
+
+    def __get_mirror_dir(self):
+        # Get the directory this source should store things in, for a given URL
         return os.path.join(self.get_mirror_directory(),
-                            utils.url_directory_name(self.original_url))
+                            utils.url_directory_name(self.__original_url))
+
+    def __warn_deprecated_etag(self, node):
+        # Warn the user if the 'etag' field is being used
+        etag = self.node_get_member(node, str, 'etag', None)
+        if etag:
+            provenance = self.node_provenance(node, member_name='etag')
+            self.warn('{} "etag" is deprecated and ignored.'.format(provenance))
 
-    def _get_mirror_file(self, sha=None):
-        return os.path.join(self._get_mirror_dir(), sha or self.ref)
+    def __get_etag(self, ref):
+        # Retrieve the etag's data from disk
+        etagfilename = os.path.join(self.__get_mirror_dir(), '{}.etag'.format(ref))
+        if os.path.exists(etagfilename):
+            with open(etagfilename, 'r') as etagfile:
+                return etagfile.read()
+
+        return None
+
+    def __store_etag(self, ref, etag):
+        # Write the etag's data to disk
+        etagfilename = os.path.join(self.__get_mirror_dir(), '{}.etag'.format(ref))
+        with utils.save_file_atomic(etagfilename) as etagfile:
+            etagfile.write(etag)
diff --git a/buildstream/plugins/sources/deb.py b/buildstream/plugins/sources/deb.py
index 1b7dafd..b62a976 100644
--- a/buildstream/plugins/sources/deb.py
+++ b/buildstream/plugins/sources/deb.py
@@ -68,7 +68,7 @@ class DebSource(TarSource):
 
     @contextmanager
     def _get_tar(self):
-        with open(self._get_mirror_file(), 'rb') as deb_file:
+        with open(self.get_mirror_file(), 'rb') as deb_file:
             arpy_archive = arpy.Archive(fileobj=deb_file)
             arpy_archive.read_all_headers()
             data_tar_arpy = [v for k, v in arpy_archive.archived_files.items() if b"data.tar" in k][0]
diff --git a/buildstream/plugins/sources/remote.py b/buildstream/plugins/sources/remote.py
index c296d31..fbfff90 100644
--- a/buildstream/plugins/sources/remote.py
+++ b/buildstream/plugins/sources/remote.py
@@ -61,7 +61,13 @@ class RemoteSource(DownloadableFileSource):
     def configure(self, node):
         super().configure(node)
 
-        self.filename = self.node_get_member(node, str, 'filename', os.path.basename(self.url))
+        self.filename = self.node_get_member(node, str, 'filename', None)
+        if not self.filename:
+            # url in DownloadableFileSource is private, so we read it again
+            original_url = self.node_get_member(node, str, 'url')
+            url = self.translate_url(original_url)
+            self.filename = os.path.basename(url)
+
         self.executable = self.node_get_member(node, bool, 'executable', False)
 
         if os.sep in self.filename:
@@ -78,7 +84,7 @@ class RemoteSource(DownloadableFileSource):
         dest = os.path.join(directory, self.filename)
         with self.timed_activity("Staging remote file to {}".format(dest)):
 
-            utils.safe_copy(self._get_mirror_file(), dest)
+            utils.safe_copy(self.get_mirror_file(), dest)
 
             # To prevent user's umask introducing variability here, explicitly set
             # file modes.
diff --git a/buildstream/plugins/sources/tar.py b/buildstream/plugins/sources/tar.py
index 75219dc..667b283 100644
--- a/buildstream/plugins/sources/tar.py
+++ b/buildstream/plugins/sources/tar.py
@@ -71,6 +71,10 @@ class TarSource(DownloadableFileSource):
     def configure(self, node):
         super().configure(node)
 
+        # url in DownloadableFileSource is private, so we read it again
+        original_url = self.node_get_member(node, str, 'url')
+        self.url = self.translate_url(original_url)
+
         self.base_dir = self.node_get_member(node, str, 'base-dir', '*') or None
 
         self.node_validate(node, DownloadableFileSource.COMMON_CONFIG_KEYS + ['base-dir'])
@@ -87,7 +91,7 @@ class TarSource(DownloadableFileSource):
     def _run_lzip(self):
         assert self.host_lzip
         with TemporaryFile() as lzip_stdout:
-            with open(self._get_mirror_file(), 'r') as lzip_file:
+            with open(self.get_mirror_file(), 'r') as lzip_file:
                 self.call([self.host_lzip, '-d'],
                           stdin=lzip_file,
                           stdout=lzip_stdout)
@@ -102,7 +106,7 @@ class TarSource(DownloadableFileSource):
                 with tarfile.open(fileobj=lzip_dec, mode='r:') as tar:
                     yield tar
         else:
-            with tarfile.open(self._get_mirror_file()) as tar:
+            with tarfile.open(self.get_mirror_file()) as tar:
                 yield tar
 
     def stage(self, directory):
diff --git a/buildstream/plugins/sources/zip.py b/buildstream/plugins/sources/zip.py
index dafd6f5..5773e0d 100644
--- a/buildstream/plugins/sources/zip.py
+++ b/buildstream/plugins/sources/zip.py
@@ -83,7 +83,7 @@ class ZipSource(DownloadableFileSource):
         noexec_rights = exec_rights & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
 
         try:
-            with zipfile.ZipFile(self._get_mirror_file()) as archive:
+            with zipfile.ZipFile(self.get_mirror_file()) as archive:
                 base_dir = None
                 if self.base_dir:
                     base_dir = self._find_base_dir(archive, self.base_dir)