You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@buildstream.apache.org by tv...@apache.org on 2021/02/04 07:37:36 UTC

[buildstream] 01/01: WIP: Make uploading of build trees configurable

This is an automated email from the ASF dual-hosted git repository.

tvb pushed a commit to branch tpollard/566
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit a739e4ac58ea0af334117f362f605666d593745a
Author: Tom Pollard <to...@codethink.co.uk>
AuthorDate: Wed Dec 5 17:45:38 2018 +0000

    WIP: Make uploading of build trees configurable
---
 buildstream/_artifactcache.py         |  89 +++++++++++++++---
 buildstream/_cas/cascache.py          |  93 ++++++++++++-------
 buildstream/_cas/casremote.py         |  18 +++-
 buildstream/element.py                |  30 +++++--
 buildstream/plugintestutils/runcli.py |  20 ++++-
 doc/source/using_config.rst           |  17 ++++
 tests/artifactcache/config.py         |  25 ++++++
 tests/integration/pushbuildtrees.py   | 165 ++++++++++++++++++++++++++++++++++
 8 files changed, 403 insertions(+), 54 deletions(-)

diff --git a/buildstream/_artifactcache.py b/buildstream/_artifactcache.py
index 5404dc1..ee2cf53 100644
--- a/buildstream/_artifactcache.py
+++ b/buildstream/_artifactcache.py
@@ -107,6 +107,7 @@ class ArtifactCache():
 
         self._has_fetch_remotes = False
         self._has_push_remotes = False
+        self._has_partial_push_remotes = False
 
         os.makedirs(self.extractdir, exist_ok=True)
 
@@ -488,6 +489,9 @@ class ArtifactCache():
                 self._has_fetch_remotes = True
                 if remote_spec.push:
                     self._has_push_remotes = True
+                    # Partial push requires generic push option to also be set
+                    if remote_spec.partial_push:
+                        self._has_partial_push_remotes = True
 
                 remotes[remote_spec.url] = CASRemote(remote_spec)
 
@@ -685,6 +689,32 @@ class ArtifactCache():
             remotes_for_project = self._remotes[element._get_project()]
             return any(remote.spec.push for remote in remotes_for_project)
 
+    # has_partial_push_remotes():
+    #
+    # Check whether any remote repositories are available for pushing
+    # non-complete artifacts. This option requires the generic push value
+    # to also be set.
+    #
+    # Args:
+    #     element (Element): The Element to check
+    #
+    # Returns:
+    #   (bool): True if any remote repository is configured for optional
+    #            partial pushes, False otherwise
+    #
+    def has_partial_push_remotes(self, *, element=None):
+        # If there's no partial push remotes available, we can't partial push at all
+        if not self._has_partial_push_remotes:
+            return False
+        elif element is None:
+            # At least one remote is set to allow partial pushes
+            return True
+        else:
+            # Check whether the specified element's project has push remotes configured
+            # to not accept partial artifact pushes
+            remotes_for_project = self._remotes[element._get_project()]
+            return any(remote.spec.partial_push for remote in remotes_for_project)
+
     # push():
     #
     # Push committed artifact to remote repository.
@@ -692,6 +722,8 @@ class ArtifactCache():
     # Args:
     #     element (Element): The Element whose artifact is to be pushed
     #     keys (list): The cache keys to use
+    #     partial(bool): If the artifact is cached in a partial state
+    #     subdir(string): Optional subdir to not push
     #
     # Returns:
     #   (bool): True if any remote was updated, False if no pushes were required
@@ -699,12 +731,25 @@ class ArtifactCache():
     # Raises:
     #   (ArtifactError): if there was an error
     #
-    def push(self, element, keys):
+    def push(self, element, keys, partial=False, subdir=None):
         refs = [self.get_artifact_fullname(element, key) for key in list(keys)]
 
         project = element._get_project()
 
-        push_remotes = [r for r in self._remotes[project] if r.spec.push]
+        push_remotes = []
+        partial_remotes = []
+
+        # Create list of remotes to push to, given current element and partial push config
+        if not partial:
+            push_remotes = [r for r in self._remotes[project] if (r.spec.push and not r.spec.partial_push)]
+
+        if self._has_partial_push_remotes:
+            # Create a specific list of the remotes expecting the artifact to be pushed in a partial
+            # state. This list needs to be pushed in a partial state, without the optional subdir if
+            # it exists locally. No need to attempt pushing a partial artifact to a remote that is
+            # queued to also receive a full artifact
+            partial_remotes = [r for r in self._remotes[project] if (r.spec.partial_push and r.spec.push) and
+                               r not in push_remotes]
 
         pushed = False
 
@@ -713,7 +758,9 @@ class ArtifactCache():
             display_key = element._get_brief_display_key()
             element.status("Pushing artifact {} -> {}".format(display_key, remote.spec.url))
 
-            if self.cas.push(refs, remote):
+            # Passing the optional subdir allows for remote artifacts that are cached in a 'partial'
+            # state to be completed
+            if self.cas.push(refs, remote, subdir=subdir):
                 element.info("Pushed artifact {} -> {}".format(display_key, remote.spec.url))
                 pushed = True
             else:
@@ -721,6 +768,19 @@ class ArtifactCache():
                     remote.spec.url, element._get_brief_display_key()
                 ))
 
+        for remote in partial_remotes:
+            remote.init()
+            display_key = element._get_brief_display_key()
+            element.status("Pushing partial artifact {} -> {}".format(display_key, remote.spec.url))
+
+            if self.cas.push(refs, remote, excluded_subdirs=subdir):
+                element.info("Pushed partial artifact {} -> {}".format(display_key, remote.spec.url))
+                pushed = True
+            else:
+                element.info("Remote ({}) already has {} partial cached".format(
+                    remote.spec.url, element._get_brief_display_key()
+                ))
+
         return pushed
 
     # pull():
@@ -748,14 +808,23 @@ class ArtifactCache():
                 element.status("Pulling artifact {} <- {}".format(display_key, remote.spec.url))
 
                 if self.cas.pull(ref, remote, progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs):
-                    element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
                     if subdir:
-                        # Attempt to extract subdir into artifact extract dir if it already exists
-                        # without containing the subdir. If the respective artifact extract dir does not
-                        # exist a complete extraction will complete.
-                        self.extract(element, key, subdir)
-                    # no need to pull from additional remotes
-                    return True
+                        if not self.contains_subdir_artifact(element, key, subdir):
+                            # The pull was expecting the specific subdir to be present in the remote, attempt
+                            # to find it in other available remotes
+                            element.info("Pulled partial artifact {} <- {}. Attempting to retrieve {} from remotes"
+                                         .format(display_key, remote.spec.url, subdir))
+                        else:
+                            element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
+                            # Attempt to extract subdir into artifact extract dir if it already exists
+                            # without containing the subdir. If the respective artifact extract dir does not
+                            # exist a complete extraction will complete.
+                            self.extract(element, key, subdir)
+                            # no need to pull from additional remotes
+                            return True
+                    else:
+                        element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
+                        return True
                 else:
                     element.info("Remote ({}) does not have {} cached".format(
                         remote.spec.url, element._get_brief_display_key()
diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py
index 9d7a121..e284d83 100644
--- a/buildstream/_cas/cascache.py
+++ b/buildstream/_cas/cascache.py
@@ -200,34 +200,47 @@ class CASCache():
     #   (bool): True if pull was successful, False if ref was not available
     #
     def pull(self, ref, remote, *, progress=None, subdir=None, excluded_subdirs=None):
-        try:
-            remote.init()
 
-            request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
-            request.key = ref
-            response = remote.ref_storage.GetReference(request)
+        tree_found = False
 
-            tree = remote_execution_pb2.Digest()
-            tree.hash = response.digest.hash
-            tree.size_bytes = response.digest.size_bytes
+        while True:
+            try:
+                if not tree_found:
+                    remote.init()
 
-            # Check if the element artifact is present, if so just fetch the subdir.
-            if subdir and os.path.exists(self.objpath(tree)):
-                self._fetch_subdir(remote, tree, subdir)
-            else:
-                # Fetch artifact, excluded_subdirs determined in pullqueue
-                self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
+                    request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
+                    request.key = ref
+                    response = remote.ref_storage.GetReference(request)
 
-            self.set_ref(ref, tree)
+                    tree = remote_execution_pb2.Digest()
+                    tree.hash = response.digest.hash
+                    tree.size_bytes = response.digest.size_bytes
 
-            return True
-        except grpc.RpcError as e:
-            if e.code() != grpc.StatusCode.NOT_FOUND:
-                raise CASCacheError("Failed to pull ref {}: {}".format(ref, e)) from e
-            else:
-                return False
-        except BlobNotFound as e:
-            return False
+                # Check if the element artifact is present, if so just fetch the subdir.
+                if subdir and os.path.exists(self.objpath(tree)):
+                    self._fetch_subdir(remote, tree, subdir)
+                else:
+                    # Fetch artifact, excluded_subdirs determined in pullqueue
+                    self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
+
+                self.set_ref(ref, tree)
+
+                return True
+            except grpc.RpcError as e:
+                if e.code() != grpc.StatusCode.NOT_FOUND:
+                    raise CASCacheError("Failed to pull ref {}: {}".format(ref, e)) from e
+                else:
+                    return False
+            except BlobNotFound as e:
+                if not excluded_subdirs and subdir:
+                    # The remote has the top level digest but could not complete a full pull,
+                    # attempt partial without the need to initialise and check for the artifact
+                    # digest. This default behaviour of dropping back to partial pulls could
+                    # be made a configurable warning given at artifactcache level.
+                    tree_found = True
+                    excluded_subdirs, subdir = subdir, excluded_subdirs
+                else:
+                    return False
 
     # pull_tree():
     #
@@ -272,6 +285,8 @@ class CASCache():
     # Args:
     #     refs (list): The refs to push
     #     remote (CASRemote): The remote to push to
+    #     subdir (string): Optional specific subdir to include in the push
+    #     excluded_subdirs (list): The optional list of subdirs to not push
     #
     # Returns:
     #   (bool): True if any remote was updated, False if no pushes were required
@@ -279,7 +294,7 @@ class CASCache():
     # Raises:
     #   (CASCacheError): if there was an error
     #
-    def push(self, refs, remote):
+    def push(self, refs, remote, *, subdir=None, excluded_subdirs=None):
         skipped_remote = True
         try:
             for ref in refs:
@@ -293,15 +308,18 @@ class CASCache():
                     response = remote.ref_storage.GetReference(request)
 
                     if response.digest.hash == tree.hash and response.digest.size_bytes == tree.size_bytes:
-                        # ref is already on the server with the same tree
-                        continue
+                        # ref is already on the server with the same tree, however it might be partially cached.
+                        # If artifact is not set to be pushed partially attempt to 'complete' the remote artifact if
+                        # needed, else continue.
+                        if excluded_subdirs or remote.verify_digest_on_remote(self._get_subdir(tree, subdir)):
+                            continue
 
                 except grpc.RpcError as e:
                     if e.code() != grpc.StatusCode.NOT_FOUND:
                         # Intentionally re-raise RpcError for outer except block.
                         raise
 
-                self._send_directory(remote, tree)
+                self._send_directory(remote, tree, excluded_dir=excluded_subdirs)
 
                 request = buildstream_pb2.UpdateReferenceRequest(instance_name=remote.spec.instance_name)
                 request.keys.append(ref)
@@ -784,10 +802,17 @@ class CASCache():
                 a += 1
                 b += 1
 
-    def _reachable_refs_dir(self, reachable, tree, update_mtime=False):
+    def _reachable_refs_dir(self, reachable, tree, update_mtime=False, subdir=False):
         if tree.hash in reachable:
             return
 
+        # If looping through subdir digests, skip processing if
+        # ref path does not exist, allowing for partial objects
+        if subdir and not os.path.exists(self.objpath(tree)):
+            return
+
+        # Raises FileNotFound exception if path does not exist,
+        # which should only be thrown on the top level digest
         if update_mtime:
             os.utime(self.objpath(tree))
 
@@ -804,9 +829,9 @@ class CASCache():
             reachable.add(filenode.digest.hash)
 
         for dirnode in directory.directories:
-            self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime)
+            self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime, subdir=True)
 
-    def _required_blobs(self, directory_digest):
+    def _required_blobs(self, directory_digest, excluded_dir=None):
         # parse directory, and recursively add blobs
         d = remote_execution_pb2.Digest()
         d.hash = directory_digest.hash
@@ -825,7 +850,8 @@ class CASCache():
             yield d
 
         for dirnode in directory.directories:
-            yield from self._required_blobs(dirnode.digest)
+            if dirnode.name != excluded_dir:
+                yield from self._required_blobs(dirnode.digest)
 
     # _ensure_blob():
     #
@@ -930,6 +956,7 @@ class CASCache():
             objpath = self._ensure_blob(remote, dir_digest)
 
             directory = remote_execution_pb2.Directory()
+
             with open(objpath, 'rb') as f:
                 directory.ParseFromString(f.read())
 
@@ -972,8 +999,8 @@ class CASCache():
 
         return dirdigest
 
-    def _send_directory(self, remote, digest, u_uid=uuid.uuid4()):
-        required_blobs = self._required_blobs(digest)
+    def _send_directory(self, remote, digest, u_uid=uuid.uuid4(), excluded_dir=None):
+        required_blobs = self._required_blobs(digest, excluded_dir=excluded_dir)
 
         missing_blobs = dict()
         # Limit size of FindMissingBlobs request
diff --git a/buildstream/_cas/casremote.py b/buildstream/_cas/casremote.py
index 56ba4c5..a953165 100644
--- a/buildstream/_cas/casremote.py
+++ b/buildstream/_cas/casremote.py
@@ -23,7 +23,8 @@ from .. import utils
 _MAX_PAYLOAD_BYTES = 1024 * 1024
 
 
-class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key client_cert instance_name')):
+class CASRemoteSpec(namedtuple('CASRemoteSpec',
+                               'url push partial_push server_cert client_key client_cert instance_name')):
 
     # _new_from_config_node
     #
@@ -31,9 +32,18 @@ class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key
     #
     @staticmethod
     def _new_from_config_node(spec_node, basedir=None):
-        _yaml.node_validate(spec_node, ['url', 'push', 'server-cert', 'client-key', 'client-cert', 'instance_name'])
+        _yaml.node_validate(spec_node, ['url', 'push', 'allow-partial-push', 'server-cert', 'client-key',
+                                        'client-cert', 'instance_name'])
         url = _yaml.node_get(spec_node, str, 'url')
         push = _yaml.node_get(spec_node, bool, 'push', default_value=False)
+        partial_push = _yaml.node_get(spec_node, bool, 'allow-partial-push', default_value=False)
+
+        # partial_push depends on push, raise error if not configured correctly
+        if partial_push and not push:
+            provenance = _yaml.node_get_provenance(spec_node, 'allow-partial-push')
+            raise LoadError(LoadErrorReason.INVALID_DATA,
+                            "{}: allow-partial-push also requires push to be set".format(provenance))
+
         if not url:
             provenance = _yaml.node_get_provenance(spec_node, 'url')
             raise LoadError(LoadErrorReason.INVALID_DATA,
@@ -63,10 +73,10 @@ class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key
             raise LoadError(LoadErrorReason.INVALID_DATA,
                             "{}: 'client-cert' was specified without 'client-key'".format(provenance))
 
-        return CASRemoteSpec(url, push, server_cert, client_key, client_cert, instance_name)
+        return CASRemoteSpec(url, push, partial_push, server_cert, client_key, client_cert, instance_name)
 
 
-CASRemoteSpec.__new__.__defaults__ = (None, None, None, None)
+CASRemoteSpec.__new__.__defaults__ = (False, None, None, None, None)
 
 
 class BlobNotFound(CASRemoteError):
diff --git a/buildstream/element.py b/buildstream/element.py
index a243826..705c19e 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -1797,13 +1797,19 @@ class Element(Plugin):
     #   (bool): True if this element does not need a push job to be created
     #
     def _skip_push(self):
+
         if not self.__artifacts.has_push_remotes(element=self):
             # No push remotes for this element's project
             return True
 
         # Do not push elements that aren't cached, or that are cached with a dangling buildtree
-        # artifact unless element type is expected to have an an empty buildtree directory
-        if not self._cached_buildtree():
+        # artifact unless element type is expected to have an empty buildtree directory. Check
+        # that this default behaviour is not overridden via a remote configured to allow pushing
+        # artifacts without their corresponding buildtree.
+        if not self._cached():
+            return True
+
+        if not self._cached_buildtree() and not self.__artifacts.has_partial_push_remotes(element=self):
             return True
 
         # Do not push tainted artifact
@@ -1814,11 +1820,14 @@ class Element(Plugin):
 
     # _push():
     #
-    # Push locally cached artifact to remote artifact repository.
+    # Push locally cached artifact to remote artifact repository. An attempt
+    # will be made to push partial artifacts if given current config dictates.
+    # If a remote set for 'full' artifact pushes is found to be cached partially
+    # in the remote, an attempt will be made to 'complete' it.
     #
     # Returns:
     #   (bool): True if the remote was updated, False if it already existed
-    #           and no updated was required
+    #           and no update was required
     #
     def _push(self):
         self.__assert_cached()
@@ -1827,8 +1836,17 @@ class Element(Plugin):
             self.warn("Not pushing tainted artifact.")
             return False
 
-        # Push all keys used for local commit
-        pushed = self.__artifacts.push(self, self.__get_cache_keys_for_commit())
+        # Push all keys used for local commit, this could be full or partial,
+        # given previous _skip_push() logic. If buildtree isn't cached, then
+        # set partial push
+
+        partial = False
+        subdir = 'buildtree'
+        if not self._cached_buildtree():
+            partial = True
+
+        pushed = self.__artifacts.push(self, self.__get_cache_keys_for_commit(), partial=partial, subdir=subdir)
+
         if not pushed:
             return False
 
diff --git a/buildstream/plugintestutils/runcli.py b/buildstream/plugintestutils/runcli.py
index fb7c23c..bd2f1e4 100644
--- a/buildstream/plugintestutils/runcli.py
+++ b/buildstream/plugintestutils/runcli.py
@@ -238,6 +238,13 @@ class Result():
 
         return list(pushed)
 
+    def get_partial_pushed_elements(self):
+        pushed = re.findall(r'\[\s*push:(\S+)\s*\]\s*INFO\s*Pushed partial artifact', self.stderr)
+        if pushed is None:
+            return []
+
+        return list(pushed)
+
     def get_pulled_elements(self):
         pulled = re.findall(r'\[\s*pull:(\S+)\s*\]\s*INFO\s*Pulled artifact', self.stderr)
         if pulled is None:
@@ -245,6 +252,13 @@ class Result():
 
         return list(pulled)
 
+    def get_partial_pulled_elements(self):
+        pulled = re.findall(r'\[\s*pull:(\S+)\s*\]\s*INFO\s*Pulled partial artifact', self.stderr)
+        if pulled is None:
+            return []
+
+        return list(pulled)
+
 
 class Cli():
 
@@ -265,11 +279,15 @@ class Cli():
     #
     # Args:
     #    config (dict): The user configuration to use
+    #    reset (bool): Optional reset of stored config
     #
-    def configure(self, config):
+    def configure(self, config, reset=False):
         if self.config is None:
             self.config = {}
 
+        if reset:
+            self.config.clear()
+
         for key, val in config.items():
             self.config[key] = val
 
diff --git a/doc/source/using_config.rst b/doc/source/using_config.rst
index 71ca64a..e696a9d 100644
--- a/doc/source/using_config.rst
+++ b/doc/source/using_config.rst
@@ -59,6 +59,15 @@ configuration:
      # Add another cache to pull from
      - url: https://anothercache.com/artifacts:8080
        server-cert: another_server.crt
+     # Add a cache to push/pull to/from, specifying
+     # that you wish to push artifacts in a 'partial'
+     # state (this being without the respective buildtree).
+     # Note that allow-partial-push requires push to also
+     # be set.
+     - url: https://anothercache.com/artifacts:11003
+       push: true
+       allow-partial-push: true
+
 
 .. note::
 
@@ -86,6 +95,14 @@ configuration:
          # Add another cache to pull from
          - url: https://ourprojectcache.com/artifacts:8080
            server-cert: project_server.crt
+         # Add a cache to push/pull to/from, specifying
+         # that you wish to push artifacts in a 'partial'
+         # state (this being without the respective buildtree).
+         # Note that allow-partial-push requires push to also
+         # be set.
+         - url: https://anothercache.com/artifacts:11003
+           push: true
+           allow-partial-push: true
 
 
 .. note::
diff --git a/tests/artifactcache/config.py b/tests/artifactcache/config.py
index fda3097..d89e7f9 100644
--- a/tests/artifactcache/config.py
+++ b/tests/artifactcache/config.py
@@ -139,3 +139,28 @@ def test_missing_certs(cli, datafiles, config_key, config_value):
     # This does not happen for a simple `bst show`.
     result = cli.run(project=project, args=['artifact', 'pull', 'element.bst'])
     result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
+
+
+# Assert that if allow-partial-push is specified as true without push also being
+# set likewise, we get a comprehensive LoadError instead of an unhandled exception.
+@pytest.mark.datafiles(DATA_DIR)
+def test_partial_push_error(cli, datafiles):
+    project = os.path.join(datafiles.dirname, datafiles.basename, 'project', 'elements')
+
+    project_conf = {
+        'name': 'test',
+
+        'artifacts': {
+            'url': 'https://cache.example.com:12345',
+            'allow-partial-push': 'True'
+        }
+    }
+    project_conf_file = os.path.join(project, 'project.conf')
+    _yaml.dump(project_conf, project_conf_file)
+
+    # Use `pull` here to ensure we try to initialize the remotes, triggering the error
+    #
+    # This does not happen for a simple `bst show`.
+    result = cli.run(project=project, args=['artifact', 'pull', 'target.bst'])
+    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
+    assert "allow-partial-push also requires push to be set" in result.stderr
diff --git a/tests/integration/pushbuildtrees.py b/tests/integration/pushbuildtrees.py
new file mode 100644
index 0000000..194d206
--- /dev/null
+++ b/tests/integration/pushbuildtrees.py
@@ -0,0 +1,165 @@
+import os
+import shutil
+import pytest
+import subprocess
+
+from buildstream import _yaml
+from tests.testutils import create_artifact_share
+from tests.testutils.site import HAVE_SANDBOX
+from buildstream.plugintestutils import cli, cli_integration as cli2
+from buildstream.plugintestutils.integration import assert_contains
+from buildstream._exceptions import ErrorDomain, LoadErrorReason
+
+
+DATA_DIR = os.path.join(
+    os.path.dirname(os.path.realpath(__file__)),
+    "project"
+)
+
+
+# Remove artifact cache & set cli2.config value of pull-buildtrees
+# to false, which is the default user context. The cache has to be
+# cleared as just forcefully removing the refpath leaves dangling objects.
+def default_state(cli2, tmpdir, share):
+    shutil.rmtree(os.path.join(str(tmpdir), 'artifacts'))
+    cli2.configure({
+        'artifacts': {'url': share.repo, 'push': False},
+        'artifactdir': os.path.join(str(tmpdir), 'artifacts'),
+        'cache': {'pull-buildtrees': False},
+    })
+
+
+# Tests to capture the integration of the optional push of buildtrees.
+# The behaviour should encompass pushing artifacts that are already cached
+# without a buildtree as well as artifacts that are cached with their buildtree.
+# This option is handled via 'allow-partial-push' on a per artifact remote config
+# node basis. Multiple remote config nodes can point to the same url and as such can
+# have different 'allow-partial-push' options, tests need to cover this using project
+# confs.
+@pytest.mark.integration
+@pytest.mark.datafiles(DATA_DIR)
+@pytest.mark.skipif(not HAVE_SANDBOX, reason='Only available with a functioning sandbox')
+def test_pushbuildtrees(cli2, tmpdir, datafiles, integration_cache):
+    project = os.path.join(datafiles.dirname, datafiles.basename)
+    element_name = 'autotools/amhello.bst'
+
+    # Create artifact shares for pull & push testing
+    with create_artifact_share(os.path.join(str(tmpdir), 'share1')) as share1,\
+        create_artifact_share(os.path.join(str(tmpdir), 'share2')) as share2,\
+        create_artifact_share(os.path.join(str(tmpdir), 'share3')) as share3,\
+        create_artifact_share(os.path.join(str(tmpdir), 'share4')) as share4:
+
+        cli2.configure({
+            'artifacts': {'url': share1.repo, 'push': True},
+            'artifactdir': os.path.join(str(tmpdir), 'artifacts')
+        })
+
+        cli2.configure({'artifacts': [{'url': share1.repo, 'push': True},
+                                     {'url': share2.repo, 'push': True, 'allow-partial-push': True}]})
+
+        # Build autotools element, check pushed, delete local.
+        # As share 2 has push & allow-partial-push set as true, it
+        # should have pushed the artifacts, without the cached buildtrees,
+        # to it.
+        result = cli2.run(project=project, args=['build', element_name])
+        assert result.exit_code == 0
+        assert cli2.get_element_state(project, element_name) == 'cached'
+        elementdigest = share1.has_artifact('test', element_name, cli2.get_element_key(project, element_name))
+        buildtreedir = os.path.join(str(tmpdir), 'artifacts', 'extract', 'test', 'autotools-amhello',
+                                    elementdigest.hash, 'buildtree')
+        assert os.path.isdir(buildtreedir)
+        assert element_name in result.get_partial_pushed_elements()
+        assert element_name in result.get_pushed_elements()
+        assert share1.has_artifact('test', element_name, cli2.get_element_key(project, element_name))
+        assert share2.has_artifact('test', element_name, cli2.get_element_key(project, element_name))
+        default_state(cli2, tmpdir, share1)
+
+        # Check that after explicitly pulling an artifact without its buildtree,
+        # we can push it to another remote that is configured to accept the partial
+        # artifact
+        result = cli2.run(project=project, args=['artifact', 'pull', element_name])
+        assert element_name in result.get_pulled_elements()
+        cli2.configure({'artifacts': {'url': share3.repo, 'push': True, 'allow-partial-push': True}})
+        assert cli2.get_element_state(project, element_name) == 'cached'
+        assert not os.path.isdir(buildtreedir)
+        result = cli2.run(project=project, args=['artifact', 'push', element_name])
+        assert result.exit_code == 0
+        assert element_name in result.get_partial_pushed_elements()
+        assert element_name not in result.get_pushed_elements()
+        assert share3.has_artifact('test', element_name, cli2.get_element_key(project, element_name))
+        default_state(cli2, tmpdir, share3)
+
+        # Delete the local cache and pull the partial artifact from share 3,
+        # this should not include the buildtree when extracted locally, even when
+        # pull-buildtrees is given as a cli2 parameter as no available remotes will
+        # contain the buildtree
+        assert not os.path.isdir(buildtreedir)
+        assert cli2.get_element_state(project, element_name) != 'cached'
+        result = cli2.run(project=project, args=['--pull-buildtrees', 'artifact', 'pull', element_name])
+        assert element_name in result.get_partial_pulled_elements()
+        assert not os.path.isdir(buildtreedir)
+        default_state(cli2, tmpdir, share3)
+
+        # Delete the local cache and attempt to pull a 'full' artifact, including its
+        # buildtree. As with before share3 being the first listed remote will not have
+        # the buildtree available and should spawn a partial pull. Having share1 as the
+        # second available remote should allow the buildtree to be pulled thus 'completing'
+        # the artifact
+        cli2.configure({'artifacts': [{'url': share3.repo, 'push': True, 'allow-partial-push': True},
+                                     {'url': share1.repo, 'push': True}]})
+        assert cli2.get_element_state(project, element_name) != 'cached'
+        result = cli2.run(project=project, args=['--pull-buildtrees', 'artifact', 'pull', element_name])
+        assert element_name in result.get_partial_pulled_elements()
+        assert element_name in result.get_pulled_elements()
+        assert "Attempting to retrieve buildtree from remotes" in result.stderr
+        assert os.path.isdir(buildtreedir)
+        assert cli2.get_element_state(project, element_name) == 'cached'
+
+        # Test that we are able to 'complete' an artifact on a server which is cached partially,
+        # but has now been configured for full artifact pushing. This should require only pushing
+        # the missing blobs, which should be those of just the buildtree. In this case changing
+        # share3 to full pushes should exercise this
+        cli2.configure({'artifacts': {'url': share3.repo, 'push': True}})
+        result = cli2.run(project=project, args=['artifact', 'push', element_name])
+        assert element_name in result.get_pushed_elements()
+
+        # Ensure that the same remote url can be defined multiple times with differing push
+        # config. Buildstream supports the same remote having different configurations which
+        # partial pushing could be different for elements defined at a top level project.conf to
+        # those from a junctioned project. Assert that elements are pushed to the same remote in
+        # a state defined via their respective project.confs
+        default_state(cli2, tmpdir, share1)
+        cli2.configure({'artifactdir': os.path.join(str(tmpdir), 'artifacts')}, reset=True)
+        junction = os.path.join(project, 'elements', 'junction')
+        os.mkdir(junction)
+        shutil.copy2(os.path.join(project, 'elements', element_name), junction)
+
+        junction_conf = {}
+        project_conf = {}
+        junction_conf['name'] = 'amhello'
+        junction_conf['artifacts'] = {'url': share4.repo, 'push': True, 'allow-partial-push': True}
+        _yaml.dump(junction_conf, os.path.join(junction, 'project.conf'))
+        project_conf['artifacts'] = {'url': share4.repo, 'push': True}
+
+        # Read project.conf, the junction project.conf and buildstream.conf
+        # before running bst
+        with open(os.path.join(project, 'project.conf'), 'r') as f:
+            print(f.read())
+        with open(os.path.join(junction, 'project.conf'), 'r') as f:
+            print(f.read())
+        with open(os.path.join(project, 'cache', 'buildstream.conf'), 'r') as f:
+            print(f.read())
+
+        result = cli2.run(project=project, args=['build', 'junction/amhello.bst'], project_config=project_conf)
+
+        # Read project.conf, the junction project.conf and buildstream.conf
+        # after running bst
+        with open(os.path.join(project, 'project.conf'), 'r') as f:
+            print(f.read())
+        with open(os.path.join(junction, 'project.conf'), 'r') as f:
+            print(f.read())
+        with open(os.path.join(project, 'cache', 'buildstream.conf'), 'r') as f:
+            print(f.read())
+
+        assert 'junction/amhello.bst' in result.get_partial_pushed_elements()
+        assert 'base/base-alpine.bst' in result.get_pushed_elements()