Posted to commits@buildstream.apache.org by no...@apache.org on 2020/12/29 12:46:11 UTC

[buildstream] branch tpollard/566 created (now a739e4a)

This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a change to branch tpollard/566
in repository https://gitbox.apache.org/repos/asf/buildstream.git.


      at a739e4a  WIP: Make uploading of build trees configurable

This branch includes the following new commits:

     new a739e4a  WIP: Make uploading of build trees configurable

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[buildstream] 01/01: WIP: Make uploading of build trees configurable

Posted by no...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

not-in-ldap pushed a commit to branch tpollard/566
in repository https://gitbox.apache.org/repos/asf/buildstream.git

commit a739e4ac58ea0af334117f362f605666d593745a
Author: Tom Pollard <to...@codethink.co.uk>
AuthorDate: Wed Dec 5 17:45:38 2018 +0000

    WIP: Make uploading of build trees configurable
---
 buildstream/_artifactcache.py         |  89 +++++++++++++++---
 buildstream/_cas/cascache.py          |  93 ++++++++++++-------
 buildstream/_cas/casremote.py         |  18 +++-
 buildstream/element.py                |  30 +++++--
 buildstream/plugintestutils/runcli.py |  20 ++++-
 doc/source/using_config.rst           |  17 ++++
 tests/artifactcache/config.py         |  25 ++++++
 tests/integration/pushbuildtrees.py   | 165 ++++++++++++++++++++++++++++++++++
 8 files changed, 403 insertions(+), 54 deletions(-)
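
The user-facing knob this commit adds is an 'allow-partial-push' option on
artifact remote config nodes. A minimal sketch of a user configuration using
it (URLs illustrative, mirroring the using_config.rst hunk further down):

    artifacts:
      # A remote that receives full artifacts
      - url: https://cache.example.com/artifacts:11001
        push: true
      # A remote that accepts artifacts without their buildtree;
      # allow-partial-push requires push to also be set
      - url: https://cache.example.com/artifacts:11003
        push: true
        allow-partial-push: true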

diff --git a/buildstream/_artifactcache.py b/buildstream/_artifactcache.py
index 5404dc1..ee2cf53 100644
--- a/buildstream/_artifactcache.py
+++ b/buildstream/_artifactcache.py
@@ -107,6 +107,7 @@ class ArtifactCache():
 
         self._has_fetch_remotes = False
         self._has_push_remotes = False
+        self._has_partial_push_remotes = False
 
         os.makedirs(self.extractdir, exist_ok=True)
 
@@ -488,6 +489,9 @@ class ArtifactCache():
                 self._has_fetch_remotes = True
                 if remote_spec.push:
                     self._has_push_remotes = True
+                    # Partial push requires generic push option to also be set
+                    if remote_spec.partial_push:
+                        self._has_partial_push_remotes = True
 
                 remotes[remote_spec.url] = CASRemote(remote_spec)
 
@@ -685,6 +689,32 @@ class ArtifactCache():
             remotes_for_project = self._remotes[element._get_project()]
             return any(remote.spec.push for remote in remotes_for_project)
 
+    # has_partial_push_remotes():
+    #
+    # Check whether any remote repositories are available for pushing
+    # non-complete artifacts. This option requires the generic push value
+    # to also be set.
+    #
+    # Args:
+    #     element (Element): The Element to check
+    #
+    # Returns:
+    #   (bool): True if any remote repository is configured for optional
+    #            partial pushes, False otherwise
+    #
+    def has_partial_push_remotes(self, *, element=None):
+        # If there are no partial push remotes available, we can't partial push at all
+        if not self._has_partial_push_remotes:
+            return False
+        elif element is None:
+            # At least one remote is set to allow partial pushes
+            return True
+        else:
+            # Check whether the specified element's project has push remotes configured
+            # to accept partial artifact pushes
+            remotes_for_project = self._remotes[element._get_project()]
+            return any(remote.spec.partial_push for remote in remotes_for_project)
+
     # push():
     #
     # Push committed artifact to remote repository.
@@ -692,6 +722,8 @@ class ArtifactCache():
     # Args:
     #     element (Element): The Element whose artifact is to be pushed
     #     keys (list): The cache keys to use
+    #     partial (bool): Whether the artifact is cached in a partial state
+    #     subdir (string): Optional subdir to exclude from the push
     #
     # Returns:
     #   (bool): True if any remote was updated, False if no pushes were required
@@ -699,12 +731,25 @@ class ArtifactCache():
     # Raises:
     #   (ArtifactError): if there was an error
     #
-    def push(self, element, keys):
+    def push(self, element, keys, partial=False, subdir=None):
         refs = [self.get_artifact_fullname(element, key) for key in list(keys)]
 
         project = element._get_project()
 
-        push_remotes = [r for r in self._remotes[project] if r.spec.push]
+        push_remotes = []
+        partial_remotes = []
+
+        # Create list of remotes to push to, given current element and partial push config
+        if not partial:
+            push_remotes = [r for r in self._remotes[project] if (r.spec.push and not r.spec.partial_push)]
+
+        if self._has_partial_push_remotes:
+            # Create a specific list of the remotes expecting the artifact to be pushed in a
+            # partial state. This list needs to be pushed in a partial state, without the optional
+            # subdir even if it exists locally. There is no need to attempt pushing a partial
+            # artifact to a remote that is also queued to receive the full artifact
+            partial_remotes = [r for r in self._remotes[project] if (r.spec.partial_push and r.spec.push) and
+                               r not in push_remotes]
 
         pushed = False
 
@@ -713,7 +758,9 @@ class ArtifactCache():
             display_key = element._get_brief_display_key()
             element.status("Pushing artifact {} -> {}".format(display_key, remote.spec.url))
 
-            if self.cas.push(refs, remote):
+            # Passing the optional subdir allows for remote artifacts that are cached in a 'partial'
+            # state to be completed
+            if self.cas.push(refs, remote, subdir=subdir):
                 element.info("Pushed artifact {} -> {}".format(display_key, remote.spec.url))
                 pushed = True
             else:
@@ -721,6 +768,19 @@ class ArtifactCache():
                     remote.spec.url, element._get_brief_display_key()
                 ))
 
+        for remote in partial_remotes:
+            remote.init()
+            display_key = element._get_brief_display_key()
+            element.status("Pushing partial artifact {} -> {}".format(display_key, remote.spec.url))
+
+            if self.cas.push(refs, remote, excluded_subdirs=subdir):
+                element.info("Pushed partial artifact {} -> {}".format(display_key, remote.spec.url))
+                pushed = True
+            else:
+                element.info("Remote ({}) already has {} partially cached".format(
+                    remote.spec.url, element._get_brief_display_key()
+                ))
+
         return pushed
 
     # pull():
@@ -748,14 +808,23 @@ class ArtifactCache():
                 element.status("Pulling artifact {} <- {}".format(display_key, remote.spec.url))
 
                 if self.cas.pull(ref, remote, progress=progress, subdir=subdir, excluded_subdirs=excluded_subdirs):
-                    element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
                     if subdir:
-                        # Attempt to extract subdir into artifact extract dir if it already exists
-                        # without containing the subdir. If the respective artifact extract dir does not
-                        # exist a complete extraction will complete.
-                        self.extract(element, key, subdir)
-                    # no need to pull from additional remotes
-                    return True
+                        if not self.contains_subdir_artifact(element, key, subdir):
+                            # The pull expected the specific subdir to be present in the remote;
+                            # attempt to find it in other available remotes
+                            element.info("Pulled partial artifact {} <- {}. Attempting to retrieve {} from remotes"
+                                         .format(display_key, remote.spec.url, subdir))
+                        else:
+                            element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
+                            # Attempt to extract the subdir into the artifact extract dir if the latter
+                            # already exists without containing the subdir. If the respective artifact
+                            # extract dir does not exist, a full extraction will be performed.
+                            self.extract(element, key, subdir)
+                            # no need to pull from additional remotes
+                            return True
+                    else:
+                        element.info("Pulled artifact {} <- {}".format(display_key, remote.spec.url))
+                        return True
                 else:
                     element.info("Remote ({}) does not have {} cached".format(
                         remote.spec.url, element._get_brief_display_key()
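
Stripped of remote setup and logging, the remote selection in push() above
amounts to a small partition. A standalone sketch of the same logic, where
Remote is a hypothetical stand-in for a CASRemote spec:

    from collections import namedtuple

    # Stand-in for a remote spec carrying the 'push' and 'allow-partial-push'
    # (partial_push) booleans from the user configuration.
    Remote = namedtuple('Remote', 'url push partial_push')

    def select_push_remotes(remotes, partial):
        # Full pushes go only to push remotes that have not opted in to
        # partial pushes, and only when the local artifact is complete.
        push_remotes = []
        if not partial:
            push_remotes = [r for r in remotes if r.push and not r.partial_push]
        # Opted-in remotes receive the artifact without the optional subdir,
        # unless already queued to receive the full artifact.
        partial_remotes = [r for r in remotes
                           if r.partial_push and r.push and r not in push_remotes]
        return push_remotes, partial_remotes

    full = Remote('https://a', True, False)
    opted_in = Remote('https://b', True, True)
    assert select_push_remotes([full, opted_in], partial=False) == ([full], [opted_in])
    assert select_push_remotes([full, opted_in], partial=True) == ([], [opted_in])
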
diff --git a/buildstream/_cas/cascache.py b/buildstream/_cas/cascache.py
index 9d7a121..e284d83 100644
--- a/buildstream/_cas/cascache.py
+++ b/buildstream/_cas/cascache.py
@@ -200,34 +200,47 @@ class CASCache():
     #   (bool): True if pull was successful, False if ref was not available
     #
     def pull(self, ref, remote, *, progress=None, subdir=None, excluded_subdirs=None):
-        try:
-            remote.init()
 
-            request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
-            request.key = ref
-            response = remote.ref_storage.GetReference(request)
+        tree_found = False
 
-            tree = remote_execution_pb2.Digest()
-            tree.hash = response.digest.hash
-            tree.size_bytes = response.digest.size_bytes
+        while True:
+            try:
+                if not tree_found:
+                    remote.init()
 
-            # Check if the element artifact is present, if so just fetch the subdir.
-            if subdir and os.path.exists(self.objpath(tree)):
-                self._fetch_subdir(remote, tree, subdir)
-            else:
-                # Fetch artifact, excluded_subdirs determined in pullqueue
-                self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
+                    request = buildstream_pb2.GetReferenceRequest(instance_name=remote.spec.instance_name)
+                    request.key = ref
+                    response = remote.ref_storage.GetReference(request)
 
-            self.set_ref(ref, tree)
+                    tree = remote_execution_pb2.Digest()
+                    tree.hash = response.digest.hash
+                    tree.size_bytes = response.digest.size_bytes
 
-            return True
-        except grpc.RpcError as e:
-            if e.code() != grpc.StatusCode.NOT_FOUND:
-                raise CASCacheError("Failed to pull ref {}: {}".format(ref, e)) from e
-            else:
-                return False
-        except BlobNotFound as e:
-            return False
+                # Check if the element artifact is present, if so just fetch the subdir.
+                if subdir and os.path.exists(self.objpath(tree)):
+                    self._fetch_subdir(remote, tree, subdir)
+                else:
+                    # Fetch artifact, excluded_subdirs determined in pullqueue
+                    self._fetch_directory(remote, tree, excluded_subdirs=excluded_subdirs)
+
+                self.set_ref(ref, tree)
+
+                return True
+            except grpc.RpcError as e:
+                if e.code() != grpc.StatusCode.NOT_FOUND:
+                    raise CASCacheError("Failed to pull ref {}: {}".format(ref, e)) from e
+                else:
+                    return False
+            except BlobNotFound as e:
+                if not excluded_subdirs and subdir:
+                    # The remote has the top level digest but could not complete a full pull;
+                    # attempt a partial pull without needing to initialise and check for the
+                    # artifact digest. This default behaviour of dropping back to partial pulls
+                    # could be made a configurable warning given at the artifactcache level.
+                    tree_found = True
+                    excluded_subdirs, subdir = subdir, excluded_subdirs
+                else:
+                    return False
 
     # pull_tree():
     #
@@ -272,6 +285,8 @@ class CASCache():
     # Args:
     #     refs (list): The refs to push
     #     remote (CASRemote): The remote to push to
+    #     subdir (string): Optional specific subdir to include in the push
+    #     excluded_subdirs (string): Optional subdir to exclude from the push
     #
     # Returns:
     #   (bool): True if any remote was updated, False if no pushes were required
@@ -279,7 +294,7 @@ class CASCache():
     # Raises:
     #   (CASCacheError): if there was an error
     #
-    def push(self, refs, remote):
+    def push(self, refs, remote, *, subdir=None, excluded_subdirs=None):
         skipped_remote = True
         try:
             for ref in refs:
@@ -293,15 +308,18 @@ class CASCache():
                     response = remote.ref_storage.GetReference(request)
 
                     if response.digest.hash == tree.hash and response.digest.size_bytes == tree.size_bytes:
-                        # ref is already on the server with the same tree
-                        continue
+                        # ref is already on the server with the same tree; however, it might be partially cached.
+                        # If the artifact is not set to be pushed partially, attempt to 'complete' the remote
+                        # artifact if needed, else continue.
+                        if excluded_subdirs or remote.verify_digest_on_remote(self._get_subdir(tree, subdir)):
+                            continue
 
                 except grpc.RpcError as e:
                     if e.code() != grpc.StatusCode.NOT_FOUND:
                         # Intentionally re-raise RpcError for outer except block.
                         raise
 
-                self._send_directory(remote, tree)
+                self._send_directory(remote, tree, excluded_dir=excluded_subdirs)
 
                 request = buildstream_pb2.UpdateReferenceRequest(instance_name=remote.spec.instance_name)
                 request.keys.append(ref)
@@ -784,10 +802,17 @@ class CASCache():
                 a += 1
                 b += 1
 
-    def _reachable_refs_dir(self, reachable, tree, update_mtime=False):
+    def _reachable_refs_dir(self, reachable, tree, update_mtime=False, subdir=False):
         if tree.hash in reachable:
             return
 
+        # If looping through subdir digests, skip processing if
+        # ref path does not exist, allowing for partial objects
+        if subdir and not os.path.exists(self.objpath(tree)):
+            return
+
+        # Raises a FileNotFoundError if the path does not exist,
+        # which should only be thrown for the top level digest
         if update_mtime:
             os.utime(self.objpath(tree))
 
@@ -804,9 +829,9 @@ class CASCache():
             reachable.add(filenode.digest.hash)
 
         for dirnode in directory.directories:
-            self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime)
+            self._reachable_refs_dir(reachable, dirnode.digest, update_mtime=update_mtime, subdir=True)
 
-    def _required_blobs(self, directory_digest):
+    def _required_blobs(self, directory_digest, excluded_dir=None):
         # parse directory, and recursively add blobs
         d = remote_execution_pb2.Digest()
         d.hash = directory_digest.hash
@@ -825,7 +850,8 @@ class CASCache():
             yield d
 
         for dirnode in directory.directories:
-            yield from self._required_blobs(dirnode.digest)
+            if dirnode.name != excluded_dir:
+                yield from self._required_blobs(dirnode.digest)
 
     # _ensure_blob():
     #
@@ -930,6 +956,7 @@ class CASCache():
             objpath = self._ensure_blob(remote, dir_digest)
 
             directory = remote_execution_pb2.Directory()
+
             with open(objpath, 'rb') as f:
                 directory.ParseFromString(f.read())
 
@@ -972,8 +999,8 @@ class CASCache():
 
         return dirdigest
 
-    def _send_directory(self, remote, digest, u_uid=uuid.uuid4()):
-        required_blobs = self._required_blobs(digest)
+    def _send_directory(self, remote, digest, u_uid=uuid.uuid4(), excluded_dir=None):
+        required_blobs = self._required_blobs(digest, excluded_dir=excluded_dir)
 
         missing_blobs = dict()
         # Limit size of FindMissingBlobs request
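
The while/try restructuring of pull() above implements a single fallback from
a full pull to a partial one; the tree_found flag just avoids re-fetching the
reference on the retry. Stripped of the gRPC details it behaves like this
sketch, where fetch is a hypothetical stand-in for the GetReference and
_fetch_directory calls:

    class BlobNotFound(Exception):
        """Stand-in for the casremote exception of the same name."""

    def pull_with_fallback(fetch, subdir=None, excluded_subdirs=None):
        # fetch() raises BlobNotFound when the remote has the top level
        # digest but is missing blobs needed to complete the request.
        while True:
            try:
                fetch(subdir=subdir, excluded_subdirs=excluded_subdirs)
                return True
            except BlobNotFound:
                if not excluded_subdirs and subdir:
                    # Retry once as a partial pull, excluding the subdir
                    # that the first attempt required.
                    excluded_subdirs, subdir = subdir, excluded_subdirs
                else:
                    return False
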
diff --git a/buildstream/_cas/casremote.py b/buildstream/_cas/casremote.py
index 56ba4c5..a953165 100644
--- a/buildstream/_cas/casremote.py
+++ b/buildstream/_cas/casremote.py
@@ -23,7 +23,8 @@ from .. import utils
 _MAX_PAYLOAD_BYTES = 1024 * 1024
 
 
-class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key client_cert instance_name')):
+class CASRemoteSpec(namedtuple('CASRemoteSpec',
+                               'url push partial_push server_cert client_key client_cert instance_name')):
 
     # _new_from_config_node
     #
@@ -31,9 +32,18 @@ class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key
     #
     @staticmethod
     def _new_from_config_node(spec_node, basedir=None):
-        _yaml.node_validate(spec_node, ['url', 'push', 'server-cert', 'client-key', 'client-cert', 'instance_name'])
+        _yaml.node_validate(spec_node, ['url', 'push', 'allow-partial-push', 'server-cert', 'client-key',
+                                        'client-cert', 'instance_name'])
         url = _yaml.node_get(spec_node, str, 'url')
         push = _yaml.node_get(spec_node, bool, 'push', default_value=False)
+        partial_push = _yaml.node_get(spec_node, bool, 'allow-partial-push', default_value=False)
+
+        # partial_push depends on push, raise error if not configured correctly
+        if partial_push and not push:
+            provenance = _yaml.node_get_provenance(spec_node, 'allow-partial-push')
+            raise LoadError(LoadErrorReason.INVALID_DATA,
+                            "{}: allow-partial-push also requires push to be set".format(provenance))
+
         if not url:
             provenance = _yaml.node_get_provenance(spec_node, 'url')
             raise LoadError(LoadErrorReason.INVALID_DATA,
@@ -63,10 +73,10 @@ class CASRemoteSpec(namedtuple('CASRemoteSpec', 'url push server_cert client_key
             raise LoadError(LoadErrorReason.INVALID_DATA,
                             "{}: 'client-cert' was specified without 'client-key'".format(provenance))
 
-        return CASRemoteSpec(url, push, server_cert, client_key, client_cert, instance_name)
+        return CASRemoteSpec(url, push, partial_push, server_cert, client_key, client_cert, instance_name)
 
 
-CASRemoteSpec.__new__.__defaults__ = (None, None, None, None)
+CASRemoteSpec.__new__.__defaults__ = (False, None, None, None, None)
 
 
 class BlobNotFound(CASRemoteError):
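
With the new validation in _new_from_config_node(), a remote that opts in to
partial pushes without also setting push fails to load. A sketch of the
rejected configuration (URL illustrative):

    artifacts:
      - url: https://cache.example.com/artifacts:11003
        # Invalid: push defaults to false, so loading fails with
        # "allow-partial-push also requires push to be set"
        allow-partial-push: true
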
diff --git a/buildstream/element.py b/buildstream/element.py
index a243826..705c19e 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -1797,13 +1797,19 @@ class Element(Plugin):
     #   (bool): True if this element does not need a push job to be created
     #
     def _skip_push(self):
+
         if not self.__artifacts.has_push_remotes(element=self):
             # No push remotes for this element's project
             return True
 
         # Do not push elements that aren't cached, or that are cached with a dangling buildtree
-        # artifact unless element type is expected to have an an empty buildtree directory
-        if not self._cached_buildtree():
+        # artifact unless the element type is expected to have an empty buildtree directory. Check
+        # that this default behaviour is not overridden via a remote configured to allow pushing
+        # artifacts without their corresponding buildtree.
+        if not self._cached():
+            return True
+
+        if not self._cached_buildtree() and not self.__artifacts.has_partial_push_remotes(element=self):
             return True
 
         # Do not push tainted artifact
@@ -1814,11 +1820,14 @@ class Element(Plugin):
 
     # _push():
     #
-    # Push locally cached artifact to remote artifact repository.
+    # Push locally cached artifact to remote artifact repository. An attempt
+    # will be made to push partial artifacts if the current config dictates.
+    # If an artifact on a remote set for 'full' artifact pushes is found to be
+    # partially cached, an attempt will be made to 'complete' it.
     #
     # Returns:
     #   (bool): True if the remote was updated, False if it already existed
-    #           and no updated was required
+    #           and no update was required
     #
     def _push(self):
         self.__assert_cached()
@@ -1827,8 +1836,17 @@ class Element(Plugin):
             self.warn("Not pushing tainted artifact.")
             return False
 
-        # Push all keys used for local commit
-        pushed = self.__artifacts.push(self, self.__get_cache_keys_for_commit())
+        # Push all keys used for local commit; this could be full or partial,
+        # given the previous _skip_push() logic. If the buildtree isn't cached,
+        # then set a partial push
+
+        partial = False
+        subdir = 'buildtree'
+        if not self._cached_buildtree():
+            partial = True
+
+        pushed = self.__artifacts.push(self, self.__get_cache_keys_for_commit(), partial=partial, subdir=subdir)
+
         if not pushed:
             return False
 
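Taken together, the _skip_push() and _push() changes reduce to a small
decision per element. A sketch of the outcome by local cache state (the
booleans mirror has_push_remotes(), has_partial_push_remotes(), _cached()
and _cached_buildtree()):

    def push_plan(cached, cached_buildtree, has_push, has_partial_push):
        # Mirrors _skip_push(): nothing to push without push remotes
        # or a locally cached artifact
        if not has_push or not cached:
            return None
        # A missing buildtree only blocks the push when no remote
        # accepts partial artifacts
        if not cached_buildtree and not has_partial_push:
            return None
        # Mirrors _push(): push partially when the buildtree isn't cached
        return 'partial' if not cached_buildtree else 'full'
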
diff --git a/buildstream/plugintestutils/runcli.py b/buildstream/plugintestutils/runcli.py
index fb7c23c..bd2f1e4 100644
--- a/buildstream/plugintestutils/runcli.py
+++ b/buildstream/plugintestutils/runcli.py
@@ -238,6 +238,13 @@ class Result():
 
         return list(pushed)
 
+    def get_partial_pushed_elements(self):
+        pushed = re.findall(r'\[\s*push:(\S+)\s*\]\s*INFO\s*Pushed partial artifact', self.stderr)
+        if pushed is None:
+            return []
+
+        return list(pushed)
+
     def get_pulled_elements(self):
         pulled = re.findall(r'\[\s*pull:(\S+)\s*\]\s*INFO\s*Pulled artifact', self.stderr)
         if pulled is None:
@@ -245,6 +252,13 @@ class Result():
 
         return list(pulled)
 
+    def get_partial_pulled_elements(self):
+        pulled = re.findall(r'\[\s*pull:(\S+)\s*\]\s*INFO\s*Pulled partial artifact', self.stderr)
+        if pulled is None:
+            return []
+
+        return list(pulled)
+
 
 class Cli():
 
@@ -265,11 +279,15 @@ class Cli():
     #
     # Args:
     #    config (dict): The user configuration to use
+    #    reset (bool): Optional reset of stored config
     #
-    def configure(self, config):
+    def configure(self, config, reset=False):
         if self.config is None:
             self.config = {}
 
+        if reset:
+            self.config.clear()
+
         for key, val in config.items():
             self.config[key] = val
 
diff --git a/doc/source/using_config.rst b/doc/source/using_config.rst
index 71ca64a..e696a9d 100644
--- a/doc/source/using_config.rst
+++ b/doc/source/using_config.rst
@@ -59,6 +59,15 @@ configuration:
      # Add another cache to pull from
      - url: https://anothercache.com/artifacts:8080
        server-cert: another_server.crt
+     # Add a cache to push/pull to/from, specifying
+     # that you wish to push artifacts in a 'partial'
+     # state (this being without the respective buildtree).
+     # Note that allow-partial-push requires push to also
+     # be set.
+     - url: https://anothercache.com/artifacts:11003
+       push: true
+       allow-partial-push: true
+
 
 .. note::
 
@@ -86,6 +95,14 @@ configuration:
          # Add another cache to pull from
          - url: https://ourprojectcache.com/artifacts:8080
            server-cert: project_server.crt
+         # Add a cache to push/pull to/from, specifying
+         # that you wish to push artifacts in a 'partial'
+         # state (this being without the respective buildtree).
+         # Note that allow-partial-push requires push to also
+         # be set.
+         - url: https://anothercache.com/artifacts:11003
+           push: true
+           allow-partial-push: true
 
 
 .. note::
diff --git a/tests/artifactcache/config.py b/tests/artifactcache/config.py
index fda3097..d89e7f9 100644
--- a/tests/artifactcache/config.py
+++ b/tests/artifactcache/config.py
@@ -139,3 +139,28 @@ def test_missing_certs(cli, datafiles, config_key, config_value):
     # This does not happen for a simple `bst show`.
     result = cli.run(project=project, args=['artifact', 'pull', 'element.bst'])
     result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
+
+
+# Assert that if allow-partial-push is specified as true without push also being
+# set likewise, we get a comprehensive LoadError instead of an unhandled exception.
+@pytest.mark.datafiles(DATA_DIR)
+def test_partial_push_error(cli, datafiles):
+    project = os.path.join(datafiles.dirname, datafiles.basename, 'project', 'elements')
+
+    project_conf = {
+        'name': 'test',
+
+        'artifacts': {
+            'url': 'https://cache.example.com:12345',
+            'allow-partial-push': 'True'
+        }
+    }
+    project_conf_file = os.path.join(project, 'project.conf')
+    _yaml.dump(project_conf, project_conf_file)
+
+    # Use `pull` here to ensure we try to initialize the remotes, triggering the error
+    #
+    # This does not happen for a simple `bst show`.
+    result = cli.run(project=project, args=['artifact', 'pull', 'target.bst'])
+    result.assert_main_error(ErrorDomain.LOAD, LoadErrorReason.INVALID_DATA)
+    assert "allow-partial-push also requires push to be set" in result.stderr
diff --git a/tests/integration/pushbuildtrees.py b/tests/integration/pushbuildtrees.py
new file mode 100644
index 0000000..194d206
--- /dev/null
+++ b/tests/integration/pushbuildtrees.py
@@ -0,0 +1,165 @@
+import os
+import shutil
+import pytest
+import subprocess
+
+from buildstream import _yaml
+from tests.testutils import create_artifact_share
+from tests.testutils.site import HAVE_SANDBOX
+from buildstream.plugintestutils import cli, cli_integration as cli2
+from buildstream.plugintestutils.integration import assert_contains
+from buildstream._exceptions import ErrorDomain, LoadErrorReason
+
+
+DATA_DIR = os.path.join(
+    os.path.dirname(os.path.realpath(__file__)),
+    "project"
+)
+
+
+# Remove artifact cache & set cli2.config value of pull-buildtrees
+# to false, which is the default user context. The cache has to be
+# cleared as just forcefully removing the refpath leaves dangling objects.
+def default_state(cli2, tmpdir, share):
+    shutil.rmtree(os.path.join(str(tmpdir), 'artifacts'))
+    cli2.configure({
+        'artifacts': {'url': share.repo, 'push': False},
+        'artifactdir': os.path.join(str(tmpdir), 'artifacts'),
+        'cache': {'pull-buildtrees': False},
+    })
+
+
+# Tests to capture the integration of the optional push of buildtrees.
+# The behaviour should encompass pushing artifacts that are already cached
+# without a buildtree as well as artifacts that are cached with their buildtree.
+# This option is handled via 'allow-partial-push' on a per artifact remote config
+# node basis. Multiple remote config nodes can point to the same url and as such
+# can have different 'allow-partial-push' options, so tests need to cover this
+# using project confs.
+@pytest.mark.integration
+@pytest.mark.datafiles(DATA_DIR)
+@pytest.mark.skipif(not HAVE_SANDBOX, reason='Only available with a functioning sandbox')
+def test_pushbuildtrees(cli2, tmpdir, datafiles, integration_cache):
+    project = os.path.join(datafiles.dirname, datafiles.basename)
+    element_name = 'autotools/amhello.bst'
+
+    # Create artifact shares for pull & push testing
+    with create_artifact_share(os.path.join(str(tmpdir), 'share1')) as share1,\
+        create_artifact_share(os.path.join(str(tmpdir), 'share2')) as share2,\
+        create_artifact_share(os.path.join(str(tmpdir), 'share3')) as share3,\
+        create_artifact_share(os.path.join(str(tmpdir), 'share4')) as share4:
+
+        cli2.configure({
+            'artifacts': {'url': share1.repo, 'push': True},
+            'artifactdir': os.path.join(str(tmpdir), 'artifacts')
+        })
+
+        cli2.configure({'artifacts': [{'url': share1.repo, 'push': True},
+                                     {'url': share2.repo, 'push': True, 'allow-partial-push': True}]})
+
+        # Build the autotools element, check it pushed, delete the local cache.
+        # As share2 has push & allow-partial-push set to true, it
+        # should have pushed the artifact, without the cached buildtree,
+        # to it.
+        result = cli2.run(project=project, args=['build', element_name])
+        assert result.exit_code == 0
+        assert cli2.get_element_state(project, element_name) == 'cached'
+        elementdigest = share1.has_artifact('test', element_name, cli2.get_element_key(project, element_name))
+        buildtreedir = os.path.join(str(tmpdir), 'artifacts', 'extract', 'test', 'autotools-amhello',
+                                    elementdigest.hash, 'buildtree')
+        assert os.path.isdir(buildtreedir)
+        assert element_name in result.get_partial_pushed_elements()
+        assert element_name in result.get_pushed_elements()
+        assert share1.has_artifact('test', element_name, cli2.get_element_key(project, element_name))
+        assert share2.has_artifact('test', element_name, cli2.get_element_key(project, element_name))
+        default_state(cli2, tmpdir, share1)
+
+        # Check that after explicitly pulling an artifact without its buildtree,
+        # we can push it to another remote that is configured to accept the partial
+        # artifact
+        result = cli2.run(project=project, args=['artifact', 'pull', element_name])
+        assert element_name in result.get_pulled_elements()
+        cli2.configure({'artifacts': {'url': share3.repo, 'push': True, 'allow-partial-push': True}})
+        assert cli2.get_element_state(project, element_name) == 'cached'
+        assert not os.path.isdir(buildtreedir)
+        result = cli2.run(project=project, args=['artifact', 'push', element_name])
+        assert result.exit_code == 0
+        assert element_name in result.get_partial_pushed_elements()
+        assert element_name not in result.get_pushed_elements()
+        assert share3.has_artifact('test', element_name, cli2.get_element_key(project, element_name))
+        default_state(cli2, tmpdir, share3)
+
+        # Delete the local cache and pull the partial artifact from share 3;
+        # this should not include the buildtree when extracted locally, even when
+        # pull-buildtrees is given as a cli parameter, as no available remote will
+        # contain the buildtree
+        assert not os.path.isdir(buildtreedir)
+        assert cli2.get_element_state(project, element_name) != 'cached'
+        result = cli2.run(project=project, args=['--pull-buildtrees', 'artifact', 'pull', element_name])
+        assert element_name in result.get_partial_pulled_elements()
+        assert not os.path.isdir(buildtreedir)
+        default_state(cli2, tmpdir, share3)
+
+        # Delete the local cache and attempt to pull a 'full' artifact, including its
+        # buildtree. As before, share3, being the first listed remote, will not have
+        # the buildtree available and should spawn a partial pull. Having share1 as the
+        # second available remote should allow the buildtree to be pulled, thus
+        # 'completing' the artifact
+        cli2.configure({'artifacts': [{'url': share3.repo, 'push': True, 'allow-partial-push': True},
+                                     {'url': share1.repo, 'push': True}]})
+        assert cli2.get_element_state(project, element_name) != 'cached'
+        result = cli2.run(project=project, args=['--pull-buildtrees', 'artifact', 'pull', element_name])
+        assert element_name in result.get_partial_pulled_elements()
+        assert element_name in result.get_pulled_elements()
+        assert "Attempting to retrieve buildtree from remotes" in result.stderr
+        assert os.path.isdir(buildtreedir)
+        assert cli2.get_element_state(project, element_name) == 'cached'
+
+        # Test that we are able to 'complete' an artifact that is partially cached on a
+        # server which has now been configured for full artifact pushing. This should
+        # require pushing only the missing blobs, which should be just those of the
+        # buildtree. In this case, changing share3 to full pushes should exercise this
+        cli2.configure({'artifacts': {'url': share3.repo, 'push': True}})
+        result = cli2.run(project=project, args=['artifact', 'push', element_name])
+        assert element_name in result.get_pushed_elements()
+
+        # Ensure that the same remote url can be defined multiple times with differing push
+        # config. Buildstream supports the same remote having different configurations, so
+        # partial pushing could differ between elements defined in a top-level project.conf
+        # and those from a junctioned project. Assert that elements are pushed to the same
+        # remote in a state defined via their respective project.confs
+        default_state(cli2, tmpdir, share1)
+        cli2.configure({'artifactdir': os.path.join(str(tmpdir), 'artifacts')}, reset=True)
+        junction = os.path.join(project, 'elements', 'junction')
+        os.mkdir(junction)
+        shutil.copy2(os.path.join(project, 'elements', element_name), junction)
+
+        junction_conf = {}
+        project_conf = {}
+        junction_conf['name'] = 'amhello'
+        junction_conf['artifacts'] = {'url': share4.repo, 'push': True, 'allow-partial-push': True}
+        _yaml.dump(junction_conf, os.path.join(junction, 'project.conf'))
+        project_conf['artifacts'] = {'url': share4.repo, 'push': True}
+
+        # Read project.conf, the junction project.conf and buildstream.conf
+        # before running bst
+        with open(os.path.join(project, 'project.conf'), 'r') as f:
+            print(f.read())
+        with open(os.path.join(junction, 'project.conf'), 'r') as f:
+            print(f.read())
+        with open(os.path.join(project, 'cache', 'buildstream.conf'), 'r') as f:
+            print(f.read())
+
+        result = cli2.run(project=project, args=['build', 'junction/amhello.bst'], project_config=project_conf)
+
+        # Read project.conf, the junction project.conf and buildstream.conf
+        # after running bst
+        with open(os.path.join(project, 'project.conf'), 'r') as f:
+            print(f.read())
+        with open(os.path.join(junction, 'project.conf'), 'r') as f:
+            print(f.read())
+        with open(os.path.join(project, 'cache', 'buildstream.conf'), 'r') as f:
+            print(f.read())
+
+        assert 'junction/amhello.bst' in result.get_partial_pushed_elements()
+        assert 'base/base-alpine.bst' in result.get_pushed_elements()