You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@brooklyn.apache.org by he...@apache.org on 2022/08/11 23:54:59 UTC

[brooklyn-server] branch master updated (1b84156556 -> 562fc4e1d2)

This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git


    from 1b84156556 Merge branch 'suppress-nested-sensitive-fields'
     new a0de263d40 fix use of concurrent task list in GC
     new 4a374e9a00 add aliases for more common types
     new d29fcb5f8a expose cleaning up a namespace on containers
     new d9a0f2eea7 clarify destruction api, and pass errors to caller
     new bb1e5ac48a add created and destroying lifecycle hooks
     new 1eba5df57a introduce more forceful destroy method
     new 0bcb387de0 stronger destroy semantics used in most places, esp in tests
     new 613bd023bd faster strategies for determining container readiness and completion
     new 562fc4e1d2 allow batch config read to take a few millis, but not block

The 9 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../brooklyn/camp/brooklyn/ConfigYamlTest.java     |   2 +-
 .../brooklyn/DynamicMultiGroupYamlRebindTest.java  |   4 +-
 .../camp/brooklyn/WindowsYamlLiveTest.java         |   2 +-
 .../catalog/CatalogMakeOsgiBundleTest.java         |   2 +-
 .../brooklyn/core/entity/AbstractEntity.java       |  29 +-
 .../org/apache/brooklyn/core/entity/Entities.java  |  51 +-
 .../mgmt/internal/BrooklynGarbageCollector.java    |   2 +-
 .../core/mgmt/internal/BrooklynShutdownHooks.java  |   4 +-
 .../mgmt/internal/EntityManagementSupport.java     |   9 +-
 core/src/main/resources/catalog.bom                |  11 +
 .../brooklyn/core/effector/EffectorTaskTest.java   |   2 +-
 .../mgmt/internal/EntityExecutionManagerTest.java  |   4 +-
 .../BrooklynMementoPersisterTestFixture.java       |   4 +-
 .../launcher/blueprints/AbstractBlueprintTest.java |   2 +-
 .../autoscaling/AutoScalerPolicyPoolSizeTest.java  |   2 +-
 .../org/apache/brooklyn/rest/api/EntityApi.java    |   2 +-
 .../rest/resources/EntityConfigResource.java       |   5 +-
 .../rest/util/BrooklynRestResourceUtils.java       |   2 +-
 .../brooklyn/rest/resources/ActivityRestTest.java  |   2 +-
 .../rest/resources/EffectorResourceTest.java       |   2 +-
 .../brooklyn/tasks/kubectl/ContainerCommons.java   |   6 +-
 .../tasks/kubectl/ContainerTaskFactory.java        | 531 ++++++++++++++-------
 .../machine/pool/AbstractServerPoolTest.java       |   2 +-
 ...wareProcessStopsDuringStartJcloudsLiveTest.java |   6 +-
 ...nillaWindowsProcessWinrmExitStatusLiveTest.java |   2 +-
 .../VanillaWindowsProcessWinrmStreamsLiveTest.java |   4 +-
 .../qa/performance/ScalabilityPerformanceTest.java |   2 +-
 .../brooklyn/tasks/kubectl/ContainerTaskTest.java  |   9 +-
 28 files changed, 496 insertions(+), 209 deletions(-)


[brooklyn-server] 09/09: allow batch config read to take a few millis, but not block

Posted by he...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git

commit 562fc4e1d29021fcd044fb6e561f46cab96fd9f1
Author: Alex Heneveld <al...@cloudsoft.io>
AuthorDate: Fri Aug 12 00:51:43 2022 +0100

    allow batch config read to take a few millis, but not block
---
 .../org/apache/brooklyn/rest/resources/EntityConfigResource.java     | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/resources/EntityConfigResource.java b/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/resources/EntityConfigResource.java
index 445dda3e3c..fe46aa8a6e 100644
--- a/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/resources/EntityConfigResource.java
+++ b/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/resources/EntityConfigResource.java
@@ -140,7 +140,10 @@ public class EntityConfigResource extends AbstractBrooklynRestResource implement
                                     .skipResolution(skipResolution)
                                     .suppressIfSecret(key.getName(), suppressSecrets)
                                     .raw(raw)
-                                    .context(entity).timeout(Duration.ZERO).renderAs(key)
+                                    .context(entity)
+                                    .immediately(true)              // 2022-08 added immediately instead of timeout zero because the latter caused some oddities
+                                    .timeout(Duration.millis(500))  // shouldn't take this long, but if it does, under load, just bail
+                                    .renderAs(key)
                                     .resolve());
 
             }


[brooklyn-server] 03/09: expose cleaning up a namespace on containers

Posted by he...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git

commit d29fcb5f8aa263f9b0d4b0b48e9c5b0fc4dfae9a
Author: Alex Heneveld <al...@cloudsoft.io>
AuthorDate: Thu Aug 11 14:32:13 2022 +0100

    expose cleaning up a namespace on containers
---
 .../tasks/kubectl/ContainerTaskFactory.java        | 93 +++++++++++++---------
 1 file changed, 57 insertions(+), 36 deletions(-)

diff --git a/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskFactory.java b/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskFactory.java
index bab7b06fcb..a78c1cb6de 100644
--- a/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskFactory.java
+++ b/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskFactory.java
@@ -18,7 +18,6 @@
  */
 package org.apache.brooklyn.tasks.kubectl;
 
-import com.google.common.base.Stopwatch;
 import com.google.common.collect.Lists;
 import org.apache.brooklyn.api.entity.Entity;
 import org.apache.brooklyn.api.mgmt.Task;
@@ -59,7 +58,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
@@ -98,41 +96,15 @@ public class ContainerTaskFactory<T extends ContainerTaskFactory<T,RET>,RET> imp
                         argumentsCfg = MutableList.of(argumentsCfgO.stream().map(x -> ""+x).collect(Collectors.joining("\n")));
                     }
 
-                    String containerImage = EntityInitializers.resolve(config, CONTAINER_IMAGE);
                     PullPolicy containerImagePullPolicy = EntityInitializers.resolve(config, CONTAINER_IMAGE_PULL_POLICY);
-                    Boolean devMode = EntityInitializers.resolve(config, KEEP_CONTAINER_FOR_DEBUGGING);
 
                     String workingDir = EntityInitializers.resolve(config, WORKING_DIR);
                     Set<Map<String,String>> volumeMounts = (Set<Map<String,String>>) EntityInitializers.resolve(config, VOLUME_MOUNTS);
                     Set<Map<String, Object>> volumes = (Set<Map<String, Object>>) EntityInitializers.resolve(config, VOLUMES);
 
-                    if(Strings.isBlank(containerImage)) {
-                        throw new IllegalStateException("You must specify containerImage when using " + this.getClass().getSimpleName());
-                    }
-
+                    final String kubeJobName = initNamespaceAndGetNewJobName();
+                    String containerImage = EntityInitializers.resolve(config, CONTAINER_IMAGE);
                     Entity entity = BrooklynTaskTags.getContextEntity(Tasks.current());
-                    if (entity == null) {
-                        throw new IllegalStateException("Task must run in context of entity to background jobs");
-                    }
-
-                    final String cleanImageName = containerImage.contains(":") ? containerImage.substring(0, containerImage.indexOf(":")) : containerImage;
-
-                    StringShortener ss = new StringShortener().separator("-");
-                    if (Strings.isNonBlank(this.jobIdentifier)) {
-                        ss.append("job", this.jobIdentifier).canTruncate("job", 20);
-                    } else {
-                        ss.append("brooklyn", "brooklyn").canTruncate("brooklyn", 2);
-                        ss.append("appId", entity.getApplicationId()).canTruncate("appId", 4);
-                        ss.append("entityId", entity.getId()).canTruncate("entityId", 4);
-                        ss.append("image", cleanImageName).canTruncate("image", 10);
-                    }
-                    ss.append("uid", Strings.makeRandomId(9)+Identifiers.makeRandomPassword(1, Identifiers.LOWER_CASE_ALPHA));
-                    final String kubeJobName = ss.getStringOfMaxLength(50)
-                            .replaceAll("[^A-Za-z0-9-]+", "-") // remove all symbols
-                            .toLowerCase();
-                    if (namespace==null) {
-                        namespace = kubeJobName;
-                    }
 
                     LOG.debug("Submitting container job in namespace "+namespace+", name "+kubeJobName);
 
@@ -360,13 +332,9 @@ public class ContainerTaskFactory<T extends ContainerTaskFactory<T,RET>,RET> imp
                             return returnConversion==null ? (RET) result : returnConversion.apply(result);
 
                         } finally {
-                            // clean up - delete namespace
-                            if (!devMode && deleteNamespaceHere) {
-                                LOG.debug("Deleting namespace "+namespace);
-                                // do this not as a subtask so we can run even if the main queue fails
-                                Entities.submit(entity, newSimpleTaskFactory(String.format(NAMESPACE_DELETE_CMD, namespace)).summary("Tear down containers").newTask()).block();
+                            if (deleteNamespaceHere) {
+                                doDeleteNamespace();
                             }
-                            System.runFinalization();
                         }
                     } catch (Exception e) {
                         throw Exceptions.propagate(e);
@@ -378,6 +346,59 @@ public class ContainerTaskFactory<T extends ContainerTaskFactory<T,RET>,RET> imp
         return taskBuilder.build();
     }
 
+    private String initNamespaceAndGetNewJobName() {
+        Entity entity = BrooklynTaskTags.getContextEntity(Tasks.current());
+        if (entity == null) {
+            throw new IllegalStateException("Task must run in context of entity to background jobs");
+        }
+
+        String containerImage = EntityInitializers.resolve(config, CONTAINER_IMAGE);
+        if(Strings.isBlank(containerImage)) {
+            throw new IllegalStateException("You must specify containerImage when using " + this.getClass().getSimpleName());
+        }
+
+        final String cleanImageName = containerImage.contains(":") ? containerImage.substring(0, containerImage.indexOf(":")) : containerImage;
+
+        StringShortener ss = new StringShortener().separator("-");
+        if (Strings.isNonBlank(this.jobIdentifier)) {
+            ss.append("job", this.jobIdentifier).canTruncate("job", 20);
+        } else {
+            ss.append("brooklyn", "brooklyn").canTruncate("brooklyn", 2);
+            ss.append("appId", entity.getApplicationId()).canTruncate("appId", 4);
+            ss.append("entityId", entity.getId()).canTruncate("entityId", 4);
+            ss.append("image", cleanImageName).canTruncate("image", 10);
+        }
+        ss.append("uid", Strings.makeRandomId(9)+Identifiers.makeRandomPassword(1, Identifiers.LOWER_CASE_ALPHA));
+        final String kubeJobName = ss.getStringOfMaxLength(50)
+                .replaceAll("[^A-Za-z0-9-]+", "-") // remove all symbols
+                .toLowerCase();
+        if (namespace==null) {
+            namespace = kubeJobName;
+        }
+        return kubeJobName;
+    }
+
+    public String getNamespace() {
+        return namespace;
+    }
+
+    public boolean doDeleteNamespace() {
+        if (namespace==null) return false;
+        Entity entity = BrooklynTaskTags.getContextEntity(Tasks.current());
+        if (entity==null) return false;
+        // clean up - delete namespace
+        Boolean devMode = EntityInitializers.resolve(config, KEEP_CONTAINER_FOR_DEBUGGING);
+        if (Boolean.TRUE.equals(devMode)) {
+            return false;
+        }
+
+        LOG.debug("Deleting namespace " + namespace);
+        // do this not as a subtask so we can run even if the main queue fails
+        Entities.submit(entity, newSimpleTaskFactory(String.format(NAMESPACE_DELETE_CMD, namespace)).summary("Tear down containers").newTask()).block();
+        System.runFinalization();
+        return true;
+    }
+
     public T summary(String summary) { this.summary = summary; return self(); }
     public T timeout(Duration timeout) { config.put(TIMEOUT, timeout); return self(); }
     public T command(List<String> commands) { config.put(COMMAND, commands); return self(); }


[brooklyn-server] 01/09: fix use of concurrent task list in GC

Posted by he...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git

commit a0de263d408ae1bfd7ea2c96560d13c096e2e43a
Author: Alex Heneveld <al...@cloudsoft.io>
AuthorDate: Thu Aug 11 14:31:30 2022 +0100

    fix use of concurrent task list in GC
---
 .../apache/brooklyn/core/mgmt/internal/BrooklynGarbageCollector.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/BrooklynGarbageCollector.java b/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/BrooklynGarbageCollector.java
index a64fda5ddf..f66823b174 100644
--- a/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/BrooklynGarbageCollector.java
+++ b/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/BrooklynGarbageCollector.java
@@ -503,7 +503,7 @@ public class BrooklynGarbageCollector {
         if (!brooklynProperties.getConfig(CHECK_SUBTASK_SUBMITTERS))
             return 0;
         
-        Collection<Task<?>> allTasks = executionManager.allTasksLive();
+        Collection<Task<?>> allTasks = executionManager.getAllTasks();
         Collection<Task<?>> tasksToDelete = MutableList.of();
         try {
             for (Task<?> task: allTasks) {


[brooklyn-server] 05/09: add created and destroying lifecycle hooks

Posted by he...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git

commit bb1e5ac48a6ede94021ed13e8b54a9ed9c8394c8
Author: Alex Heneveld <al...@cloudsoft.io>
AuthorDate: Thu Aug 11 14:33:33 2022 +0100

    add created and destroying lifecycle hooks
---
 .../brooklyn/core/entity/AbstractEntity.java       | 29 +++++++++++++++++++++-
 .../mgmt/internal/EntityManagementSupport.java     |  9 ++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/core/src/main/java/org/apache/brooklyn/core/entity/AbstractEntity.java b/core/src/main/java/org/apache/brooklyn/core/entity/AbstractEntity.java
index c047ee6ecb..30a96f1ec2 100644
--- a/core/src/main/java/org/apache/brooklyn/core/entity/AbstractEntity.java
+++ b/core/src/main/java/org/apache/brooklyn/core/entity/AbstractEntity.java
@@ -1767,7 +1767,19 @@ public abstract class AbstractEntity extends AbstractBrooklynObject implements E
             if (displayNameAutoGenerated) displayName.set(getAutogeneratedDefaultDisplayName());
         }
     }
-    
+
+    /**
+     * Invoked by {@link EntityManagementSupport} when this entity is first created and almost fully managed,
+     * visible to other entities through the management context, for the very first time.
+     * It will be followed by a call to {@link #onManagementStarted()},
+     * but will not be called on subsequent rebindings.
+     *
+     * This is called by {@link org.apache.brooklyn.core.mgmt.internal.LocalEntityManager#createEntity(Map, Class)}
+     * just after its call to {@link org.apache.brooklyn.core.objs.proxy.InternalEntityFactory#createEntity(EntitySpec, String)}
+     * and immediately before the call to {@link #onManagementStarted()}.
+     */
+    public void onManagementCreated() {}
+
     /**
      * Invoked by {@link EntityManagementSupport} when this entity is fully managed and visible to other entities
      * through the management context.
@@ -1790,6 +1802,21 @@ public abstract class AbstractEntity extends AbstractBrooklynObject implements E
     @Deprecated
     public void onManagementNoLongerMaster() {}
 
+    /**
+     * Invoked by {@link EntityManagementSupport} when this entity is being unmanaged.
+     * This may be because this entity is being removed, or because management is stopping.
+     * If this entity is being removed, there will be a subsequent call to {@link #onManagementDestroying()}.
+     */
+    public void onManagementStopping() {
+    }
+
+    /**
+     * Invoked by {@link EntityManagementSupport} when this entity is being permanently removed from management.
+     * This call is synchronous and can still submit and block on tasks, so it can be used for cleanup.
+     */
+    public void onManagementDestroying() {
+    }
+
     /**
      * Invoked by {@link EntityManagementSupport} when this entity is fully unmanaged.
      * <p>
diff --git a/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/EntityManagementSupport.java b/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/EntityManagementSupport.java
index 4ffa12699a..7e71a9cc57 100644
--- a/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/EntityManagementSupport.java
+++ b/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/EntityManagementSupport.java
@@ -273,6 +273,7 @@ public class EntityManagementSupport {
             
             if (!isReadOnly()) {
                 entity.onManagementBecomingMaster();
+                if (info.getMode().isCreating()) entity.onManagementCreated();
                 entity.onManagementStarted();
             }
             
@@ -340,8 +341,14 @@ public class EntityManagementSupport {
                 nonDeploymentManagementContext.setMode(NonDeploymentManagementContextMode.MANAGEMENT_STOPPING);
             }
         }
-        
+
+        if (!wasDryRun && !isReadOnly()) {
+            entity.onManagementStopping();
+            if (info.getMode().isDestroying()) entity.onManagementDestroying();
+        }
+
         if (wasDryRun || (!isReadOnly() && info.getMode().isDestroying())) {
+
             // ensure adjuncts get a destroy callback
             // note they don't get any alert if the entity is being locally unmanaged to run somewhere else.
             // framework should introduce a call for that ideally, but in interim if needed they


[brooklyn-server] 04/09: clarify destruction api, and pass errors to caller

Posted by he...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git

commit d9a0f2eea7f5f852391ed76750bb11509f55083e
Author: Alex Heneveld <al...@cloudsoft.io>
AuthorDate: Thu Aug 11 14:33:01 2022 +0100

    clarify destruction api, and pass errors to caller
---
 rest/rest-api/src/main/java/org/apache/brooklyn/rest/api/EntityApi.java | 2 +-
 .../java/org/apache/brooklyn/rest/util/BrooklynRestResourceUtils.java   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rest/rest-api/src/main/java/org/apache/brooklyn/rest/api/EntityApi.java b/rest/rest-api/src/main/java/org/apache/brooklyn/rest/api/EntityApi.java
index 7a69c9432b..12d2cb236a 100644
--- a/rest/rest-api/src/main/java/org/apache/brooklyn/rest/api/EntityApi.java
+++ b/rest/rest-api/src/main/java/org/apache/brooklyn/rest/api/EntityApi.java
@@ -325,7 +325,7 @@ public interface EntityApi {
     public Response expunge(
             @ApiParam(value = "Application ID or name", required = true) @PathParam("application") final String applicationId, 
             @ApiParam(value = "Entity ID or name", required = true) @PathParam("entity") final String entityId, 
-            @ApiParam(value = "Whether to gracefully release all resources", required = true) @QueryParam("release") final boolean release);
+            @ApiParam(value = "Whether to gracefully release all resources (failing and keeping if unsuccessful)", required = true) @QueryParam("release") final boolean release);
 
     @GET
     @Path("/{entity}/descendants")
diff --git a/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/util/BrooklynRestResourceUtils.java b/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/util/BrooklynRestResourceUtils.java
index d382205b0e..1ecf2d5bc0 100644
--- a/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/util/BrooklynRestResourceUtils.java
+++ b/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/util/BrooklynRestResourceUtils.java
@@ -481,7 +481,7 @@ public class BrooklynRestResourceUtils {
                         @Override
                         public void run() {
                             if (release)
-                                Entities.destroyCatching(entity);
+                                Entities.destroy(entity);
                             else
                                 mgmt.getEntityManager().unmanage(entity);
                         }


[brooklyn-server] 02/09: add aliases for more common types

Posted by he...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git

commit 4a374e9a00d9351299836044fc5b99381e46dbc8
Author: Alex Heneveld <al...@cloudsoft.io>
AuthorDate: Thu Aug 11 14:31:57 2022 +0100

    add aliases for more common types
---
 core/src/main/resources/catalog.bom | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/core/src/main/resources/catalog.bom b/core/src/main/resources/catalog.bom
index fcdccdf9a4..a1cb2d74b8 100644
--- a/core/src/main/resources/catalog.bom
+++ b/core/src/main/resources/catalog.bom
@@ -207,3 +207,14 @@ brooklyn.catalog:
       itemType: bean
       item:
         type: org.apache.brooklyn.core.effector.http.HttpCommandEffector
+
+    - id: org.apache.brooklyn.core.sensor.ssh.SshCommandSensor
+      format: java-type-name
+      itemType: bean
+      item:
+        type: org.apache.brooklyn.core.sensor.ssh.SshCommandSensor
+    - id: org.apache.brooklyn.core.effector.ssh.SshCommandEffector
+      format: java-type-name
+      itemType: bean
+      item:
+        type: org.apache.brooklyn.core.effector.ssh.SshCommandEffector


[brooklyn-server] 07/09: stronger destroy semantics used in most places, esp in tests

Posted by he...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git

commit 0bcb387de010a29a4b5d3aaf76caef79c5febeb8
Author: Alex Heneveld <al...@cloudsoft.io>
AuthorDate: Thu Aug 11 14:42:58 2022 +0100

    stronger destroy semantics used in most places, esp in tests
---
 .../java/org/apache/brooklyn/camp/brooklyn/ConfigYamlTest.java     | 2 +-
 .../brooklyn/camp/brooklyn/DynamicMultiGroupYamlRebindTest.java    | 4 ++--
 .../org/apache/brooklyn/camp/brooklyn/WindowsYamlLiveTest.java     | 2 +-
 .../brooklyn/camp/brooklyn/catalog/CatalogMakeOsgiBundleTest.java  | 2 +-
 core/src/main/java/org/apache/brooklyn/core/entity/Entities.java   | 7 +++++--
 .../apache/brooklyn/core/mgmt/internal/BrooklynShutdownHooks.java  | 4 ++--
 .../java/org/apache/brooklyn/core/effector/EffectorTaskTest.java   | 2 +-
 .../brooklyn/core/mgmt/internal/EntityExecutionManagerTest.java    | 4 ++--
 .../core/mgmt/persist/BrooklynMementoPersisterTestFixture.java     | 4 ++--
 .../apache/brooklyn/launcher/blueprints/AbstractBlueprintTest.java | 2 +-
 .../brooklyn/policy/autoscaling/AutoScalerPolicyPoolSizeTest.java  | 2 +-
 .../org/apache/brooklyn/rest/util/BrooklynRestResourceUtils.java   | 2 +-
 .../java/org/apache/brooklyn/rest/resources/ActivityRestTest.java  | 2 +-
 .../org/apache/brooklyn/rest/resources/EffectorResourceTest.java   | 2 +-
 .../brooklyn/entity/machine/pool/AbstractServerPoolTest.java       | 2 +-
 .../base/SoftwareProcessStopsDuringStartJcloudsLiveTest.java       | 6 +++---
 .../base/VanillaWindowsProcessWinrmExitStatusLiveTest.java         | 2 +-
 .../software/base/VanillaWindowsProcessWinrmStreamsLiveTest.java   | 4 ++--
 .../base/test/qa/performance/ScalabilityPerformanceTest.java       | 2 +-
 19 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/ConfigYamlTest.java b/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/ConfigYamlTest.java
index 9ede4a14fa..fcbccfdacd 100644
--- a/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/ConfigYamlTest.java
+++ b/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/ConfigYamlTest.java
@@ -137,7 +137,7 @@ public class ConfigYamlTest extends AbstractYamlTest {
                     // error, loop wasn't interrupted or detected
                     LOG.warn("Timeout elapsed, destroying items; usage: "+
                             ((LocalManagementContext)mgmt()).getGarbageCollector().getUsageString());
-                    Entities.destroy(app);
+                    Entities.destroy(app, true);
                 } catch (RuntimeInterruptedException e) {
                     // expected on normal execution; clear the interrupted flag to prevent ugly further warnings being logged
                     Thread.interrupted();
diff --git a/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/DynamicMultiGroupYamlRebindTest.java b/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/DynamicMultiGroupYamlRebindTest.java
index 2e0026921d..35c7762aa7 100644
--- a/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/DynamicMultiGroupYamlRebindTest.java
+++ b/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/DynamicMultiGroupYamlRebindTest.java
@@ -56,7 +56,7 @@ public class DynamicMultiGroupYamlRebindTest extends AbstractYamlRebindTest {
          Enricher enricher1 = app.enrichers().iterator().next();
 
          // Destroy application before first rebind.
-         Entities.destroy(app);
+         Entities.destroy(app, true);
 
          // check that a subsequent change doesn't cause it to re-create
          mgmt().getRebindManager().getChangeListener().onChanged(enricher1);
@@ -100,7 +100,7 @@ public class DynamicMultiGroupYamlRebindTest extends AbstractYamlRebindTest {
       Assert.assertEquals(state.getEntities().size(), 10);
 
       // Destroy application after first rebind.
-      Entities.destroy(appRebind);
+      Entities.destroy(appRebind, true);
 
       // Rebind, expect no apps.
       rebind(RebindOptions.create().terminateOrigManagementContext(true));
diff --git a/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/WindowsYamlLiveTest.java b/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/WindowsYamlLiveTest.java
index a84822a046..79943652ba 100644
--- a/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/WindowsYamlLiveTest.java
+++ b/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/WindowsYamlLiveTest.java
@@ -134,7 +134,7 @@ public class WindowsYamlLiveTest extends AbstractWindowsYamlTest {
     @Override
     public void tearDown() {
         try {
-            if (app != null) Entities.destroy(app);
+            if (app != null) Entities.destroy(app, true);
         } catch (Throwable t) {
             log.error("Caught exception in tearDown method", t);
         } finally {
diff --git a/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/catalog/CatalogMakeOsgiBundleTest.java b/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/catalog/CatalogMakeOsgiBundleTest.java
index f2bd70944c..122c4a088d 100644
--- a/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/catalog/CatalogMakeOsgiBundleTest.java
+++ b/camp/camp-brooklyn/src/test/java/org/apache/brooklyn/camp/brooklyn/catalog/CatalogMakeOsgiBundleTest.java
@@ -82,7 +82,7 @@ public class CatalogMakeOsgiBundleTest extends AbstractYamlTest {
     @AfterMethod(alwaysRun = true)
     public void cleanUpButKeepMgmt() throws Exception {
         for (Application app: MutableList.copyOf(mgmt().getApplications())) {
-            Entities.destroy(app);
+            Entities.destroy(app, true);
         }
         for (Bundle b: bundlesToRemove) {
             ((ManagementContextInternal)mgmt()).getOsgiManager().get().uninstallUploadedBundle(
diff --git a/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java b/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java
index a0a36ef8af..5ace40d008 100644
--- a/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java
+++ b/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java
@@ -721,8 +721,11 @@ public class Entities {
 
     /** Same as {@link #destroy(Entity)} but catching all errors. */
     public static void destroyCatching(Entity entity) {
+        destroyCatching(entity, false);
+    }
+    public static void destroyCatching(Entity entity, boolean unmanageOnErrors) {
         try {
-            destroy(entity);
+            destroy(entity, unmanageOnErrors);
         } catch (Exception e) {
             log.warn("ERROR destroying "+entity+" (ignoring): "+e, e);
             Exceptions.propagateIfFatal(e);
@@ -777,7 +780,7 @@ public class Entities {
                     public void run() {
                         log.debug("destroying app "+app+" (managed? "+isManaged(app)+"; mgmt is "+mgmt+")");
                         try {
-                            destroy(app);
+                            destroy(app, true);
                             log.debug("destroyed app "+app+"; mgmt now "+mgmt);
                         } catch (Exception e) {
                             log.warn("problems destroying app "+app+" (mgmt now "+mgmt+", will rethrow at least one exception): "+e);
diff --git a/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/BrooklynShutdownHooks.java b/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/BrooklynShutdownHooks.java
index 2e4e0c63e7..eeb3c67821 100644
--- a/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/BrooklynShutdownHooks.java
+++ b/core/src/main/java/org/apache/brooklyn/core/mgmt/internal/BrooklynShutdownHooks.java
@@ -78,7 +78,7 @@ public class BrooklynShutdownHooks {
                 semaphore.release();
                 try {
                     log.warn("Call to invokeStopOnShutdown for "+entity+" while system already shutting down; invoking stop now and throwing exception");
-                    Entities.destroy(entity);
+                    Entities.destroy(entity, false);
                     throw new IllegalStateException("Call to invokeStopOnShutdown for "+entity+" while system already shutting down");
                 } catch (Exception e) {
                     throw new IllegalStateException("Call to invokeStopOnShutdown for "+entity+" while system already shutting down, had error: "+e, e);
@@ -229,7 +229,7 @@ public class BrooklynShutdownHooks {
             final Entity entity = entityToStop;
             if (!Entities.isManaged(entity)) continue;
             Task<Object> t = Tasks.builder().dynamic(false).displayName("destroying "+entity).body(new Runnable() {
-                @Override public void run() { Entities.destroy(entity); }
+                @Override public void run() { Entities.destroy(entity, false); }
             }).build();
             stops.add( ((EntityInternal)entity).getExecutionContext().submit(t) );
         }
diff --git a/core/src/test/java/org/apache/brooklyn/core/effector/EffectorTaskTest.java b/core/src/test/java/org/apache/brooklyn/core/effector/EffectorTaskTest.java
index faa1fad67a..db83f0d6e6 100644
--- a/core/src/test/java/org/apache/brooklyn/core/effector/EffectorTaskTest.java
+++ b/core/src/test/java/org/apache/brooklyn/core/effector/EffectorTaskTest.java
@@ -435,7 +435,7 @@ public class EffectorTaskTest extends BrooklynAppUnitTestSupport {
 
                         // Execution completed in the child's ExecutionContext, but still queued as a secondary.
                         // Destroy the child entity so that no subsequent tasks can be executed in its context.
-                        Entities.destroy(child);
+                        Entities.destroy(child, true);
                     } finally {
                         // Let STALL complete
                         synchronized(lock) {
diff --git a/core/src/test/java/org/apache/brooklyn/core/mgmt/internal/EntityExecutionManagerTest.java b/core/src/test/java/org/apache/brooklyn/core/mgmt/internal/EntityExecutionManagerTest.java
index 2fe243272b..5465e49bf3 100644
--- a/core/src/test/java/org/apache/brooklyn/core/mgmt/internal/EntityExecutionManagerTest.java
+++ b/core/src/test/java/org/apache/brooklyn/core/mgmt/internal/EntityExecutionManagerTest.java
@@ -364,7 +364,7 @@ public class EntityExecutionManagerTest extends BrooklynAppUnitTestSupport {
         Set<Object> tags = app.getManagementContext().getExecutionManager().getTaskTags();
         assertTrue(tags.contains(BrooklynTaskTags.tagForContextEntity(e)), "tags="+tags);
         
-        Entities.destroy(e);
+        Entities.destroy(e, true);
         forceGc();
 
         Asserts.succeedsEventually(() -> {
@@ -392,7 +392,7 @@ public class EntityExecutionManagerTest extends BrooklynAppUnitTestSupport {
         TestEntity entity = app.createAndManageChild(EntitySpec.create(TestEntity.class));
         entity.sensors().set(TestEntity.NAME, "bob");
         entity.invoke(TestEntity.MY_EFFECTOR, ImmutableMap.<String,Object>of()).get();
-        Entities.destroy(entity);
+        Entities.destroy(entity, true);
         Time.sleep(Duration.ONE_SECOND);
         forceGc();
         Collection<Task<?>> t2 = em.getAllTasks();
diff --git a/core/src/test/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterTestFixture.java b/core/src/test/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterTestFixture.java
index 6f63bab005..51f00a1232 100644
--- a/core/src/test/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterTestFixture.java
+++ b/core/src/test/java/org/apache/brooklyn/core/mgmt/persist/BrooklynMementoPersisterTestFixture.java
@@ -133,7 +133,7 @@ public abstract class BrooklynMementoPersisterTestFixture {
 
     @Test
     public void testDeleteAndLoadMemento() throws Exception {
-        Entities.destroy(entity);
+        Entities.destroy(entity, true);
 
         BrooklynMemento reloadedMemento = loadMemento();
         
@@ -142,7 +142,7 @@ public abstract class BrooklynMementoPersisterTestFixture {
         assertEquals(Iterables.getOnlyElement(reloadedMemento.getLocationIds()), location.getId());
         
         // Destroying the app should also unmanage its owned location, and adjuncts
-        Entities.destroy(app);
+        Entities.destroy(app, true);
         reloadedMemento = loadMemento();
         
         assertFalse(Iterables.contains(reloadedMemento.getEntityIds(), entity.getId()));
diff --git a/launcher/src/test/java/org/apache/brooklyn/launcher/blueprints/AbstractBlueprintTest.java b/launcher/src/test/java/org/apache/brooklyn/launcher/blueprints/AbstractBlueprintTest.java
index 96aef9b37e..fda84338cd 100644
--- a/launcher/src/test/java/org/apache/brooklyn/launcher/blueprints/AbstractBlueprintTest.java
+++ b/launcher/src/test/java/org/apache/brooklyn/launcher/blueprints/AbstractBlueprintTest.java
@@ -149,7 +149,7 @@ public abstract class AbstractBlueprintTest {
                 for (Application app: mgmt.getApplications()) {
                     LOG.debug("destroying app "+app+" (managed? "+Entities.isManaged(app)+"; mgmt is "+mgmt+")");
                     try {
-                        Entities.destroy(app);
+                        Entities.destroy(app, true);
                         LOG.debug("destroyed app "+app+"; mgmt now "+mgmt);
                     } catch (Exception e) {
                         LOG.error("problems destroying app "+app, e);
diff --git a/policy/src/test/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicyPoolSizeTest.java b/policy/src/test/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicyPoolSizeTest.java
index 16325d2244..222877eed6 100644
--- a/policy/src/test/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicyPoolSizeTest.java
+++ b/policy/src/test/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicyPoolSizeTest.java
@@ -93,7 +93,7 @@ public class AutoScalerPolicyPoolSizeTest extends BrooklynAppUnitTestSupport {
         EntityAsserts.assertAttributeEqualsEventually(cluster, TestCluster.GROUP_SIZE, CLUSTER_INIITIAL_SIZE);
         // Simulate user expunging the entities manually
         for (int i = 0; i < CLUSTER_MAX_SIZE - CLUSTER_MIN_SIZE; i++) {
-            Entities.destroyCatching(cluster.getMembers().iterator().next());
+            Entities.destroyCatching(cluster.getMembers().iterator().next(), true);
         }
         EntityAsserts.assertAttributeEqualsEventually(cluster, TestSizeRecordingCluster.SIZE_HISTORY_RECORD_COUNT, 2);
         Assert.assertEquals((int)cluster.getSizeHistory().get(0), CLUSTER_INIITIAL_SIZE);
diff --git a/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/util/BrooklynRestResourceUtils.java b/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/util/BrooklynRestResourceUtils.java
index 1ecf2d5bc0..2e30ee2791 100644
--- a/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/util/BrooklynRestResourceUtils.java
+++ b/rest/rest-resources/src/main/java/org/apache/brooklyn/rest/util/BrooklynRestResourceUtils.java
@@ -481,7 +481,7 @@ public class BrooklynRestResourceUtils {
                         @Override
                         public void run() {
                             if (release)
-                                Entities.destroy(entity);
+                                Entities.destroy(entity, false);
                             else
                                 mgmt.getEntityManager().unmanage(entity);
                         }
diff --git a/rest/rest-resources/src/test/java/org/apache/brooklyn/rest/resources/ActivityRestTest.java b/rest/rest-resources/src/test/java/org/apache/brooklyn/rest/resources/ActivityRestTest.java
index 6ab67a114f..d14e326067 100644
--- a/rest/rest-resources/src/test/java/org/apache/brooklyn/rest/resources/ActivityRestTest.java
+++ b/rest/rest-resources/src/test/java/org/apache/brooklyn/rest/resources/ActivityRestTest.java
@@ -103,7 +103,7 @@ Task[eatand]@J90TKfIX: Waiting on Task[eat-sleep-rave-repeat]@QPa5o4kF
     @SuppressWarnings("deprecation")
     protected void initEntity(int seed) {
         if (entity != null && Entities.isManaged(entity)) {
-            Entities.destroy(entity.getApplication());
+            Entities.destroy(entity.getApplication(), true);
         }
         
         CreationResult<BasicApplication, Void> app = EntityManagementUtils.createStarting(getManagementContext(),
diff --git a/rest/rest-resources/src/test/java/org/apache/brooklyn/rest/resources/EffectorResourceTest.java b/rest/rest-resources/src/test/java/org/apache/brooklyn/rest/resources/EffectorResourceTest.java
index 957f631865..a8d3cdf680 100644
--- a/rest/rest-resources/src/test/java/org/apache/brooklyn/rest/resources/EffectorResourceTest.java
+++ b/rest/rest-resources/src/test/java/org/apache/brooklyn/rest/resources/EffectorResourceTest.java
@@ -63,7 +63,7 @@ public class EffectorResourceTest extends BrooklynRestResourceTest {
     @Override
     public void destroyMethod() throws Exception {
         try {
-            if (app != null) Entities.destroy(app);
+            if (app != null) Entities.destroy(app, true);
         } finally {
             super.destroyMethod();
         }
diff --git a/software/base/src/test/java/org/apache/brooklyn/entity/machine/pool/AbstractServerPoolTest.java b/software/base/src/test/java/org/apache/brooklyn/entity/machine/pool/AbstractServerPoolTest.java
index 0da5a2e325..70fa0c5e0c 100644
--- a/software/base/src/test/java/org/apache/brooklyn/entity/machine/pool/AbstractServerPoolTest.java
+++ b/software/base/src/test/java/org/apache/brooklyn/entity/machine/pool/AbstractServerPoolTest.java
@@ -76,7 +76,7 @@ public abstract class AbstractServerPoolTest {
     public void tearDown() throws Exception {
         // Kills the apps before terminating the pool
         for (TestApplication app : createdApps) {
-            Entities.destroy(app);
+            Entities.destroy(app, true);
         }
         if (mgmt != null) {
             Entities.destroyAll(mgmt);
diff --git a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessStopsDuringStartJcloudsLiveTest.java b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessStopsDuringStartJcloudsLiveTest.java
index 7839de26bd..36a3e5fef3 100644
--- a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessStopsDuringStartJcloudsLiveTest.java
+++ b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessStopsDuringStartJcloudsLiveTest.java
@@ -148,7 +148,7 @@ public class SoftwareProcessStopsDuringStartJcloudsLiveTest extends BrooklynAppL
         executeInLimitedTime(new Callable<Void>() {
             @Override
             public Void call() {
-                Entities.destroy(app);
+                Entities.destroy(app, true);
                 return null;
             }
         }, Asserts.DEFAULT_LONG_TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS);
@@ -169,7 +169,7 @@ public class SoftwareProcessStopsDuringStartJcloudsLiveTest extends BrooklynAppL
      * </ul>
      */
     @Test(groups = {"Live"})
-    public void testJclousMachineIsExpungedWhenStoppedDuringStart() throws Exception {
+    public void testJcloudsMachineIsExpungedWhenStoppedDuringStart() throws Exception {
         Map<String,?> allFlags = ImmutableMap.<String,Object>builder()
                 .put("tags", ImmutableList.of(getClass().getName()))
                 .put(JcloudsLocation.IMAGE_ID.getName(), IMAGE_ID)
@@ -193,7 +193,7 @@ public class SoftwareProcessStopsDuringStartJcloudsLiveTest extends BrooklynAppL
         EntityAsserts.assertAttributeEqualsEventually(entity, AttributesInternal.INTERNAL_PROVISIONING_TASK_STATE, AttributesInternal.ProvisioningTaskState.RUNNING);
 
         Stopwatch stopwatch = Stopwatch.createStarted();
-        Entities.destroyCatching(app);
+        Entities.destroyCatching(app, true);
         LOG.info("Time for expunging: {}", Duration.of(stopwatch));
 
         NodeMetadata nodeMetadata = Iterables.getFirst(((AWSEC2ComputeService) jcloudsLocation.getComputeService()).listNodesDetailsMatching(new Predicate<ComputeMetadata>() {
diff --git a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/VanillaWindowsProcessWinrmExitStatusLiveTest.java b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/VanillaWindowsProcessWinrmExitStatusLiveTest.java
index b41044935f..fa38212736 100644
--- a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/VanillaWindowsProcessWinrmExitStatusLiveTest.java
+++ b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/VanillaWindowsProcessWinrmExitStatusLiveTest.java
@@ -89,7 +89,7 @@ public class VanillaWindowsProcessWinrmExitStatusLiveTest {
     public void tearDown() throws Exception {
         try {
             try {
-                if (app != null) Entities.destroy(app);
+                if (app != null) Entities.destroy(app, true);
             } catch (Throwable t) {
                 LOG.error("Caught exception in tearDown method", t);
             }
diff --git a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/VanillaWindowsProcessWinrmStreamsLiveTest.java b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/VanillaWindowsProcessWinrmStreamsLiveTest.java
index ec435b31a6..e1c6f9f9e9 100644
--- a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/VanillaWindowsProcessWinrmStreamsLiveTest.java
+++ b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/VanillaWindowsProcessWinrmStreamsLiveTest.java
@@ -49,7 +49,7 @@ public class VanillaWindowsProcessWinrmStreamsLiveTest extends AbstractSoftwareP
     @BeforeClass(alwaysRun = true)
     public void setUpClass() throws Exception {
         super.setUp();
-        if (app != null) Entities.destroy(app);
+        if (app != null) Entities.destroy(app, true);
         
         location = WindowsTestFixture.setUpWindowsLocation(mgmt);
         machine = location.obtain(ImmutableMap.of());
@@ -76,7 +76,7 @@ public class VanillaWindowsProcessWinrmStreamsLiveTest extends AbstractSoftwareP
     @Override
     public void tearDown() throws Exception {
         try {
-            if (app != null) Entities.destroy(app);
+            if (app != null) Entities.destroy(app, true);
         } catch (Throwable t) {
             LOG.error("Caught exception in tearDown method", t);
         } finally {
diff --git a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/test/qa/performance/ScalabilityPerformanceTest.java b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/test/qa/performance/ScalabilityPerformanceTest.java
index a3b064e7b7..3b6e0033b4 100644
--- a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/test/qa/performance/ScalabilityPerformanceTest.java
+++ b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/test/qa/performance/ScalabilityPerformanceTest.java
@@ -237,7 +237,7 @@ public class ScalabilityPerformanceTest extends AbstractPerformanceTest {
                         ManagementContext mgmt = app.getManagementContext();
                         LOG.debug("destroying app "+app+" (managed? "+Entities.isManaged(app)+"; mgmt is "+mgmt+")");
                         try {
-                            Entities.destroy(app);
+                            Entities.destroy(app, true);
                             LOG.debug("destroyed app "+app+"; mgmt now "+mgmt);
                         } catch (Exception e) {
                             LOG.warn("problems destroying app "+app+" (mgmt now "+mgmt+", will rethrow at least one exception): "+e);


[brooklyn-server] 06/09: introduce more forceful destroy method

Posted by he...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git

commit 1eba5df57a8b40fbb428bb96aa19984c89c7135e
Author: Alex Heneveld <al...@cloudsoft.io>
AuthorDate: Thu Aug 11 14:33:52 2022 +0100

    introduce more forceful destroy method
---
 .../org/apache/brooklyn/core/entity/Entities.java  | 44 ++++++++++++++++++----
 1 file changed, 37 insertions(+), 7 deletions(-)

diff --git a/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java b/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java
index 7550881b9d..a0a36ef8af 100644
--- a/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java
+++ b/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java
@@ -72,6 +72,7 @@ import org.apache.brooklyn.core.location.Locations;
 import org.apache.brooklyn.core.mgmt.BrooklynTaskTags;
 import org.apache.brooklyn.core.objs.proxy.EntityProxyImpl;
 import org.apache.brooklyn.core.sensor.DependentConfiguration;
+import org.apache.brooklyn.util.collections.MutableList;
 import org.apache.brooklyn.util.collections.MutableMap;
 import org.apache.brooklyn.util.collections.MutableSet;
 import org.apache.brooklyn.util.core.ResourceUtils;
@@ -660,27 +661,56 @@ public class Entities {
                 MutableMap.of("locations", locations)).getUnchecked();
     }
 
+    public static void destroy(Entity e) {
+        destroy(e, false);
+    }
     /**
      * Attempts to stop, destroy, and unmanage the given entity.
      * <p>
      * Actual actions performed will depend on the entity type and its current state.
      */
-    public static void destroy(Entity e) {
+    public static void destroy(Entity e, boolean unmanageOnErrors) {
         if (isManaged(e)) {
             if (isReadOnly(e)) {
                 unmanage(e);
                 log.debug("destroyed and unmanaged read-only copy of "+e);
             } else {
-                if (e instanceof Startable) Entities.invokeEffector(e, e, Startable.STOP).getUnchecked();
+                List<Exception> errors = MutableList.of();
+
+                try {
+                    if (e instanceof Startable) Entities.invokeEffector(e, e, Startable.STOP).getUnchecked();
+                } catch (Exception error) {
+                    Exceptions.propagateIfFatal(error);
+                    if (!unmanageOnErrors) Exceptions.propagate(error);
+                    errors.add(error);
+                }
                 
                 // if destroying gracefully we might also want to do this (currently gets done by GC after unmanage,
                 // which is good enough for leaks, but not sure if that's ideal for subscriptions etc)
 //                ((LocalEntityManager)e.getApplication().getManagementContext().getEntityManager()).stopTasks(e, null);
-                
-                if (e instanceof EntityInternal) ((EntityInternal)e).destroy();
-                
-                unmanage(e);
-                
+
+                try {
+                    if (e instanceof EntityInternal) ((EntityInternal) e).destroy();
+                } catch (Exception error) {
+                    Exceptions.propagateIfFatal(error);
+                    if (!unmanageOnErrors) Exceptions.propagate(error);
+                    errors.add(error);
+                }
+
+                try {
+                    unmanage(e);
+                } catch (Exception error) {
+                    Exceptions.propagateIfFatal(error);
+                    if (!unmanageOnErrors) Exceptions.propagate(error);
+                    errors.add(error);
+                }
+
+                if (!errors.isEmpty()) {
+                    log.warn("destroyed and unmanaged "+e+", with errors; mgmt now "+
+                            (e.getApplicationId()==null ? "(no app)" : e.getApplication().getManagementContext())+" - managed? "+isManaged(e)+"; errors: "+errors);
+                    throw Exceptions.propagate("Errors destroying "+e, errors);
+                }
+
                 log.debug("destroyed and unmanaged "+e+"; mgmt now "+
                     (e.getApplicationId()==null ? "(no app)" : e.getApplication().getManagementContext())+" - managed? "+isManaged(e));
             }


[brooklyn-server] 08/09: faster strategies for determining container readiness and completion

Posted by he...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git

commit 613bd023bd65d5e74fc0fc33ea82b8e3263ae5f3
Author: Alex Heneveld <al...@cloudsoft.io>
AuthorDate: Thu Aug 11 22:23:00 2022 +0100

    faster strategies for determining container readiness and completion
    
    Also, destroying the namespace can now be done asynchronously.
    
    These changes are motivated by jobs/pods updating only several seconds after the container has finished (observed using Docker Desktop).
---
 .../brooklyn/tasks/kubectl/ContainerCommons.java   |   6 +-
 .../tasks/kubectl/ContainerTaskFactory.java        | 456 ++++++++++++++-------
 .../brooklyn/tasks/kubectl/ContainerTaskTest.java  |   9 +-
 3 files changed, 328 insertions(+), 143 deletions(-)

diff --git a/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerCommons.java b/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerCommons.java
index d426724e38..130c25b963 100644
--- a/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerCommons.java
+++ b/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerCommons.java
@@ -56,10 +56,12 @@ public interface ContainerCommons {
     String NAMESPACE_CREATE_CMD = "kubectl create namespace %s";
     String NAMESPACE_SET_CMD = "kubectl config set-context --current --namespace=%s";
     String JOBS_CREATE_CMD = "kubectl apply -f %s --namespace=%s";
-    String JOBS_FEED_CMD = "kubectl wait --timeout=%ds --for=condition=complete job/%s --namespace=%s";
-    String JOBS_FEED_FAILED_CMD = "kubectl wait --timeout=%ds --for=condition=failed job/%s --namespace=%s";
+    String JOBS_WAIT_COMPLETE_CMD = "kubectl wait --timeout=%ds --for=condition=complete job/%s --namespace=%s";
+    String JOBS_WAIT_FAILED_CMD = "kubectl wait --timeout=%ds --for=condition=failed job/%s --namespace=%s";
     String JOBS_LOGS_CMD = "kubectl logs jobs/%s --namespace=%s";
+    String JOBS_DELETE_CMD = "kubectl delete job %s --namespace=%s";
     String PODS_CMD_PREFIX = "kubectl get pods --namespace=%s --selector=job-name=%s ";
+    String PODS_STATUS_STATE_CMD = PODS_CMD_PREFIX + "-ojsonpath='{.items[0].status.containerStatuses[0].state}'";
     String PODS_STATUS_PHASE_CMD = PODS_CMD_PREFIX + "-ojsonpath='{.items[0].status.phase}'";
     String PODS_NAME_CMD = PODS_CMD_PREFIX + "-ojsonpath='{.items[0].metadata.name}'";
     String PODS_EXIT_CODE_CMD = PODS_CMD_PREFIX + "-ojsonpath='{.items[0].status.containerStatuses[0].state.terminated.exitCode}'";
diff --git a/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskFactory.java b/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskFactory.java
index a78c1cb6de..f1b8caf255 100644
--- a/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskFactory.java
+++ b/software/base/src/main/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskFactory.java
@@ -19,6 +19,7 @@
 package org.apache.brooklyn.tasks.kubectl;
 
 import com.google.common.collect.Lists;
+import com.google.gson.Gson;
 import org.apache.brooklyn.api.entity.Entity;
 import org.apache.brooklyn.api.mgmt.Task;
 import org.apache.brooklyn.core.entity.BrooklynConfigKeys;
@@ -52,12 +53,14 @@ import org.slf4j.LoggerFactory;
 
 import java.io.ByteArrayOutputStream;
 import java.io.File;
+import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Consumer;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
@@ -71,6 +74,7 @@ public class ContainerTaskFactory<T extends ContainerTaskFactory<T,RET>,RET> imp
     protected String jobIdentifier = "";
     protected final ConfigBag config = ConfigBag.newInstance();
     private String namespace;
+    private boolean namespaceRandom = false;
     private Boolean createNamespace;
     private Boolean deleteNamespace;
     Function<ContainerTaskResult,RET> returnConversion;
@@ -179,146 +183,28 @@ public class ContainerTaskFactory<T extends ContainerTaskFactory<T,RET>,RET> imp
                             final CountdownTimer timer = CountdownTimer.newInstanceStarted(timeout);
 
                             // wait for it to be running (or failed / succeeded) -
-                            PodPhases podPhase = DynamicTasks.queue(Tasks.<PodPhases>builder().dynamic(true).displayName("Wait for container to be running (or fail)").body(() -> {
-                                String phase = null;
-                                long first = System.currentTimeMillis();
-                                long last = first;
-                                long backoffMillis = 10;
-                                while (timer.isNotExpired()) {
-                                    phase = DynamicTasks.queue(newSimpleTaskFactory(String.format(PODS_STATUS_PHASE_CMD, namespace, kubeJobName)).summary("Get pod phase").allowingNonZeroExitCode().newTask()).get().trim();
-                                    if (PodPhases.Running.name().equalsIgnoreCase(phase)) return PodPhases.Running;
-                                    if (PodPhases.Failed.name().equalsIgnoreCase(phase)) return PodPhases.Failed;
-                                    if (PodPhases.Succeeded.name().equalsIgnoreCase(phase)) return PodPhases.Succeeded;
-
-                                    if (Strings.isNonBlank(phase) && Strings.isBlank(result.kubePodName)) {
-                                        result.kubePodName = DynamicTasks.queue(newSimpleTaskFactory(String.format(PODS_NAME_CMD, namespace, kubeJobName)).summary("Get pod name").allowingNonZeroExitCode().newTask()).get().trim();
-                                    }
-                                    if (PodPhases.Pending.name().equals(phase) && Strings.isNonBlank(result.kubePodName)) {
-                                        // if pending, look for errors
-                                        String failedEvents = DynamicTasks.queue(newSimpleTaskFactory(String.format(SCOPED_EVENTS_FAILED_JSON_CMD, namespace, result.kubePodName)).summary("Get pod failed events").allowingNonZeroExitCode().newTask()).get().trim();
-                                        if (!"[]".equals(failedEvents)) {
-                                            String events = DynamicTasks.queue(newSimpleTaskFactory(String.format(SCOPED_EVENTS_CMD, namespace, result.kubePodName)).summary("Get pod events").allowingNonZeroExitCode().newTask()).get().trim();
-                                            throw new IllegalStateException("Job pod failed: "+failedEvents+"\n"+events);
-                                        }
-                                    }
-
-                                    if (System.currentTimeMillis() - last > 10*1000) {
-                                        last = System.currentTimeMillis();
-                                        // every 10s log info
-                                        LOG.info("Container taking long time to start ("+Duration.millis(last-first)+"): "+namespace+" "+kubeJobName+" "+result.kubePodName+" / phase '"+phase+"'");
-                                        if (Strings.isNonBlank(result.kubePodName)) {
-                                            String events = DynamicTasks.queue(newSimpleTaskFactory(String.format(SCOPED_EVENTS_CMD, namespace, result.kubePodName)).summary("Get pod events").allowingNonZeroExitCode().newTask()).get().trim();
-                                            LOG.info("Pod events: \n"+events);
-                                        } else {
-                                            String events = DynamicTasks.queue(newSimpleTaskFactory(String.format(SCOPED_EVENTS_CMD, namespace, kubeJobName)).summary("Get job events").allowingNonZeroExitCode().newTask()).get().trim();
-                                            LOG.info("Job events: \n"+events);
-                                        }
-                                    }
-                                    long backoffMillis2 = backoffMillis;
-                                    Tasks.withBlockingDetails("waiting "+backoffMillis2+"ms for pod to be available (current status '" + phase + "')", () -> {
-                                        Time.sleep(backoffMillis2);
-                                        return null;
-                                    });
-                                    if (backoffMillis<80) backoffMillis*=2;
-                                }
-                                throw new IllegalStateException("Timeout waiting for pod to be available; current status is '" + phase + "'");
-                            }).build()).getUnchecked();
+                            PodPhases phaseOnceActive = waitForContainerAvailable(kubeJobName, result, timer);
+//                            waitForContainerPodContainerState(kubeJobName, result, timer);
 
                             // notify once pod is available
-                            synchronized (result) {
-                                result.notifyAll();
-                            }
-
-                            // use `wait --for` api, but in a 5s loop in case there are other issues
-                            boolean succeeded = podPhase == PodPhases.Succeeded ? true : podPhase == PodPhases.Failed ? false : DynamicTasks.queue(Tasks.<Boolean>builder().dynamic(true).displayName("Wait for success or failure").body(() -> {
-                                while (true) {
-                                    LOG.debug("Container job submitted, now waiting on success or failure");
-
-                                    long secondsLeft = Math.min(Math.max(1, timer.getDurationRemaining().toSeconds()), 5);
-                                    final AtomicInteger finishCount = new AtomicInteger(0);
-
-                                    ProcessTaskWrapper<String> waitForSuccess = Entities.submit(entity, newSimpleTaskFactory(String.format(JOBS_FEED_CMD, secondsLeft, kubeJobName, namespace))
-                                            .summary("Wait for success ('complete')").allowingNonZeroExitCode().newTask());
-                                    Entities.submit(entity, Tasks.create("Wait for success then notify", () -> {
-                                        try {
-                                            if (waitForSuccess.get().contains("condition met"))
-                                                LOG.debug("Container job " + namespace + " detected as completed (succeeded) in kubernetes");
-                                        } finally {
-                                            synchronized (finishCount) {
-                                                finishCount.incrementAndGet();
-                                                finishCount.notifyAll();
-                                            }
-                                        }
-                                    }));
-
-                                    ProcessTaskWrapper<String> waitForFailed = Entities.submit(entity, newSimpleTaskFactory(String.format(JOBS_FEED_FAILED_CMD, secondsLeft, kubeJobName, namespace))
-                                            .summary("Wait for failed").allowingNonZeroExitCode().newTask());
-                                    Entities.submit(entity, Tasks.create("Wait for failed then notify", () -> {
-                                        try {
-                                            if (waitForFailed.get().contains("condition met"))
-                                                LOG.debug("Container job " + namespace + " detected as failed in kubernetes (may be valid non-zero exit)");
-                                        } finally {
-                                            synchronized (finishCount) {
-                                                finishCount.incrementAndGet();
-                                                finishCount.notifyAll();
-                                            }
-                                        }
-                                    }));
-
-                                    while (finishCount.get() == 0) {
-                                        LOG.debug("Container job " + namespace + " waiting on complete or failed");
-                                        try {
-                                            synchronized (finishCount) {
-                                                finishCount.wait(Duration.TEN_SECONDS.toMilliseconds());
-                                            }
-                                        } catch (InterruptedException e) {
-                                            throw Exceptions.propagate(e);
-                                        }
-                                    }
-
-                                    if (waitForSuccess.isDone() && waitForSuccess.getExitCode() == 0) return true;
-                                    if (waitForFailed.isDone() && waitForFailed.getExitCode() == 0) return false;
-                                    LOG.debug("Container job " + namespace + " not yet complete, will retry");
+                            synchronized (result) { result.notifyAll(); }
 
-                                    // other one-off checks for job error, we could do here
-                                    // e.g. if image can't be pulled, for instance
+                            boolean succeeded = PodPhases.Succeeded == phaseOnceActive ||
+                                    (PodPhases.Failed != phaseOnceActive &&
+                                            //use `wait --for` api, but in a 5s loop in case there are other issues
+//                                            waitForContainerCompletedUsingK8sWaitFor(stdout, kubeJobName, entity, timer)
+                                            waitForContainerCompletedUsingPodState(stdout, kubeJobName, entity, timer)
+                                    );
 
-                                    // finally get the partial log for reporting
-                                    ProcessTaskWrapper<String> outputSoFarCmd = DynamicTasks.queue(newSimpleTaskFactory(String.format(JOBS_LOGS_CMD, kubeJobName, namespace)).summary("Retrieve output so far").allowingNonZeroExitCode().newTask());
-                                    BrooklynTaskTags.setTransient(outputSoFarCmd.asTask());
-                                    outputSoFarCmd.block();
-                                    if (outputSoFarCmd.getExitCode()!=0) {
-                                        throw new IllegalStateException("Error detected with container job while reading logs (exit code "+outputSoFarCmd.getExitCode()+"): "+outputSoFarCmd.getStdout() + " / "+outputSoFarCmd.getStderr());
-                                    }
-                                    String outputSoFar = outputSoFarCmd.get();
-                                    int bytesAlreadyRead = stdout.size();
-                                    if (bytesAlreadyRead <= outputSoFar.length()) {
-                                        String newOutput = outputSoFar.substring(stdout.size());
-                                        LOG.debug("Container job " + namespace + " output: " + newOutput);
-                                        stdout.write(newOutput.getBytes(StandardCharsets.UTF_8));
-                                    } else {
-                                        // not sure why this happens, but it does sometimes; for now just reset
-                                        LOG.debug("Container job " + namespace + " output reset, length "+outputSoFar.length()+" less than "+bytesAlreadyRead+"; ignoring new output:\n" + outputSoFar +"\n"+new String(stdout.toByteArray()));
-                                        stdout.reset();
-                                        stdout.write(outputSoFar.getBytes(StandardCharsets.UTF_8));
-                                    }
-
-                                    if (timer.isExpired())
-                                        throw new IllegalStateException("Timeout waiting for success or failure");
-
-                                    // probably timed out or job not yet available; short wait then retry
-                                    Time.sleep(Duration.millis(50));
-                                }
-
-                            }).build()).getUnchecked();
-                            LOG.debug("Container job "+namespace+" completed, success "+succeeded);
+                            LOG.debug("Container job "+kubeJobName+" completed, success "+succeeded);
 
                             ProcessTaskWrapper<String> retrieveOutput = DynamicTasks.queue(newSimpleTaskFactory(String.format(JOBS_LOGS_CMD, kubeJobName, namespace)).summary("Retrieve output").newTask());
                             ProcessTaskWrapper<String> retrieveExitCode = DynamicTasks.queue(newSimpleTaskFactory(String.format(PODS_EXIT_CODE_CMD, namespace, kubeJobName)).summary("Retrieve exit code").newTask());
 
                             DynamicTasks.waitForLast();
                             result.mainStdout = retrieveOutput.get();
-                            stdout.write(result.mainStdout.substring(stdout.size()).getBytes(StandardCharsets.UTF_8));
+
+                            updateStdoutWithNewData(stdout, result.mainStdout);
 
                             String exitCodeS = retrieveExitCode.getStdout();
                             if (Strings.isNonBlank(exitCodeS)) result.mainExitCode = Integer.parseInt(exitCodeS.trim());
@@ -333,8 +219,17 @@ public class ContainerTaskFactory<T extends ContainerTaskFactory<T,RET>,RET> imp
 
                         } finally {
                             if (deleteNamespaceHere) {
-                                doDeleteNamespace();
+                                doDeleteNamespace(!namespaceRandom, true);  // if a one-off job, namespace has random id in it so can safely be deleted in background (no one else risks reusing it)
+                            } else {
+                                Boolean devMode = EntityInitializers.resolve(config, KEEP_CONTAINER_FOR_DEBUGGING);
+                                if (!Boolean.TRUE.equals(devMode)) {
+                                    Entities.submit(entity, newDeleteJobTask(kubeJobName)
+                                                    // namespace might have been deleted in parallel so okay if we don't delete the job
+                                                    .allowingNonZeroExitCode()
+                                                    .newTask()).get();
+                                }
                             }
+                            DynamicTasks.waitForLast();
                         }
                     } catch (Exception e) {
                         throw Exceptions.propagate(e);
@@ -346,6 +241,277 @@ public class ContainerTaskFactory<T extends ContainerTaskFactory<T,RET>,RET> imp
         return taskBuilder.build();
     }
 
+    /**
+     * Queues a dynamic task which repeatedly calls {@link #checkForContainerCompletedUsingK8sWaitFor(String, Entity, long)}
+     * in short slices (between 1 and 5 seconds each) until it reports a definite outcome,
+     * refreshing the captured stdout between attempts.
+     * The overall timeout is enforced by {@code refreshStdout}, which throws once the timer has expired.
+     *
+     * @return true if the job was detected as completed, false if it was detected as failed
+     */
+    private Boolean waitForContainerCompletedUsingK8sWaitFor(ByteArrayOutputStream stdout, String kubeJobName, Entity entity, CountdownTimer timer) {
+        return DynamicTasks.queue(Tasks.<Boolean>builder().dynamic(true).displayName("Wait for success or failure").body(() -> {
+            for (;;) {
+                LOG.debug("Container job " + kubeJobName + " submitted, now waiting on success or failure");
+
+                // clamp the remaining time into the range [1, 5] seconds for this polling slice
+                long remainingSeconds = timer.getDurationRemaining().toSeconds();
+                long sliceSeconds = remainingSeconds < 1 ? 1 : Math.min(remainingSeconds, 5);
+
+                Boolean outcome = checkForContainerCompletedUsingK8sWaitFor(kubeJobName, entity, sliceSeconds);
+                if (outcome == null) {
+                    LOG.debug("Container job " + namespace + " not yet complete, will retry");
+
+                    // further one-off checks for job errors (e.g. an image which cannot be pulled) could go here
+
+                    refreshStdout(stdout, kubeJobName, timer);
+
+                    // slice probably timed out, or the job is not yet available; pause briefly and go round again
+                    Time.sleep(Duration.millis(50));
+                    continue;
+                }
+                return outcome;
+            }
+
+        }).build()).getUnchecked();
+    }
+
+    /**
+     * Queues a dynamic task which polls {@link #checkPodPhase(String)} until the pod is
+     * {@code Succeeded} or {@code Failed}, refreshing the captured stdout between polls.
+     * The delay between polls starts at 10ms and grows by 1.5x each iteration, capped at 500ms.
+     * The overall timeout is enforced by {@code refreshStdout}, which throws once the timer has expired.
+     *
+     * @return true if the pod phase became Succeeded, false if it became Failed
+     */
+    private Boolean waitForContainerCompletedUsingPodState(ByteArrayOutputStream stdout, String kubeJobName, Entity entity, CountdownTimer timer) {
+        return DynamicTasks.queue(Tasks.<Boolean>builder().dynamic(true).displayName("Wait for success or failure").body(() -> {
+            final long maxRetryDelay = 500;
+            long retryDelay = 10;
+            while (true) {
+                LOG.debug("Container job " + kubeJobName + " submitted, now waiting on success or failure");
+
+                PodPhases phase = checkPodPhase(kubeJobName);
+                if (phase == PodPhases.Succeeded) return true;
+                if (phase == PodPhases.Failed) return false;
+
+                LOG.debug("Container job " + namespace + " not yet complete, will sleep then retry");
+
+                // other one-off checks for job error, we could do here
+                // e.g. if image can't be pulled, for instance
+
+                refreshStdout(stdout, kubeJobName, timer);
+
+                // probably timed out or job not yet available; short wait then retry
+                Time.sleep(Duration.millis(retryDelay));
+                // grow by 1.5x (integer arithmetic, truncating as the former `*= 1.5` did) but never beyond the cap;
+                // previously the cap was applied as soon as the delay passed 250ms, skipping straight to 500ms
+                retryDelay = Math.min(maxRetryDelay, retryDelay * 3 / 2);
+            }
+
+        }).build()).getUnchecked();
+    }
+
+    /**
+     * Retrieves the job's log output so far, merges it into {@code stdout}, and then checks the timer.
+     *
+     * @throws IllegalStateException if the log command exits non-zero, or if the timer has expired
+     * @throws IOException if the output cannot be written to {@code stdout}
+     */
+    private void refreshStdout(ByteArrayOutputStream stdout, String kubeJobName, CountdownTimer timer) throws IOException {
+        String logsCommand = String.format(JOBS_LOGS_CMD, kubeJobName, namespace);
+        ProcessTaskWrapper<String> logsSoFar = DynamicTasks.queue(
+                newSimpleTaskFactory(logsCommand).summary("Retrieve output so far").allowingNonZeroExitCode().newTask());
+        // marked transient before blocking on it
+        BrooklynTaskTags.setTransient(logsSoFar.asTask());
+        logsSoFar.block();
+
+        boolean logsFailed = logsSoFar.getExitCode() != 0;
+        if (logsFailed) {
+            throw new IllegalStateException("Error detected with container job while reading logs (exit code " + logsSoFar.getExitCode() + "): " + logsSoFar.getStdout() + " / " + logsSoFar.getStderr());
+        }
+        updateStdoutWithNewData(stdout, logsSoFar.get());
+
+        // the timeout is only checked after the output has been captured
+        if (timer.isExpired()) {
+            throw new IllegalStateException("Timeout waiting for success or failure");
+        }
+    }
+
+    /**
+     * Brings {@code receiverStream} up to date with the full output {@code outputFound}.
+     * If the stream already holds no more bytes than the UTF-8 encoding of {@code outputFound},
+     * only the bytes beyond what the stream already holds are appended;
+     * otherwise (the output has become shorter) the stream is reset and rewritten with {@code outputFound}.
+     * <p>
+     * Lengths and offsets are computed on the UTF-8 bytes rather than on characters,
+     * because the stream size is a byte count; mixing the two mis-slices output containing multi-byte characters.
+     *
+     * @param receiverStream stream accumulating the output seen so far, as UTF-8 bytes
+     * @param outputFound the complete output most recently retrieved
+     * @throws IOException if writing to the stream fails
+     */
+    private void updateStdoutWithNewData(ByteArrayOutputStream receiverStream, String outputFound) throws IOException {
+        byte[] bytesFound = outputFound.getBytes(StandardCharsets.UTF_8);
+        int bytesAlreadyRead = receiverStream.size();
+        if (bytesAlreadyRead <= bytesFound.length) {
+            int newLength = bytesFound.length - bytesAlreadyRead;
+            String newOutput = new String(bytesFound, bytesAlreadyRead, newLength, StandardCharsets.UTF_8);
+            LOG.debug("Container job " + namespace + " output: " + newOutput);
+            receiverStream.write(bytesFound, bytesAlreadyRead, newLength);
+        } else {
+            // not sure why this happens, but it does sometimes; for now just reset
+            LOG.debug("Container job " + namespace + " output reset, length " + outputFound.length() + " less than " + bytesAlreadyRead + "; ignoring new output:\n" + outputFound + "\n" + new String(receiverStream.toByteArray(), StandardCharsets.UTF_8));
+            receiverStream.reset();
+            receiverStream.write(bytesFound);
+        }
+    }
+
+    /**
+     * Submits the {@code JOBS_WAIT_COMPLETE_CMD} and {@code JOBS_WAIT_FAILED_CMD} commands in parallel
+     * (each given {@code timeoutSeconds}) and blocks until at least one of them has finished.
+     *
+     * @param kubeJobName name of the job being watched
+     * @param entity entity in whose context the commands and notifier tasks are submitted
+     * @param timeoutSeconds timeout passed to each wait command
+     * @return {@code true} if the "complete" command is done with exit code 0,
+     *         {@code false} if the "failed" command is done with exit code 0,
+     *         or {@code null} if neither holds (e.g. the finished command timed out), meaning the caller should retry
+     */
+    private Boolean checkForContainerCompletedUsingK8sWaitFor(String kubeJobName, Entity entity, long timeoutSeconds) {
+        final AtomicInteger finishCount = new AtomicInteger(0);
+
+        ProcessTaskWrapper<String> waitForSuccess = Entities.submit(entity, newSimpleTaskFactory(String.format(JOBS_WAIT_COMPLETE_CMD, timeoutSeconds, kubeJobName, namespace))
+                .summary("Wait for success ('complete')").allowingNonZeroExitCode().newTask());
+        Entities.submit(entity, Tasks.create("Wait for success then notify", () -> {
+            try {
+                if (waitForSuccess.get().contains("condition met"))
+                    LOG.debug("Container job " + kubeJobName + " detected as completed (succeeded) in kubernetes");
+            } finally {
+                // always count and notify, even if the command failed, so the waiter below is released
+                synchronized (finishCount) {
+                    finishCount.incrementAndGet();
+                    finishCount.notifyAll();
+                }
+            }
+        }));
+
+        ProcessTaskWrapper<String> waitForFailed = Entities.submit(entity, newSimpleTaskFactory(String.format(JOBS_WAIT_FAILED_CMD, timeoutSeconds, kubeJobName, namespace))
+                .summary("Wait for failed").allowingNonZeroExitCode().newTask());
+        Entities.submit(entity, Tasks.create("Wait for failed then notify", () -> {
+            try {
+                if (waitForFailed.get().contains("condition met"))
+                    LOG.debug("Container job " + kubeJobName + " detected as failed in kubernetes (may be valid non-zero exit)");
+            } finally {
+                synchronized (finishCount) {
+                    finishCount.incrementAndGet();
+                    finishCount.notifyAll();
+                }
+            }
+        }));
+
+        // the counter is checked while holding the monitor, so a notification sent between
+        // the check and the wait cannot be missed (which previously could stall this for the full 10s)
+        synchronized (finishCount) {
+            while (finishCount.get() == 0) {
+                LOG.debug("Container job " + kubeJobName + " waiting on complete or failed");
+                try {
+                    finishCount.wait(Duration.TEN_SECONDS.toMilliseconds());
+                } catch (InterruptedException e) {
+                    throw Exceptions.propagate(e);
+                }
+            }
+        }
+
+        if (waitForSuccess.isDone() && waitForSuccess.getExitCode() == 0) return true;
+        if (waitForFailed.isDone() && waitForFailed.getExitCode() == 0) return false;
+        return null;
+    }
+
+    /**
+     * Checks whether the job has completed or failed.
+     * <p>
+     * Despite its name this does not inspect pod state: its body was a line-for-line copy of
+     * {@link #checkForContainerCompletedUsingK8sWaitFor(String, Entity, long)}, so it now delegates to that method
+     * rather than duplicating it.
+     *
+     * @return {@code true} if detected as completed, {@code false} if detected as failed, {@code null} if not yet known
+     */
+    private Boolean checkForContainerCompletedUsingPodState(String kubeJobName, Entity entity, long timeoutSeconds) {
+        return checkForContainerCompletedUsingK8sWaitFor(kubeJobName, entity, timeoutSeconds);
+    }
+
+    private PodPhases waitForContainerAvailable(String kubeJobName, ContainerTaskResult result, CountdownTimer timer) {  // polls the pod phase until it is Running, Succeeded or Failed and returns it; throws IllegalStateException on timeout or when a pending pod has failed events
+        return DynamicTasks.queue(Tasks.<PodPhases>builder().dynamic(true).displayName("Wait for container to be running (or fail)").body(() -> {
+            long first = System.currentTimeMillis();  // start time, used for elapsed-time reporting
+            long last = first;  // time of the most recent progress report
+            long backoffMillis = 10;  // sleep between polls; doubled after each poll while below 80ms (10, 20, 40, 80)
+            PodPhases phase = PodPhases.Unknown;
+            long startupReportDelay = 1000;  // report any start longer than 1s
+            while (timer.isNotExpired()) {
+                phase = checkPodPhase(kubeJobName);
+                if (phase == PodPhases.Failed || phase == PodPhases.Succeeded || phase == PodPhases.Running) {
+                    if (startupReportDelay>5000) LOG.info("Container detected in state "+phase+" after "+Duration.millis(System.currentTimeMillis()-first));  // the delay only exceeds 5000 once two progress reports have been made, so this marks a slow start
+                    else LOG.debug("Container detected in state "+phase+" after "+Duration.millis(System.currentTimeMillis()-first));
+                    return phase;
+                }
+
+                if (phase!=PodPhases.Unknown && Strings.isBlank(result.kubePodName)) {  // look up the pod name once a phase is known; it is stored on the result so it is not fetched again
+                    result.kubePodName = DynamicTasks.queue(newSimpleTaskFactory(String.format(PODS_NAME_CMD, namespace, kubeJobName)).summary("Get pod name").allowingNonZeroExitCode().newTask()).get().trim();
+                }
+                if (phase == PodPhases.Pending && Strings.isNonBlank(result.kubePodName)) {
+                    // if pending, need to look for errors
+                    String failedEvents = DynamicTasks.queue(newSimpleTaskFactory(String.format(SCOPED_EVENTS_FAILED_JSON_CMD, namespace, result.kubePodName)).summary("Check pod failed events").allowingNonZeroExitCode().newTask()).get().trim();
+                    if (!"[]".equals(failedEvents)) {  // any output other than "[]" (including blank output) is treated as a failure
+                        String events = DynamicTasks.queue(newSimpleTaskFactory(String.format(SCOPED_EVENTS_CMD, namespace, result.kubePodName)).summary("Get pod events on failure").allowingNonZeroExitCode().newTask()).get().trim();
+                        throw new IllegalStateException("Job pod failed: "+failedEvents+"\n"+events);
+                    }
+                }
+
+                if (System.currentTimeMillis() - last > startupReportDelay) {  // time for another progress report
+                    last = System.currentTimeMillis();
+
+                    // log debug after 1s, then info after 5s, 20s, etc
+                    // seems bad that it often takes 1s+ just to start the container :/
+                    Consumer<String> log = startupReportDelay<3*1000 ? LOG::debug : LOG::info;
+
+                    log.accept("Container taking a while to start ("+Duration.millis(last-first)+"): "+namespace+" "+ kubeJobName +" "+ result.kubePodName+" / phase '"+phase+"'");
+                    String stateJsonS = DynamicTasks.queue(newSimpleTaskFactory(String.format(PODS_STATUS_STATE_CMD, namespace, kubeJobName)).summary("Get pod state").allowingNonZeroExitCode().newTask()).get().trim();
+                    if (Strings.isNonBlank(stateJsonS)) {
+                        log.accept("Pod state: "+stateJsonS);
+                    }
+                    if (Strings.isNonBlank(result.kubePodName)) {
+                        String events = DynamicTasks.queue(newSimpleTaskFactory(String.format(SCOPED_EVENTS_CMD, namespace, result.kubePodName)).summary("Get pod events").allowingNonZeroExitCode().newTask()).get().trim();
+                        log.accept("Pod events: \n"+events);
+                    } else {  // pod name not known yet, so report events scoped to the job name instead
+                        String events = DynamicTasks.queue(newSimpleTaskFactory(String.format(SCOPED_EVENTS_CMD, namespace, kubeJobName)).summary("Get job events").allowingNonZeroExitCode().newTask()).get().trim();
+                        log.accept("Job events: \n"+events);
+                    }
+
+                    // first 1s, then 5s, then every 20s
+                    startupReportDelay *= 5;
+                    if (startupReportDelay > 20*1000) startupReportDelay = 20*1000;
+                }
+                long backoffMillis2 = backoffMillis;  // effectively-final copy for use inside the lambda below
+                Tasks.withBlockingDetails("waiting "+backoffMillis2+"ms for pod to be available (current status '" + phase + "')", () -> {
+                    Time.sleep(backoffMillis2);
+                    return null;
+                });
+                if (backoffMillis<80) backoffMillis*=2;
+            }
+            throw new IllegalStateException("Timeout waiting for pod to be available; current status is '" + phase + "'");  // reached only when the timer expires before the pod is Running, Succeeded or Failed
+        }).build()).getUnchecked();
+    }
+
+    /**
+     * Determines the current phase of the job's pod, preferring the container state
+     * and falling back to the pod status phase when the container state gives no answer.
+     *
+     * @return the matching phase, or {@code PodPhases.Unknown} if the reported phase matches no known value; never null
+     */
+    private PodPhases checkPodPhase(String kubeJobName) {
+        PodPhases fromContainerState = getPodPhaseFromContainerState(kubeJobName);
+        if (fromContainerState == null) {
+            // the pod status phase is the more official source; used when the container state is not recognised (eg waiting)
+            String reportedPhase = DynamicTasks.queue(
+                    newSimpleTaskFactory(String.format(PODS_STATUS_PHASE_CMD, namespace, kubeJobName)).summary("Get pod phase").allowingNonZeroExitCode().newTask())
+                    .get().trim();
+            for (PodPhases known : PodPhases.values()) {
+                if (reportedPhase.equalsIgnoreCase(known.name())) {
+                    return known;
+                }
+            }
+            return PodPhases.Unknown;
+        }
+        return fromContainerState;
+    }
+
+    /**
+     * Derives a pod phase from the first key of the container state JSON, which is populated
+     * much sooner than the pod status and job fields.
+     * <p>
+     * NOTE(review): a "terminated" state is mapped to Succeeded without looking at the exit code;
+     * presumably callers rely on the separately retrieved exit code to tell success from failure -- confirm.
+     *
+     * @return Succeeded for "terminated", Running for "running", or null if the state is blank,
+     *         not a non-empty JSON object, or has any other first key
+     */
+    private PodPhases getPodPhaseFromContainerState(String kubeJobName) {
+        String stateJson = DynamicTasks.queue(
+                newSimpleTaskFactory(String.format(PODS_STATUS_STATE_CMD, namespace, kubeJobName)).summary("Get pod state").allowingNonZeroExitCode().newTask())
+                .get().trim();
+        if (!Strings.isNonBlank(stateJson)) {
+            return null;
+        }
+
+        Object parsed = new Gson().fromJson(stateJson, Object.class);
+        if (!(parsed instanceof Map)) {
+            return null;
+        }
+
+        Map<?, ?> state = (Map<?, ?>) parsed;
+        if (state.keySet().isEmpty()) {
+            return null;
+        }
+
+        // only the first key is considered
+        Object firstKey = state.keySet().iterator().next();
+        if (!(firstKey instanceof String)) {
+            return null;
+        }
+
+        String stateName = (String) firstKey;
+        if ("terminated".equalsIgnoreCase(stateName)) {
+            return PodPhases.Succeeded;
+        }
+        if ("running".equalsIgnoreCase(stateName)) {
+            return PodPhases.Running;
+        }
+        return null;
+    }
+
+    /**
+     * Creates a task factory for the command which deletes the given job in this factory's namespace.
+     * The task is neither created nor submitted here; callers do that.
+     */
+    public ProcessTaskFactory<String> newDeleteJobTask(String kubeJobName) {
+        String deleteJobCommand = String.format(JOBS_DELETE_CMD, kubeJobName, namespace);
+        return newSimpleTaskFactory(deleteJobCommand).summary("Delete job");
+    }
+
     private String initNamespaceAndGetNewJobName() {
         Entity entity = BrooklynTaskTags.getContextEntity(Tasks.current());
         if (entity == null) {
@@ -374,6 +540,7 @@ public class ContainerTaskFactory<T extends ContainerTaskFactory<T,RET>,RET> imp
                 .toLowerCase();
         if (namespace==null) {
             namespace = kubeJobName;
+            namespaceRandom = true;
         }
         return kubeJobName;
     }
@@ -382,21 +549,28 @@ public class ContainerTaskFactory<T extends ContainerTaskFactory<T,RET>,RET> imp
         return namespace;
     }
 
-    public boolean doDeleteNamespace() {
-        if (namespace==null) return false;
+    public ProcessTaskWrapper<String> doDeleteNamespace(boolean wait, boolean requireSuccess) {  // submits deletion of the namespace and returns the task; returns null when there is no namespace, no context entity, or containers are kept for debugging
+        if (namespace==null) return null;
         Entity entity = BrooklynTaskTags.getContextEntity(Tasks.current());
-        if (entity==null) return false;
+        if (entity==null) return null;
         // clean up - delete namespace
         Boolean devMode = EntityInitializers.resolve(config, KEEP_CONTAINER_FOR_DEBUGGING);
         if (Boolean.TRUE.equals(devMode)) {
-            return false;
+            return null;
         }
 
-        LOG.debug("Deleting namespace " + namespace);
+        LOG.info("Deleting namespace " + namespace);
         // do this not as a subtask so we can run even if the main queue fails
-        Entities.submit(entity, newSimpleTaskFactory(String.format(NAMESPACE_DELETE_CMD, namespace)).summary("Tear down containers").newTask()).block();
-        System.runFinalization();
-        return true;
+        ProcessTaskFactory<String> tf = newSimpleTaskFactory(String.format(NAMESPACE_DELETE_CMD, namespace)).summary("Tear down containers").allowingNonZeroExitCode();  // NOTE(review): this allowingNonZeroExitCode() looks redundant, the exit-code policy is set again on the next two lines
+        if (!requireSuccess) tf = tf.allowingNonZeroExitCode();
+        else tf = tf.requiringExitCodeZero();
+        ProcessTaskWrapper<String> task = Entities.submit(entity, tf.newTask());
+        if (wait) {  // when false the deletion is left running and the caller can use the returned task
+            task.get();
+            LOG.info("Deleted namespace " + namespace);  // NOTE(review): also logged when the command exited non-zero and requireSuccess is false -- confirm this is intended
+            System.runFinalization();
+        }
+        return task;
     }
 
     public T summary(String summary) { this.summary = summary; return self(); }
@@ -458,7 +632,9 @@ public class ContainerTaskFactory<T extends ContainerTaskFactory<T,RET>,RET> imp
         return self();
     }
 
-    public T deleteNamespace(Boolean delete) { this.deleteNamespace = delete; return self(); }
+    /** Sets the {@code deleteNamespace} flag on this factory and returns the factory for chaining. */
+    public T setDeleteNamespaceAfter(Boolean delete) {
+        this.deleteNamespace = delete;
+        return self();
+    }
+    /**
+     * Sets the {@code deleteNamespace} flag by delegating to {@link #setDeleteNamespaceAfter(Boolean)}.
+     *
+     * @deprecated since 1.1 when introduced; use {@link #setDeleteNamespaceAfter(Boolean)} instead
+     */
+    @Deprecated
+    public T deleteNamespace(Boolean delete) { return setDeleteNamespaceAfter(delete); }
 
     /** visible in the container environment */
     public T jobIdentifier(String jobIdentifier) {
diff --git a/software/base/src/test/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskTest.java b/software/base/src/test/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskTest.java
index a688ae21b3..0b85dcfb1e 100644
--- a/software/base/src/test/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskTest.java
+++ b/software/base/src/test/java/org/apache/brooklyn/tasks/kubectl/ContainerTaskTest.java
@@ -30,6 +30,8 @@ import org.apache.brooklyn.test.Asserts;
 import org.apache.brooklyn.util.core.task.DynamicTasks;
 import org.apache.brooklyn.util.text.Identifiers;
 import org.apache.brooklyn.util.time.Duration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.testng.annotations.Test;
 
 import java.util.HashMap;
@@ -43,10 +45,14 @@ import static org.testng.AssertJUnit.assertTrue;
 @Test(groups = {"Live"})
 public class ContainerTaskTest extends BrooklynAppUnitTestSupport {
 
+    private static final Logger LOG = LoggerFactory.getLogger(ContainerTaskTest.class);
+
     @Test
     public void testSuccessfulContainerTask() {
+        LOG.info("Starting container test");
         TestEntity entity = app.createAndManageChild(EntitySpec.create(TestEntity.class));
 
+        LOG.info("Starting dedicated container run");
         Task<ContainerTaskResult> containerTask =  ContainerTaskFactory.newInstance()
                 .summary("Running container task")
                 .jobIdentifier("test-container-task")
@@ -58,6 +64,7 @@ public class ContainerTaskTest extends BrooklynAppUnitTestSupport {
 
         DynamicTasks.queueIfPossible(containerTask).orSubmitAsync(entity);
         ContainerTaskResult result = containerTask.getUnchecked(Duration.ONE_MINUTE);
+        LOG.info("Result: "+result + " / "+result.getMainStdout().trim());
         Asserts.assertEquals(result.getMainStdout().trim(), "hello test");
     }
 
@@ -220,7 +227,7 @@ public class ContainerTaskTest extends BrooklynAppUnitTestSupport {
             Asserts.assertEquals(result.getMainStdout().trim(), "hello " + uid);
 
         } finally {
-            DynamicTasks.queueIfPossible( baseFactory.summary("cleaning up").deleteNamespace(true).bashScriptCommands("rm hello-"+uid+".sh") ).orSubmitAsync(entity);
+            DynamicTasks.queueIfPossible( baseFactory.summary("cleaning up").setDeleteNamespaceAfter(true).bashScriptCommands("rm hello-"+uid+".sh") ).orSubmitAsync(entity);
         }
     }