You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@brooklyn.apache.org by al...@apache.org on 2016/01/15 15:57:35 UTC

[2/5] incubator-brooklyn git commit: BROOKLYN-212: AutoScaling doesn’t retry if InsufficientCapacity

BROOKLYN-212: AutoScaling doesn’t retry if InsufficientCapacity

- Adds Resizable.InsufficientCapacityException, thrown by
  Resizable.resize() if could not grow at all.
- DynamicCluster catches NoMachinesAvailableException, and rethrows
  as InsufficientCapacityException in resize().
- AutoScalerPolicy catches InsufficientCapacityException, and sets
  insufficientCapacityHighWaterMark to record the max size it can get
  to. Does not try again to resize above that, unless the highWaterMark
  is explicitly cleared by reconfiguring that config value.
- Tests:
  - Changes TestCluster to include history of sizes and desiredSizes
  - Changes TestCluster, so can throw InsufficientCapacityException
    when gets to a particular size.
  - Test for DynamicCluster throwing InsufficientCapacityException
  - Test for AutoScalerPolicyMetricTest, to not resize above the failure
    level again.
  - Test for AutoScalerPolicyNoMoreMachinesTest, for when BYON location
    has run out of machines in a DynamicCluster.


Project: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/commit/a91889d4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/tree/a91889d4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/diff/a91889d4

Branch: refs/heads/master
Commit: a91889d467848ceefa88855fd615afe9bca94931
Parents: dd3b8e8
Author: Aled Sage <al...@gmail.com>
Authored: Fri Jan 8 21:33:25 2016 +0000
Committer: Aled Sage <al...@gmail.com>
Committed: Thu Jan 14 10:45:13 2016 +0000

----------------------------------------------------------------------
 .../brooklyn/core/entity/trait/Resizable.java   |  16 ++
 .../entity/group/DynamicClusterImpl.java        |  29 ++-
 .../core/entity/trait/FailingEntity.java        |   2 +-
 .../core/entity/trait/FailingEntityImpl.java    |   7 +-
 .../brooklyn/core/test/entity/TestCluster.java  |  10 +
 .../core/test/entity/TestClusterImpl.java       |  34 ++++
 .../entity/group/DynamicClusterTest.java        |  58 +++++-
 .../policy/autoscaling/AutoScalerPolicy.java    |  42 ++++-
 .../autoscaling/AutoScalerPolicyMetricTest.java |  50 ++++-
 .../AutoScalerPolicyNoMoreMachinesTest.java     | 187 +++++++++++++++++++
 10 files changed, 421 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/a91889d4/brooklyn-server/core/src/main/java/org/apache/brooklyn/core/entity/trait/Resizable.java
----------------------------------------------------------------------
diff --git a/brooklyn-server/core/src/main/java/org/apache/brooklyn/core/entity/trait/Resizable.java b/brooklyn-server/core/src/main/java/org/apache/brooklyn/core/entity/trait/Resizable.java
index 68c9398..1fca9f3 100644
--- a/brooklyn-server/core/src/main/java/org/apache/brooklyn/core/entity/trait/Resizable.java
+++ b/brooklyn-server/core/src/main/java/org/apache/brooklyn/core/entity/trait/Resizable.java
@@ -31,6 +31,20 @@ import org.apache.brooklyn.core.effector.MethodEffector;
  */
 public interface Resizable {
 
+    /**
+     * Indicates that resizing up (at all) is not possible, because there is insufficient capacity.
+     */
+    public static class InsufficientCapacityException extends RuntimeException {
+        private static final long serialVersionUID = 953230498564942446L;
+        
+        public InsufficientCapacityException(String msg) {
+            super(msg);
+        }
+        public InsufficientCapacityException(String msg, Throwable cause) {
+            super(msg, cause);
+        }
+    }
+    
     MethodEffector<Integer> RESIZE = new MethodEffector<Integer>(Resizable.class, "resize");
 
     /**
@@ -38,6 +52,8 @@ public interface Resizable {
      *
      * @param desiredSize the new size of the entity group.
      * @return the new size of the group.
+     * 
+     * @throws InsufficientCapacityException If the request was to grow, but there is no capacity to grow at all
      */
     @Effector(description="Changes the size of the entity (e.g. the number of nodes in a cluster)")
     Integer resize(@EffectorParam(name="desiredSize", description="The new size of the cluster") Integer desiredSize);

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/a91889d4/brooklyn-server/core/src/main/java/org/apache/brooklyn/entity/group/DynamicClusterImpl.java
----------------------------------------------------------------------
diff --git a/brooklyn-server/core/src/main/java/org/apache/brooklyn/entity/group/DynamicClusterImpl.java b/brooklyn-server/core/src/main/java/org/apache/brooklyn/entity/group/DynamicClusterImpl.java
index b8e5c63..bfe0b90 100644
--- a/brooklyn-server/core/src/main/java/org/apache/brooklyn/entity/group/DynamicClusterImpl.java
+++ b/brooklyn-server/core/src/main/java/org/apache/brooklyn/entity/group/DynamicClusterImpl.java
@@ -38,6 +38,7 @@ import org.apache.brooklyn.api.entity.EntitySpec;
 import org.apache.brooklyn.api.entity.Group;
 import org.apache.brooklyn.api.location.Location;
 import org.apache.brooklyn.api.location.MachineProvisioningLocation;
+import org.apache.brooklyn.api.location.NoMachinesAvailableException;
 import org.apache.brooklyn.api.mgmt.Task;
 import org.apache.brooklyn.api.policy.Policy;
 import org.apache.brooklyn.api.sensor.AttributeSensor;
@@ -50,6 +51,7 @@ import org.apache.brooklyn.core.entity.factory.EntityFactoryForLocation;
 import org.apache.brooklyn.core.entity.lifecycle.Lifecycle;
 import org.apache.brooklyn.core.entity.lifecycle.ServiceStateLogic;
 import org.apache.brooklyn.core.entity.lifecycle.ServiceStateLogic.ServiceProblemsLogic;
+import org.apache.brooklyn.core.entity.trait.Resizable;
 import org.apache.brooklyn.core.entity.trait.Startable;
 import org.apache.brooklyn.core.entity.trait.StartableMethods;
 import org.apache.brooklyn.core.location.Locations;
@@ -518,7 +520,20 @@ public class DynamicClusterImpl extends AbstractGroupImpl implements DynamicClus
             } else {
                 if (LOG.isDebugEnabled()) LOG.debug("Resize no-op {} from {} to {}", new Object[] {this, originalSize, desiredSize});
             }
-            resizeByDelta(delta);
+            // If we managed to grow at all, then expect no exception.
+            // Otherwise, if failed because NoMachinesAvailable, then propagate as InsufficientCapacityException.
+            // This tells things like the AutoScalerPolicy to not keep retrying.
+            try {
+                resizeByDelta(delta);
+            } catch (Exception e) {
+                Exceptions.propagateIfFatal(e);
+                NoMachinesAvailableException nmae = Exceptions.getFirstThrowableOfType(e, NoMachinesAvailableException.class);
+                if (nmae != null) {
+                    throw new Resizable.InsufficientCapacityException("Failed to resize", e);
+                } else {
+                    throw Exceptions.propagate(e);
+                }
+            }
         }
         return getCurrentSize();
     }
@@ -669,7 +684,7 @@ public class DynamicClusterImpl extends AbstractGroupImpl implements DynamicClus
         }
     }
 
-    /** <strong>Note</strong> for sub-clases; this method can be called while synchronized on {@link #mutex}. */
+    /** <strong>Note</strong> for sub-classes; this method can be called while synchronized on {@link #mutex}. */
     protected Collection<Entity> grow(int delta) {
         Preconditions.checkArgument(delta > 0, "Must call grow with positive delta.");
 
@@ -697,7 +712,15 @@ public class DynamicClusterImpl extends AbstractGroupImpl implements DynamicClus
         }
 
         // create and start the entities
-        return addInEachLocation(chosenLocations, ImmutableMap.of()).getWithError();
+        ReferenceWithError<Collection<Entity>> result = addInEachLocation(chosenLocations, ImmutableMap.of());
+        
+        // If any entities were created, return them (even if we didn't manage to create them all).
+        // Otherwise, propagate any error that happened.
+        if (result.get().size() > 0) {
+            return result.get();
+        } else {
+            return result.getWithError();
+        }
     }
 
     /** <strong>Note</strong> for sub-clases; this method can be called while synchronized on {@link #mutex}. */

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/a91889d4/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/entity/trait/FailingEntity.java
----------------------------------------------------------------------
diff --git a/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/entity/trait/FailingEntity.java b/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/entity/trait/FailingEntity.java
index 532eb4d..7845326 100644
--- a/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/entity/trait/FailingEntity.java
+++ b/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/entity/trait/FailingEntity.java
@@ -60,7 +60,7 @@ public interface FailingEntity extends TestEntity {
     ConfigKey<Predicate<? super FailingEntity>> FAIL_ON_RESTART_CONDITION = (ConfigKey) ConfigKeys.newConfigKey(Predicate.class, "failOnRestartCondition", "Whether to throw exception on call to restart", null);
     
     @SetFromFlag("exceptionClazz")
-    ConfigKey<Class<? extends RuntimeException>> EXCEPTION_CLAZZ = (ConfigKey) ConfigKeys.newConfigKey(Class.class, "exceptionClazz", "Type of exception to throw", IllegalStateException.class);
+    ConfigKey<Class<? extends Exception>> EXCEPTION_CLAZZ = (ConfigKey) ConfigKeys.newConfigKey(Class.class, "exceptionClazz", "Type of exception to throw", IllegalStateException.class);
     
     @SetFromFlag("execOnFailure")
     ConfigKey<Function<? super FailingEntity,?>> EXEC_ON_FAILURE = (ConfigKey) ConfigKeys.newConfigKey(Function.class, "execOnFailure", "Callback to execute before throwing an exception, on any failure", Functions.identity());

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/a91889d4/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/entity/trait/FailingEntityImpl.java
----------------------------------------------------------------------
diff --git a/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/entity/trait/FailingEntityImpl.java b/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/entity/trait/FailingEntityImpl.java
index a675c78..423bdd3 100644
--- a/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/entity/trait/FailingEntityImpl.java
+++ b/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/entity/trait/FailingEntityImpl.java
@@ -79,7 +79,12 @@ public class FailingEntityImpl extends TestEntityImpl implements FailingEntity {
     
     private RuntimeException newException(String msg) {
         try {
-            return getConfig(EXCEPTION_CLAZZ).getConstructor(String.class).newInstance("Simulating entity stop failure for test");
+            Exception result = getConfig(EXCEPTION_CLAZZ).getConstructor(String.class).newInstance("Simulating entity stop failure for test");
+            if (!(result instanceof RuntimeException)) {
+                return new RuntimeException("wrapping", result);
+            } else {
+                return (RuntimeException)result;
+            }
         } catch (Exception e) {
             throw Exceptions.propagate(e);
         }

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/a91889d4/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/test/entity/TestCluster.java
----------------------------------------------------------------------
diff --git a/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/test/entity/TestCluster.java b/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/test/entity/TestCluster.java
index 3ff61f0..9593203 100644
--- a/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/test/entity/TestCluster.java
+++ b/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/test/entity/TestCluster.java
@@ -18,8 +18,12 @@
  */
 package org.apache.brooklyn.core.test.entity;
 
+import java.util.List;
+
 import org.apache.brooklyn.api.entity.EntityLocal;
 import org.apache.brooklyn.api.entity.ImplementedBy;
+import org.apache.brooklyn.config.ConfigKey;
+import org.apache.brooklyn.core.config.ConfigKeys;
 import org.apache.brooklyn.entity.group.DynamicCluster;
 
 /**
@@ -27,4 +31,10 @@ import org.apache.brooklyn.entity.group.DynamicCluster;
 */
 @ImplementedBy(TestClusterImpl.class)
 public interface TestCluster extends DynamicCluster, EntityLocal {
+    
+    ConfigKey<Integer> MAX_SIZE = ConfigKeys.newIntegerConfigKey("testCluster.maxSize", "Size after which it will throw InsufficientCapacityException", Integer.MAX_VALUE);
+    
+    List<Integer> getSizeHistory();
+    
+    List<Integer> getDesiredSizeHistory();
 }

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/a91889d4/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/test/entity/TestClusterImpl.java
----------------------------------------------------------------------
diff --git a/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/test/entity/TestClusterImpl.java b/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/test/entity/TestClusterImpl.java
index bf8d0cb..d318e5e 100644
--- a/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/test/entity/TestClusterImpl.java
+++ b/brooklyn-server/core/src/test/java/org/apache/brooklyn/core/test/entity/TestClusterImpl.java
@@ -18,22 +18,32 @@
  */
 package org.apache.brooklyn.core.test.entity;
 
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
 import org.apache.brooklyn.core.entity.trait.Startable;
 import org.apache.brooklyn.entity.group.DynamicClusterImpl;
 import org.apache.brooklyn.util.collections.QuorumCheck.QuorumChecks;
 
+import com.google.common.collect.ImmutableList;
+
 /**
 * Mock cluster entity for testing.
 */
 public class TestClusterImpl extends DynamicClusterImpl implements TestCluster {
     private volatile int size;
 
+    private final List<Integer> desiredSizeHistory = Collections.synchronizedList(new ArrayList<Integer>());
+    private final List<Integer> sizeHistory = Collections.synchronizedList(new ArrayList<Integer>());
+    
     public TestClusterImpl() {
     }
     
     @Override
     public void init() {
         super.init();
+        sizeHistory.add(size);
         size = getConfig(INITIAL_SIZE);
         sensors().set(Startable.SERVICE_UP, true);
     }
@@ -48,11 +58,35 @@ public class TestClusterImpl extends DynamicClusterImpl implements TestCluster {
     
     @Override
     public Integer resize(Integer desiredSize) {
+        desiredSizeHistory.add(desiredSize);
+        
+        if (desiredSize > size) {
+            if (size < getConfig(MAX_SIZE)) {
+                desiredSize = Math.min(desiredSize, getConfig(MAX_SIZE));
+            } else {
+                throw new InsufficientCapacityException("Simulating insufficient capacity (desiredSize="+desiredSize+"; maxSize="+getConfig(MAX_SIZE)+"; currentSize="+size+")");
+            }
+        }
+        this.sizeHistory.add(desiredSize);
         this.size = desiredSize;
         return size;
     }
     
     @Override
+    public List<Integer> getSizeHistory() {
+        synchronized (sizeHistory) {
+            return ImmutableList.copyOf(sizeHistory);
+        }
+    }
+    
+    @Override
+    public List<Integer> getDesiredSizeHistory() {
+        synchronized (desiredSizeHistory) {
+            return ImmutableList.copyOf(desiredSizeHistory);
+        }
+    }
+    
+    @Override
     public void stop() {
         size = 0;
         super.stop();

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/a91889d4/brooklyn-server/core/src/test/java/org/apache/brooklyn/entity/group/DynamicClusterTest.java
----------------------------------------------------------------------
diff --git a/brooklyn-server/core/src/test/java/org/apache/brooklyn/entity/group/DynamicClusterTest.java b/brooklyn-server/core/src/test/java/org/apache/brooklyn/entity/group/DynamicClusterTest.java
index dd37e58..33194f0 100644
--- a/brooklyn-server/core/src/test/java/org/apache/brooklyn/entity/group/DynamicClusterTest.java
+++ b/brooklyn-server/core/src/test/java/org/apache/brooklyn/entity/group/DynamicClusterTest.java
@@ -44,6 +44,7 @@ import java.util.concurrent.atomic.AtomicReference;
 import org.apache.brooklyn.api.entity.Entity;
 import org.apache.brooklyn.api.entity.EntitySpec;
 import org.apache.brooklyn.api.location.Location;
+import org.apache.brooklyn.api.location.NoMachinesAvailableException;
 import org.apache.brooklyn.api.mgmt.Task;
 import org.apache.brooklyn.api.sensor.SensorEvent;
 import org.apache.brooklyn.core.entity.Attributes;
@@ -54,6 +55,7 @@ import org.apache.brooklyn.core.entity.lifecycle.Lifecycle;
 import org.apache.brooklyn.core.entity.lifecycle.ServiceStateLogic;
 import org.apache.brooklyn.core.entity.trait.Changeable;
 import org.apache.brooklyn.core.entity.trait.FailingEntity;
+import org.apache.brooklyn.core.entity.trait.Resizable;
 import org.apache.brooklyn.core.location.SimulatedLocation;
 import org.apache.brooklyn.core.mgmt.BrooklynTaskTags;
 import org.apache.brooklyn.core.test.BrooklynAppUnitTestSupport;
@@ -72,13 +74,13 @@ import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
 import com.google.common.base.Function;
+import com.google.common.base.Predicate;
 import com.google.common.base.Predicates;
 import com.google.common.base.Throwables;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
 import com.google.common.util.concurrent.Atomics;
 
 
@@ -202,6 +204,53 @@ public class DynamicClusterTest extends BrooklynAppUnitTestSupport {
         assertEquals(entity.getApplication(), app);
     }
 
+    @Test
+    public void testResizeWhereChildThrowsNoMachineAvailableExceptionIsPropagatedAsInsufficientCapacityException() throws Exception {
+        final DynamicCluster cluster = app.createAndManageChild(EntitySpec.create(DynamicCluster.class)
+            .configure(DynamicCluster.MEMBER_SPEC, EntitySpec.create(FailingEntity.class)
+                    .configure(FailingEntity.FAIL_ON_START, true)
+                    .configure(FailingEntity.EXCEPTION_CLAZZ, NoMachinesAvailableException.class))
+            .configure(DynamicCluster.INITIAL_SIZE, 0));
+        cluster.start(ImmutableList.of(loc));
+        
+        try {
+            cluster.resize(1);
+            Asserts.shouldHaveFailedPreviously();
+        } catch (Exception e) {
+            Asserts.expectedFailureOfType(e, Resizable.InsufficientCapacityException.class);
+        }
+    }
+
+    @Test
+    public void testResizeWhereSubsetOfChildrenThrowsNoMachineAvailableExceptionReturnsNormally() throws Exception {
+        final DynamicCluster cluster = app.createAndManageChild(EntitySpec.create(DynamicCluster.class)
+            .configure(DynamicCluster.MEMBER_SPEC, EntitySpec.create(FailingEntity.class)
+                    .configure(FailingEntity.FAIL_ON_START_CONDITION, new Predicate<FailingEntity>() {
+                        final AtomicInteger counter = new AtomicInteger();
+                        @Override public boolean apply(FailingEntity input) {
+                            // Only second and subsequent entities fail
+                            int index = counter.getAndIncrement();
+                            return (index >= 1);
+                        }})
+                    .configure(FailingEntity.EXCEPTION_CLAZZ, NoMachinesAvailableException.class))
+            .configure(DynamicCluster.INITIAL_SIZE, 0));
+        cluster.start(ImmutableList.of(loc));
+
+        // Managed to partially resize, so should not fail entirely.
+        // Instead just say how big we managed to get.
+        Integer newSize = cluster.resize(2);
+        assertEquals(newSize, (Integer)1);
+        assertEquals(cluster.getCurrentSize(), (Integer)1);
+
+        // This attempt will fail, because all new children will fail
+        try {
+            cluster.resize(2);
+            Asserts.shouldHaveFailedPreviously();
+        } catch (Exception e) {
+            Asserts.expectedFailureOfType(e, Resizable.InsufficientCapacityException.class);
+        }
+    }
+
     /** This can be sensitive to order, e.g. if TestEntity set expected RUNNING before setting SERVICE_UP, 
      * there would be a point when TestEntity is ON_FIRE.
      * <p>
@@ -249,6 +298,7 @@ public class DynamicClusterTest extends BrooklynAppUnitTestSupport {
         assertEquals(Iterables.size(Entities.descendants(cluster, TestEntity.class)), 0);
     }
 
+    
     @Test
     public void currentSizePropertyReflectsActualClusterSize() throws Exception {
         DynamicCluster cluster = app.createAndManageChild(EntitySpec.create(DynamicCluster.class)
@@ -421,7 +471,7 @@ public class DynamicClusterTest extends BrooklynAppUnitTestSupport {
                     }}));
 
         cluster.start(ImmutableList.of(loc));
-        resizeExpectingError(cluster, 3);
+        cluster.resize(3);
         assertEquals(cluster.getCurrentSize(), (Integer)2);
         assertEquals(cluster.getMembers().size(), 2);
         for (Entity member : cluster.getMembers()) {
@@ -533,7 +583,7 @@ public class DynamicClusterTest extends BrooklynAppUnitTestSupport {
                     }}));
 
         cluster.start(ImmutableList.of(loc));
-        resizeExpectingError(cluster, 3);
+        cluster.resize(3);
         assertEquals(cluster.getCurrentSize(), (Integer)2);
         assertEquals(cluster.getMembers().size(), 2);
         assertEquals(Iterables.size(Iterables.filter(cluster.getChildren(), Predicates.instanceOf(FailingEntity.class))), 3);
@@ -570,7 +620,7 @@ public class DynamicClusterTest extends BrooklynAppUnitTestSupport {
         assertEquals(cluster.getChildren().size(), 0, "children="+cluster.getChildren());
         
         // Failed node will not be a member or child
-        resizeExpectingError(cluster, 3);
+        cluster.resize(3);
         assertEquals(cluster.getCurrentSize(), (Integer)2);
         assertEquals(cluster.getMembers().size(), 2);
         assertEquals(cluster.getChildren().size(), 2, "children="+cluster.getChildren());

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/a91889d4/brooklyn-server/policy/src/main/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicy.java
----------------------------------------------------------------------
diff --git a/brooklyn-server/policy/src/main/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicy.java b/brooklyn-server/policy/src/main/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicy.java
index d315260..a1406ca 100644
--- a/brooklyn-server/policy/src/main/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicy.java
+++ b/brooklyn-server/policy/src/main/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicy.java
@@ -290,6 +290,7 @@ public class AutoScalerPolicy extends AbstractPolicy {
             .defaultValue(1)
             .reconfigurable(true)
             .build();
+    
     @SetFromFlag("resizeUpIterationMax")
     public static final ConfigKey<Integer> RESIZE_UP_ITERATION_MAX = BasicConfigKey.builder(Integer.class)
             .name("autoscaler.resizeUpIterationMax")
@@ -297,6 +298,7 @@ public class AutoScalerPolicy extends AbstractPolicy {
             .description("Maximum change to the size on a single iteration when scaling up")
             .reconfigurable(true)
             .build();
+    
     @SetFromFlag("resizeDownIterationIncrement")
     public static final ConfigKey<Integer> RESIZE_DOWN_ITERATION_INCREMENT = BasicConfigKey.builder(Integer.class)
             .name("autoscaler.resizeDownIterationIncrement")
@@ -304,6 +306,7 @@ public class AutoScalerPolicy extends AbstractPolicy {
             .defaultValue(1)
             .reconfigurable(true)
             .build();
+    
     @SetFromFlag("resizeDownIterationMax")
     public static final ConfigKey<Integer> RESIZE_DOWN_ITERATION_MAX = BasicConfigKey.builder(Integer.class)
             .name("autoscaler.resizeDownIterationMax")
@@ -346,6 +349,12 @@ public class AutoScalerPolicy extends AbstractPolicy {
             .reconfigurable(true)
             .build();
 
+    public static final ConfigKey<Integer> INSUFFICIENT_CAPACITY_HIGH_WATER_MARK = BasicConfigKey.builder(Integer.class)
+            .name("autoscaler.insufficientCapacityHighWaterMark")
+            .defaultValue(null)
+            .reconfigurable(true)
+            .build();
+
     @SetFromFlag("resizeOperator")
     public static final ConfigKey<ResizeOperator> RESIZE_OPERATOR = BasicConfigKey.builder(ResizeOperator.class)
             .name("autoscaler.resizeOperator")
@@ -564,6 +573,10 @@ public class AutoScalerPolicy extends AbstractPolicy {
         return getConfig(MAX_POOL_SIZE);
     }
     
+    private Integer getInsufficientCapacityHighWaterMark() {
+        return getConfig(INSUFFICIENT_CAPACITY_HIGH_WATER_MARK);
+    }
+    
     private ResizeOperator getResizeOperator() {
         return getConfig(RESIZE_OPERATOR);
     }
@@ -620,6 +633,14 @@ public class AutoScalerPolicy extends AbstractPolicy {
                 throw new IllegalArgumentException("Min pool size "+val+" must not be greater than max pool size "+getConfig(MAX_POOL_SIZE));
             }
             onPoolSizeLimitsChanged(getConfig(MIN_POOL_SIZE), newMax);
+        } else if (key.equals(INSUFFICIENT_CAPACITY_HIGH_WATER_MARK)) {
+            Integer newVal = (Integer) val;
+            Integer oldVal = config().get(INSUFFICIENT_CAPACITY_HIGH_WATER_MARK);
+            if (oldVal != null && (newVal == null || newVal > oldVal)) {
+                LOG.info("{} resetting {} to {}, which will enable resizing above previous level of {}",
+                        new Object[] {AutoScalerPolicy.this, INSUFFICIENT_CAPACITY_HIGH_WATER_MARK.getName(), newVal, oldVal});
+                // TODO see above about changing metricLowerBound; not triggering resize now
+            }
         } else {
             throw new UnsupportedOperationException("reconfiguring "+key+" unsupported for "+this);
         }
@@ -849,8 +870,12 @@ public class AutoScalerPolicy extends AbstractPolicy {
     }
 
     private int applyMinMaxConstraints(int desiredSize) {
-        desiredSize = Math.max(getMinPoolSize(), desiredSize);
-        desiredSize = Math.min(getMaxPoolSize(), desiredSize);
+        int minSize = getMinPoolSize();
+        int maxSize = getMaxPoolSize();
+        Integer insufficientCapacityHighWaterMark = getInsufficientCapacityHighWaterMark();
+        desiredSize = Math.max(minSize, desiredSize);
+        desiredSize = Math.min(maxSize, desiredSize);
+        if (insufficientCapacityHighWaterMark != null) desiredSize = Math.min(insufficientCapacityHighWaterMark, desiredSize);
         return desiredSize;
     }
 
@@ -989,7 +1014,7 @@ public class AutoScalerPolicy extends AbstractPolicy {
     }
 
     private void resizeNow() {
-        long currentPoolSize = getCurrentSizeOperator().apply(poolEntity);
+        final long currentPoolSize = getCurrentSizeOperator().apply(poolEntity);
         CalculatedDesiredPoolSize calculatedDesiredPoolSize = calculateDesiredPoolSize(currentPoolSize);
         final long desiredPoolSize = calculatedDesiredPoolSize.size;
         boolean stable = calculatedDesiredPoolSize.stable;
@@ -1018,7 +1043,16 @@ public class AutoScalerPolicy extends AbstractPolicy {
                 @Override
                 public Void call() throws Exception {
                     // TODO Should we use int throughout, rather than casting here?
-                    getResizeOperator().resize(poolEntity, (int) desiredPoolSize);
+                    try {
+                        getResizeOperator().resize(poolEntity, (int) desiredPoolSize);
+                    } catch (Resizable.InsufficientCapacityException e) {
+                        // cannot resize beyond this; set the high-water mark
+                        int insufficientCapacityHighWaterMark = (currentPoolSize > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int)currentPoolSize);
+                        LOG.warn("{} failed to resize {} due to insufficient capacity; setting high-water mark to {}, "
+                                + "and will not attempt to resize above that level again", 
+                                new Object[] {AutoScalerPolicy.this, poolEntity, insufficientCapacityHighWaterMark});
+                        config().set(INSUFFICIENT_CAPACITY_HIGH_WATER_MARK, insufficientCapacityHighWaterMark);
+                    }
                     return null;
                 }
             }).build())

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/a91889d4/brooklyn-server/policy/src/test/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicyMetricTest.java
----------------------------------------------------------------------
diff --git a/brooklyn-server/policy/src/test/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicyMetricTest.java b/brooklyn-server/policy/src/test/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicyMetricTest.java
index e04f714..9582523 100644
--- a/brooklyn-server/policy/src/test/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicyMetricTest.java
+++ b/brooklyn-server/policy/src/test/java/org/apache/brooklyn/policy/autoscaling/AutoScalerPolicyMetricTest.java
@@ -40,13 +40,14 @@ import org.testng.annotations.AfterMethod;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 
 public class AutoScalerPolicyMetricTest {
     
     private static long TIMEOUT_MS = 10000;
-    private static long SHORT_WAIT_MS = 50;
+    private static long SHORT_WAIT_MS = 250;
     
     private static final AttributeSensor<Integer> MY_ATTRIBUTE = Sensors.newIntegerSensor("autoscaler.test.intAttrib");
     TestApplication app;
@@ -270,4 +271,51 @@ public class AutoScalerPolicyMetricTest {
         entityWithMetric.sensors().set(TestEntity.SEQUENCE, 101);
         Asserts.succeedsEventually(ImmutableMap.of("timeout", TIMEOUT_MS), currentSizeAsserter(tc, 2));
     }
+    
+    @Test(groups="Integration")
+    public void testOnFailedGrowWillSetHighwaterMarkAndNotResizeAboveThatAgain() {
+        tc = app.createAndManageChild(EntitySpec.create(TestCluster.class)
+                .configure("initialSize", 1)
+                .configure(TestCluster.MAX_SIZE, 2));
+
+        tc.resize(1);
+        
+        tc.policies().add(AutoScalerPolicy.builder()
+                .metric(MY_ATTRIBUTE)
+                .metricLowerBound(50)
+                .metricUpperBound(100)
+                .buildSpec());
+        
+        // workload 200 so requires doubling size to 2 to handle: (200*1)/100 = 2
+        tc.sensors().set(MY_ATTRIBUTE, 200);
+        Asserts.succeedsEventually(ImmutableMap.of("timeout", TIMEOUT_MS), currentSizeAsserter(tc, 2));
+        assertEquals(tc.getDesiredSizeHistory(), ImmutableList.of(1, 2), "desired="+tc.getDesiredSizeHistory());
+        assertEquals(tc.getSizeHistory(), ImmutableList.of(0, 1, 2), "sizes="+tc.getSizeHistory());
+        tc.sensors().set(MY_ATTRIBUTE, 100);
+        
+        // workload 110, requires 1 more entity: (2*110)/100 = 2.1
+        // But max size is 2, so resize will fail.
+        tc.sensors().set(MY_ATTRIBUTE, 110);
+        Asserts.succeedsContinually(ImmutableMap.of("timeout", SHORT_WAIT_MS), currentSizeAsserter(tc, 2));
+        assertEquals(tc.getDesiredSizeHistory(), ImmutableList.of(1, 2, 3), "desired="+tc.getDesiredSizeHistory()); // TODO succeeds eventually?
+        assertEquals(tc.getSizeHistory(), ImmutableList.of(0, 1, 2), "sizes="+tc.getSizeHistory());
+        
+        // another attempt to resize will not cause it to ask
+        tc.sensors().set(MY_ATTRIBUTE, 110);
+        Asserts.succeedsContinually(ImmutableMap.of("timeout", SHORT_WAIT_MS), currentSizeAsserter(tc, 2));
+        assertEquals(tc.getDesiredSizeHistory(), ImmutableList.of(1, 2, 3), "desired="+tc.getDesiredSizeHistory());
+        assertEquals(tc.getSizeHistory(), ImmutableList.of(0, 1, 2), "sizes="+tc.getSizeHistory());
+        
+        // but if we shrink down again, then we'll still be able to go up to the previous level.
+        // We'll only try to go as high as the previous high-water mark though.
+        tc.sensors().set(MY_ATTRIBUTE, 1);
+        Asserts.succeedsEventually(ImmutableMap.of("timeout", SHORT_WAIT_MS), currentSizeAsserter(tc, 1));
+        assertEquals(tc.getDesiredSizeHistory(), ImmutableList.of(1, 2, 3, 1), "desired="+tc.getDesiredSizeHistory());
+        assertEquals(tc.getSizeHistory(), ImmutableList.of(0, 1, 2, 1), "sizes="+tc.getSizeHistory());
+        
+        tc.sensors().set(MY_ATTRIBUTE, 10000);
+        Asserts.succeedsEventually(ImmutableMap.of("timeout", SHORT_WAIT_MS), currentSizeAsserter(tc, 2));
+        assertEquals(tc.getDesiredSizeHistory(), ImmutableList.of(1, 2, 3, 1, 2), "desired="+tc.getDesiredSizeHistory());
+        assertEquals(tc.getSizeHistory(), ImmutableList.of(0, 1, 2, 1, 2), "sizes="+tc.getSizeHistory());
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/a91889d4/brooklyn-server/software/base/src/test/java/org/apache/brooklyn/entity/software/base/test/autoscaling/AutoScalerPolicyNoMoreMachinesTest.java
----------------------------------------------------------------------
diff --git a/brooklyn-server/software/base/src/test/java/org/apache/brooklyn/entity/software/base/test/autoscaling/AutoScalerPolicyNoMoreMachinesTest.java b/brooklyn-server/software/base/src/test/java/org/apache/brooklyn/entity/software/base/test/autoscaling/AutoScalerPolicyNoMoreMachinesTest.java
new file mode 100644
index 0000000..2d36cd3
--- /dev/null
+++ b/brooklyn-server/software/base/src/test/java/org/apache/brooklyn/entity/software/base/test/autoscaling/AutoScalerPolicyNoMoreMachinesTest.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.brooklyn.entity.software.base.test.autoscaling;
+
+import static org.testng.Assert.assertEquals;
+
+import java.util.Map;
+
+import org.apache.brooklyn.api.entity.EntitySpec;
+import org.apache.brooklyn.api.location.Location;
+import org.apache.brooklyn.api.policy.PolicySpec;
+import org.apache.brooklyn.api.sensor.AttributeSensor;
+import org.apache.brooklyn.core.entity.BrooklynConfigKeys;
+import org.apache.brooklyn.core.entity.trait.Resizable;
+import org.apache.brooklyn.core.sensor.Sensors;
+import org.apache.brooklyn.core.test.BrooklynAppUnitTestSupport;
+import org.apache.brooklyn.core.test.entity.TestCluster;
+import org.apache.brooklyn.entity.group.DynamicCluster;
+import org.apache.brooklyn.entity.software.base.EmptySoftwareProcess;
+import org.apache.brooklyn.policy.autoscaling.AutoScalerPolicy;
+import org.apache.brooklyn.test.Asserts;
+import org.apache.brooklyn.util.time.Duration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import com.google.common.base.Predicates;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+
+public class AutoScalerPolicyNoMoreMachinesTest extends BrooklynAppUnitTestSupport {
+
+    @SuppressWarnings("unused")
+    private static final Logger log = LoggerFactory.getLogger(AutoScalerPolicyNoMoreMachinesTest.class);
+    
+    private static long SHORT_WAIT_MS = 250;
+    
+    DynamicCluster cluster;
+    Location loc;
+    AutoScalerPolicy policy;
+    
+    @BeforeMethod(alwaysRun=true)
+    public void setUp() throws Exception {
+        super.setUp();
+        cluster = app.createAndManageChild(EntitySpec.create(DynamicCluster.class)
+                .configure(TestCluster.INITIAL_SIZE, 0)
+                .configure(DynamicCluster.MEMBER_SPEC, EntitySpec.create(EmptySoftwareProcess.class)
+                        .configure(BrooklynConfigKeys.SKIP_ON_BOX_BASE_DIR_RESOLUTION, true)));
+        loc = mgmt.getLocationRegistry().resolve("byon(hosts='1.1.1.1,1.1.1.2')");
+        app.start(ImmutableList.of(loc));
+    }
+
+    @Test
+    public void testResizeDirectly() throws Exception {
+        assertSize(0);
+
+        cluster.resize(2);
+        assertSize(2);
+        
+        // Won't get a location to successfully resize (byon location only has 2 machines); 
+        // so still left with 2 members + 1 in quarantine
+        try {
+            cluster.resize(3);
+            Asserts.shouldHaveFailedPreviously();
+        } catch (Exception e) {
+            Asserts.expectedFailureOfType(e, Resizable.InsufficientCapacityException.class);
+        }
+        assertSize(2, 1);
+
+        // Resize down; still have 1 in quarantine
+        cluster.resize(1);
+        assertSize(1, 1);
+        
+        // Resize back up to 2 should be allowed
+        cluster.resize(2);
+        assertSize(2, 1);
+    }
+    
+    @Test
+    public void testPoolHotSensorResizingBeyondMaxMachines() throws Exception {
+        cluster.resize(1);
+        policy = cluster.policies().add(PolicySpec.create(AutoScalerPolicy.class)
+                .configure(AutoScalerPolicy.MIN_PERIOD_BETWEEN_EXECS, Duration.millis(10)));
+
+        // Single node trying to handle a load of 21; too high, so will add one more node
+        cluster.sensors().emit(AutoScalerPolicy.DEFAULT_POOL_HOT_SENSOR, message(21L, 10L, 20L));
+        assertSizeEventually(2);
+
+        // Two nodes handing an aggregated load of 41; too high for 2 nodes so tries to scale to 3.
+        // But byon location only has 2 nodes so will fail.
+        cluster.sensors().emit(AutoScalerPolicy.DEFAULT_POOL_HOT_SENSOR, message(21L, 10L, 20L));
+        assertSizeEventually(2, 1);
+        
+        // Should not repeatedly retry
+        assertSizeContinually(2, 1);
+        
+        // If there is another indication of too much load, should not retry yet again.
+        cluster.sensors().emit(AutoScalerPolicy.DEFAULT_POOL_HOT_SENSOR, message(42L, 10L, 20L));
+        assertSizeContinually(2, 1);
+    }
+    
+    @Test
+    public void testMetricResizingBeyondMaxMachines() throws Exception {
+        AttributeSensor<Integer> metric = Sensors.newIntegerSensor("test.aggregatedLoad");
+        
+        cluster.resize(1);
+        policy = cluster.policies().add(PolicySpec.create(AutoScalerPolicy.class)
+                .configure(AutoScalerPolicy.METRIC, metric)
+                .configure(AutoScalerPolicy.METRIC_LOWER_BOUND, 10)
+                .configure(AutoScalerPolicy.METRIC_UPPER_BOUND, 20)
+                .configure(AutoScalerPolicy.MIN_PERIOD_BETWEEN_EXECS, Duration.millis(10)));
+
+        // Single node trying to handle a load of 21; too high, so will add one more node.
+        // That takes the load back to within acceptable limits
+        cluster.sensors().set(metric, 21);
+        assertSizeEventually(2);
+        cluster.sensors().set(metric, 19);
+
+        // With two nodes, load is now too high, so will try (and fail) to add one more node.
+        // Trigger another attempt to resize.
+        cluster.sensors().set(metric, 22);
+        assertSizeEventually(2, 1);
+        assertSizeContinually(2, 1);
+
+        // Metric is re-published; should not keep retrying
+        cluster.sensors().set(metric, 21);
+        assertSizeContinually(2, 1);
+    }
+
+    protected Map<String, Object> message(double currentWorkrate, double lowThreshold, double highThreshold) {
+        return message(cluster.getCurrentSize(), currentWorkrate, lowThreshold, highThreshold);
+    }
+    
+    protected Map<String, Object> message(int currentSize, double currentWorkrate, double lowThreshold, double highThreshold) {
+        return ImmutableMap.<String,Object>of(
+            AutoScalerPolicy.POOL_CURRENT_SIZE_KEY, currentSize,
+            AutoScalerPolicy.POOL_CURRENT_WORKRATE_KEY, currentWorkrate,
+            AutoScalerPolicy.POOL_LOW_THRESHOLD_KEY, lowThreshold,
+            AutoScalerPolicy.POOL_HIGH_THRESHOLD_KEY, highThreshold);
+    }
+    
+    protected void assertSize(Integer targetSize) {
+        assertSize(targetSize, 0);
+    }
+
+    protected void assertSize(int targetSize, int quarantineSize) {
+        assertEquals(cluster.getCurrentSize(), (Integer) targetSize, "cluster.currentSize");
+        assertEquals(cluster.getMembers().size(), targetSize, "cluster.members.size");
+        assertEquals(cluster.sensors().get(DynamicCluster.QUARANTINE_GROUP).getMembers().size(), quarantineSize, "cluster.quarantine.size");
+        assertEquals(mgmt.getEntityManager().findEntities(Predicates.instanceOf(EmptySoftwareProcess.class)).size(), targetSize + quarantineSize, "instanceCount(EmptySoftwareProcess)");
+    }
+    
+    protected void assertSizeEventually(int targetSize) {
+        assertSizeEventually(targetSize, 0);
+    }
+    
+    protected void assertSizeEventually(final int targetSize, final int quarantineSize) {
+        Asserts.succeedsEventually(new Runnable() {
+            public void run() {
+                assertSize(targetSize, quarantineSize);
+            }});
+    }
+    
+    protected void assertSizeContinually(final int targetSize, final int quarantineSize) {
+        Asserts.succeedsContinually(ImmutableMap.of("timeout", SHORT_WAIT_MS), new Runnable() {
+            public void run() {
+                assertSize(targetSize, quarantineSize);
+            }});
+    }
+}