You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@brooklyn.apache.org by he...@apache.org on 2014/11/15 01:05:16 UTC

[07/21] incubator-brooklyn git commit: Add a HOT_BACKUP mode where nodes are read-only copies but NOT willing to be master.

Add a HOT_BACKUP mode where nodes are read-only copies but NOT willing to be master.


Project: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/commit/9dd1a957
Tree: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/tree/9dd1a957
Diff: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/diff/9dd1a957

Branch: refs/heads/master
Commit: 9dd1a95724a473b59a777fb6d627b6dde2fa0547
Parents: f39c6a3
Author: Alex Heneveld <al...@cloudsoftcorp.com>
Authored: Thu Nov 13 02:05:16 2014 +0000
Committer: Alex Heneveld <al...@cloudsoftcorp.com>
Committed: Thu Nov 13 23:38:53 2014 +0000

----------------------------------------------------------------------
 .../brooklyn/entity/rebind/RebindManager.java   |   6 +-
 .../management/ha/HighAvailabilityManager.java  |   2 +-
 .../management/ha/HighAvailabilityMode.java     |   6 +
 .../management/ha/ManagementNodeState.java      |  25 +++-
 .../entity/rebind/RebindManagerImpl.java        |  22 +--
 .../ha/HighAvailabilityManagerImpl.java         | 140 +++++++++++--------
 .../ha/ManagementPlaneSyncRecordDeltaImpl.java  |   1 -
 ...mentPlaneSyncRecordPersisterToMultiFile.java |   4 -
 .../NonDeploymentManagementContext.java         |   2 +-
 .../entity/rebind/RebindManagerSorterTest.java  |   2 +-
 usage/cli/src/main/java/brooklyn/cli/Main.java  |  18 ++-
 .../brooklyn/launcher/BrooklynLauncher.java     |  20 +--
 12 files changed, 154 insertions(+), 94 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/api/src/main/java/brooklyn/entity/rebind/RebindManager.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/brooklyn/entity/rebind/RebindManager.java b/api/src/main/java/brooklyn/entity/rebind/RebindManager.java
index e5001a6..fcd71bc 100644
--- a/api/src/main/java/brooklyn/entity/rebind/RebindManager.java
+++ b/api/src/main/java/brooklyn/entity/rebind/RebindManager.java
@@ -69,7 +69,7 @@ public interface RebindManager {
     /** Causes this management context to rebind, loading data from the given backing store.
      * use wisely, as this can cause local entities to be completely lost, or will throw in many other situations.
      * in general it may be invoked for a new node becoming {@link ManagementNodeState#MASTER} 
-     * or periodically for a node in {@link ManagementNodeState#HOT_STANDBY}. */
+     * or periodically for a node in {@link ManagementNodeState#HOT_STANDBY} or {@link ManagementNodeState#HOT_BACKUP}. */
     @Beta
     public List<Application> rebind(ClassLoader classLoader, RebindExceptionHandler exceptionHandler, ManagementNodeState mode);
 
@@ -92,13 +92,13 @@ public interface RebindManager {
      * Perform an initial load of state read-only and starts a background process 
      * reading (mirroring) state periodically.
      */
-    public void startReadOnly();
+    public void startReadOnly(ManagementNodeState mode);
     /** Stops the background reading (mirroring) of state. 
      * Interrupts any current activity and waits for it to cease. */
     public void stopReadOnly();
     
     /** Starts the appropriate background processes, {@link #startPersistence()} if {@link ManagementNodeState#MASTER},
-     * {@link #startReadOnly()} if {@link ManagementNodeState#HOT_STANDBY} */
+     * {@link #startReadOnly(ManagementNodeState)} if {@link ManagementNodeState#HOT_STANDBY} or {@link ManagementNodeState#HOT_BACKUP} */
     public void start();
     /** Stops the appropriate background processes, {@link #stopPersistence()} or {@link #stopReadOnly()},
      * waiting for activity there to cease (interrupting in the case of {@link #stopReadOnly()}). */

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/api/src/main/java/brooklyn/management/ha/HighAvailabilityManager.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/brooklyn/management/ha/HighAvailabilityManager.java b/api/src/main/java/brooklyn/management/ha/HighAvailabilityManager.java
index c3671d7..de37123 100644
--- a/api/src/main/java/brooklyn/management/ha/HighAvailabilityManager.java
+++ b/api/src/main/java/brooklyn/management/ha/HighAvailabilityManager.java
@@ -72,7 +72,7 @@ public interface HighAvailabilityManager {
      * <p>
      * When this method returns, the status of this node will be set,
      * either {@link ManagementNodeState#MASTER} if appropriate 
-     * or {@link ManagementNodeState#STANDBY} / {@link ManagementNodeState#HOT_STANDBY}.
+     * or {@link ManagementNodeState#STANDBY} / {@link ManagementNodeState#HOT_STANDBY} / {@link ManagementNodeState#HOT_BACKUP}.
      *
      * @param startMode mode to start with
      * @throws IllegalStateException if current state of the management-plane doesn't match that desired by {@code startMode} 

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/api/src/main/java/brooklyn/management/ha/HighAvailabilityMode.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/brooklyn/management/ha/HighAvailabilityMode.java b/api/src/main/java/brooklyn/management/ha/HighAvailabilityMode.java
index 97fb5bc..2e593b1 100644
--- a/api/src/main/java/brooklyn/management/ha/HighAvailabilityMode.java
+++ b/api/src/main/java/brooklyn/management/ha/HighAvailabilityMode.java
@@ -52,6 +52,12 @@ public enum HighAvailabilityMode {
     HOT_STANDBY,
     
     /**
+     * Means node must be hot backup; do not attempt to become master (but it <i>can</i> start without a master).
+     * See {@link ManagementNodeState#HOT_BACKUP}. 
+     */
+    HOT_BACKUP,
+    
+    /**
      * Means node must be master; if there is already a master then fail fast on startup.
      * See {@link ManagementNodeState#MASTER}.
      */

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/api/src/main/java/brooklyn/management/ha/ManagementNodeState.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/brooklyn/management/ha/ManagementNodeState.java b/api/src/main/java/brooklyn/management/ha/ManagementNodeState.java
index c8a90f1..4587b57 100644
--- a/api/src/main/java/brooklyn/management/ha/ManagementNodeState.java
+++ b/api/src/main/java/brooklyn/management/ha/ManagementNodeState.java
@@ -18,6 +18,8 @@
  */
 package brooklyn.management.ha;
 
+import brooklyn.util.guava.Maybe;
+
 public enum ManagementNodeState {
     /** @deprecated since 0.7.0 synonym for maintenance (plus, it should have been US English!) */
     UNINITIALISED,
@@ -27,8 +29,10 @@ public enum ManagementNodeState {
     /** Node is in "lukewarm standby" mode, where it is available to be promoted to master,
      * but does not have entities loaded and will require some effort to be promoted */
     STANDBY,
-    /** Node is acting as read-only proxy */
+    /** Node is acting as read-only proxy available to be promoted to master on existing master failure */
     HOT_STANDBY,
+    /** Node is acting as a read-only proxy but not making itself available for promotion to master */
+    HOT_BACKUP,
     /** Node is running as primary/master, able to manage entities and create new ones */
     // the semantics are intended to support multi-master here; we could have multiple master nodes,
     // but we need to look up who is master for any given entity
@@ -38,4 +42,23 @@ public enum ManagementNodeState {
     FAILED,
     /** Node has gone away; maintenance not possible */
     TERMINATED;
+
+    /** Converts a {@link HighAvailabilityMode} to a {@link ManagementNodeState}, if possible */
+    public static Maybe<ManagementNodeState> of(HighAvailabilityMode startMode) {
+        switch (startMode) {
+        case AUTO:
+        case DISABLED:
+            return Maybe.absent("Requested "+HighAvailabilityMode.class+" mode "+startMode+" cannot be converted to "+ManagementNodeState.class);
+        case HOT_BACKUP:
+            return Maybe.of(HOT_BACKUP);
+        case HOT_STANDBY:
+            return Maybe.of(HOT_STANDBY);
+        case MASTER:
+            return Maybe.of(MASTER);
+        case STANDBY:
+            return Maybe.of(STANDBY);
+        }
+        // above should be exhaustive
+        return Maybe.absent("Requested "+HighAvailabilityMode.class+" mode "+startMode+" was not expected");
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/core/src/main/java/brooklyn/entity/rebind/RebindManagerImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/brooklyn/entity/rebind/RebindManagerImpl.java b/core/src/main/java/brooklyn/entity/rebind/RebindManagerImpl.java
index ac9f726..4252a25 100644
--- a/core/src/main/java/brooklyn/entity/rebind/RebindManagerImpl.java
+++ b/core/src/main/java/brooklyn/entity/rebind/RebindManagerImpl.java
@@ -298,7 +298,11 @@ public class RebindManagerImpl implements RebindManager {
     
     @SuppressWarnings("unchecked")
     @Override
-    public void startReadOnly() {
+    public void startReadOnly(final ManagementNodeState mode) {
+        if (mode!=ManagementNodeState.HOT_STANDBY && mode!=ManagementNodeState.HOT_BACKUP) {
+            throw new IllegalStateException("Read-only rebind thread only permitted for hot proxy modes; not "+mode);
+        }
+        
         if (persistenceRunning) {
             throw new IllegalStateException("Cannot start read-only when already running with persistence");
         }
@@ -315,7 +319,7 @@ public class RebindManagerImpl implements RebindManager {
         readOnlyRebindCount = 0;
 
         try {
-            rebind(null, null, ManagementNodeState.HOT_STANDBY);
+            rebind(null, null, mode);
         } catch (Exception e) {
             Exceptions.propagate(e);
         }
@@ -325,7 +329,7 @@ public class RebindManagerImpl implements RebindManager {
                 return Tasks.<Void>builder().dynamic(false).name("rebind (periodic run").body(new Callable<Void>() {
                     public Void call() {
                         try {
-                            rebind(null, null, ManagementNodeState.HOT_STANDBY);
+                            rebind(null, null, mode);
                             readOnlyRebindCount++;
                             return null;
                         } catch (RuntimeInterruptedException e) {
@@ -376,8 +380,8 @@ public class RebindManagerImpl implements RebindManager {
     @Override
     public void start() {
         ManagementNodeState target = getRebindMode();
-        if (target==ManagementNodeState.HOT_STANDBY) {
-            startReadOnly();
+        if (target==ManagementNodeState.HOT_STANDBY || target==ManagementNodeState.HOT_BACKUP) {
+            startReadOnly(target);
         } else if (target==ManagementNodeState.MASTER) {
             startPersistence();
         } else {
@@ -466,8 +470,8 @@ public class RebindManagerImpl implements RebindManager {
                 .build();
         final ManagementNodeState mode = modeO!=null ? modeO : getRebindMode();
         
-        if (mode!=ManagementNodeState.HOT_STANDBY && mode!=ManagementNodeState.MASTER)
-            throw new IllegalStateException("Must be either master or read only to rebind (mode "+mode+")");
+        if (mode!=ManagementNodeState.MASTER && mode!=ManagementNodeState.HOT_STANDBY && mode!=ManagementNodeState.HOT_BACKUP)
+            throw new IllegalStateException("Must be either master or hot standby/backup to rebind (mode "+mode+")");
 
         ExecutionContext ec = BasicExecutionContext.getCurrentExecutionContext();
         if (ec == null) {
@@ -533,7 +537,7 @@ public class RebindManagerImpl implements RebindManager {
             
             exceptionHandler.onStart(rebindContext);
             
-            if (mode==ManagementNodeState.HOT_STANDBY) {
+            if (mode==ManagementNodeState.HOT_STANDBY || mode==ManagementNodeState.HOT_BACKUP) {
                 rebindContext.setAllReadOnly();
             } else {
                 Preconditions.checkState(mode==ManagementNodeState.MASTER, "Must be either master or read only to rebind (mode "+mode+")");
@@ -590,7 +594,7 @@ public class RebindManagerImpl implements RebindManager {
             BrooklynMementoManifest mementoManifest = persistenceStoreAccess.loadMementoManifest(mementoRawData, exceptionHandler);
 
             boolean isEmpty = mementoManifest.isEmpty();
-            if (mode!=ManagementNodeState.HOT_STANDBY) {
+            if (mode!=ManagementNodeState.HOT_STANDBY && mode!=ManagementNodeState.HOT_BACKUP) {
                 if (!isEmpty) { 
                     LOG.info("Rebinding from "+getPersister().getBackingStoreDescription()+"...");
                 } else {

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java b/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java
index 329578e..62b06c0 100644
--- a/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java
+++ b/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java
@@ -98,20 +98,8 @@ import com.google.common.collect.Iterables;
 @Beta
 public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
 
-    // TODO Improve mechanism for injecting configuration options (such as heartbeat and timeouts).
-    // For example, read from brooklyn.properties?
-    // But see BrooklynLauncher.haHeartbeatPeriod and .haHeartbeatTimeout, which are always injected.
-    // So perhaps best to read brooklyn.properties there? Would be nice to avoid the cast to 
-    // HighAvailabilityManagerImpl though.
-    
-    // TODO There is a race if you start multiple nodes simultaneously.
-    // They may not have seen each other's heartbeats yet, so will all claim mastery!
-    // But this should be resolved shortly afterwards.
-
-    // TODO Should we pass in a classloader on construction, so it can be passed to {@link RebindManager#rebind(ClassLoader)}
-
     public final ConfigKey<Duration> POLL_PERIOD = ConfigKeys.newConfigKey(Duration.class, "brooklyn.ha.pollPeriod",
-        "How often nodes should poll to detect whether master is healthy", Duration.seconds(5));
+        "How often nodes should poll to detect whether master is healthy", Duration.seconds(1));
     public final ConfigKey<Duration> HEARTBEAT_TIMEOUT = ConfigKeys.newConfigKey(Duration.class, "brooklyn.ha.heartbeatTimeout",
         "Maximum allowable time for detection of a peer's heartbeat; if no sign of master after this time, "
         + "another node may promote itself", Duration.THIRTY_SECONDS);
@@ -286,8 +274,10 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
             case DISABLED:
                 // no action needed, will do anything necessary below
                 break;
-            case HOT_STANDBY: demoteToStandby(true); break;
-            case STANDBY: demoteToStandby(false); break;
+            case HOT_STANDBY: 
+            case HOT_BACKUP: 
+            case STANDBY: 
+                demoteTo(ManagementNodeState.of(startMode).get()); break;
             default:
                 throw new IllegalStateException("Unexpected high availability mode "+startMode+" requested for "+this);
             }
@@ -298,8 +288,14 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
         case AUTO:
             // don't care; let's start and see if we promote ourselves
             publishAndCheck(true);
-            if (nodeState == ManagementNodeState.STANDBY || nodeState == ManagementNodeState.HOT_STANDBY) {
-                ManagementPlaneSyncRecord newState = loadManagementPlaneSyncRecord(true);;
+            switch (nodeState) {
+            case HOT_BACKUP:
+                if (!nodeStateTransitionComplete) throw new IllegalStateException("Cannot switch to AUTO when in the middle of a transition to "+nodeState);
+                // otherwise switch this node to hot standby and intentionally fall through to the standby handling below
+                nodeState = ManagementNodeState.HOT_STANDBY;
+            case HOT_STANDBY:
+            case STANDBY:
+                ManagementPlaneSyncRecord newState = loadManagementPlaneSyncRecord(true);
                 String masterNodeId = newState.getMasterNodeId();
                 ManagementNodeSyncRecord masterNodeDetails = newState.getManagementNodes().get(masterNodeId);
                 LOG.info("Management node "+ownNodeId+" running as HA " + nodeState + " autodetected, " +
@@ -307,9 +303,11 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
                         + (existingMaster==null ? "(new) " : "")
                         + "is "+masterNodeId +
                         (masterNodeDetails==null || masterNodeDetails.getUri()==null ? " (no url)" : " at "+masterNodeDetails.getUri())));
-            } else if (nodeState == ManagementNodeState.MASTER) {
+                break;
+            case MASTER:
                 LOG.info("Management node "+ownNodeId+" running as HA MASTER autodetected");
-            } else {
+                break;
+            default:
                 throw new IllegalStateException("Management node "+ownNodeId+" set to HA AUTO, encountered unexpected mode "+nodeState);
             }
             break;
@@ -332,7 +330,8 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
                 LOG.error("Management node "+ownNodeId+" detected no master when "+startMode+" requested and existing master required; failing.");
                 throw new IllegalStateException("No existing master; cannot start as "+startMode);
             }
-            
+            // intentionally fall through to below (the master-existence checks above are skipped for hot backup)
+        case HOT_BACKUP:
             String message = "Management node "+ownNodeId+" running as HA "+getNodeState()+" (";
             if (getNodeState().toString().equals(startMode.toString()))
                 message += "explicitly requested";
@@ -346,7 +345,8 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
             } else {
                 ManagementPlaneSyncRecord newState = loadManagementPlaneSyncRecord(true);
                 if (Strings.isBlank(newState.getMasterNodeId())) {
-                    message += "); no master currently (subsequent election may repair)";
+                    message += "); no master currently"; 
+                    if (startMode != HighAvailabilityMode.HOT_BACKUP) message += " (subsequent election may repair)";
                 } else {
                     message += "); master "+newState.getMasterNodeId();
                 }
@@ -356,7 +356,7 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
         case DISABLED:
             // safe just to run even if we weren't master
             LOG.info("Management node "+ownNodeId+" HA DISABLED (was "+nodeState+")");
-            demoteToFailed();
+            demoteTo(ManagementNodeState.FAILED);
             if (pollingTask!=null) pollingTask.cancel(true);
             break;
         default:
@@ -370,25 +370,28 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
                 startMode = HighAvailabilityMode.STANDBY;
             }
         }
-        if (nodeState==ManagementNodeState.STANDBY && startMode==HighAvailabilityMode.HOT_STANDBY) {
-            // if it should be hot standby, then we need to promote
+        if ((nodeState==ManagementNodeState.STANDBY && startMode==HighAvailabilityMode.HOT_STANDBY) || 
+                (startMode==HighAvailabilityMode.HOT_BACKUP)) {
             nodeStateTransitionComplete = false;
-            // inform the world that we are transitioning (not eligible for promotion while going in to hot standby)
-            publishHealth();
+            if (startMode==HighAvailabilityMode.HOT_STANDBY) {
+                // if it should be hot standby, then we need to promote
+                // inform the world that we are transitioning (not eligible for promotion while going in to hot standby)
+                publishHealth();
+            }
             try {
-                attemptHotStandby();
+                attemptHotProxy(ManagementNodeState.of(startMode).get());
                 nodeStateTransitionComplete = true;
                 publishHealth();
                 
-                if (getNodeState()==ManagementNodeState.HOT_STANDBY) {
-                    LOG.info("Management node "+ownNodeId+" now running as HA "+ManagementNodeState.HOT_STANDBY+"; "
+                if (getNodeState()==ManagementNodeState.HOT_STANDBY || getNodeState()==ManagementNodeState.HOT_BACKUP) {
+                    LOG.info("Management node "+ownNodeId+" now running as HA "+getNodeState()+"; "
                         + managementContext.getApplications().size()+" application"+Strings.s(managementContext.getApplications().size())+" loaded");
                 } else {
-                    LOG.warn("Management node "+ownNodeId+" unable to promote to "+ManagementNodeState.HOT_STANDBY+" (currently "+getNodeState()+"); "
+                    LOG.warn("Management node "+ownNodeId+" unable to promote to "+startMode+" (currently "+getNodeState()+"); "
                         + "(see log for further details)");
                 }
             } catch (Exception e) {
-                LOG.warn("Management node "+ownNodeId+" unable to promote to "+ManagementNodeState.HOT_STANDBY+" (currently "+getNodeState()+"); rethrowing: "+Exceptions.collapseText(e));
+                LOG.warn("Management node "+ownNodeId+" unable to promote to "+startMode+" (currently "+getNodeState()+"); rethrowing: "+Exceptions.collapseText(e));
                 throw Exceptions.propagate(e);
             }
         } else {
@@ -594,9 +597,8 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
     protected void checkMaster(boolean initializing) {
         ManagementPlaneSyncRecord memento = loadManagementPlaneSyncRecord(false);
         
-        if (getNodeState() == ManagementNodeState.FAILED) {
-            // if we have failed then no point in checking who is master
-            // (if somehow this node is subsequently clearFailure() then it will resume)
+        if (getNodeState()==ManagementNodeState.FAILED || getNodeState()==ManagementNodeState.HOT_BACKUP) {
+            // if failed or hot backup then we can't promote ourselves, so no point in checking who is master
             return;
         }
         
@@ -642,7 +644,9 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
         
         if (demotingSelfInFavourOfOtherMaster) {
             LOG.debug("Master-change for this node only, demoting "+ownNodeRecord.toVerboseString()+" in favour of official master "+newMasterNodeRecord.toVerboseString());
-            demoteToStandby(BrooklynFeatureEnablement.isEnabled(BrooklynFeatureEnablement.FEATURE_DEFAULT_STANDBY_IS_HOT_PROPERTY));
+            demoteTo(
+                BrooklynFeatureEnablement.isEnabled(BrooklynFeatureEnablement.FEATURE_DEFAULT_STANDBY_IS_HOT_PROPERTY) ?
+                    ManagementNodeState.HOT_STANDBY : ManagementNodeState.STANDBY);
             return;
         } else {
             LOG.debug("Detected master heartbeat timeout. Initiating a new master election. Master was " + currMasterNodeRecord);
@@ -703,11 +707,11 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
                 LOG.warn("Problem in promption-listener (continuing)", e);
             }
         }
-        boolean wasHotStandby = nodeState==ManagementNodeState.HOT_STANDBY;
+        boolean wasHot = (nodeState==ManagementNodeState.HOT_STANDBY || nodeState==ManagementNodeState.HOT_BACKUP);
         nodeState = ManagementNodeState.MASTER;
         publishPromotionToMaster();
         try {
-            if (wasHotStandby) {
+            if (wasHot) {
                 // could just promote the standby items; but for now we stop the old read-only and re-load them, to make sure nothing has been missed
                 // TODO ideally there'd be an incremental rebind as well as an incremental persist
                 managementContext.getRebindManager().stopReadOnly();
@@ -716,7 +720,7 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
             managementContext.getRebindManager().rebind(managementContext.getCatalog().getRootClassLoader(), null, nodeState);
         } catch (Exception e) {
             LOG.error("Management node enountered problem during rebind when promoting self to master; demoting to FAILED and rethrowing: "+e);
-            demoteToFailed();
+            demoteTo(ManagementNodeState.FAILED);
             throw Exceptions.propagate(e);
         }
         managementContext.getRebindManager().start();
@@ -728,19 +732,17 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
         }
     }
 
+    /** @deprecated since 0.7.0, use {@link #demoteTo(ManagementNodeState)} */ @Deprecated
     protected void demoteToFailed() {
-        // TODO merge this method with the one below
-        boolean wasMaster = nodeState == ManagementNodeState.MASTER;
-        if (wasMaster) backupOnDemotionIfNeeded();
-        ManagementTransitionMode mode = (wasMaster ? ManagementTransitionMode.REBINDING_NO_LONGER_PRIMARY : ManagementTransitionMode.REBINDING_DESTROYED);
-        nodeState = ManagementNodeState.FAILED;
-        onDemotionStopItems(mode);
-        nodeStateTransitionComplete = true;
-        publishDemotion(wasMaster);
+        demoteTo(ManagementNodeState.FAILED);
     }
-    
+    /** @deprecated since 0.7.0, use {@link #demoteTo(ManagementNodeState)} */ @Deprecated
     protected void demoteToStandby(boolean hot) {
-        if (!running) {
+        demoteTo(hot ? ManagementNodeState.HOT_STANDBY : ManagementNodeState.STANDBY);
+    }
+    
+    protected void demoteTo(ManagementNodeState toState) {
+        if (toState!=ManagementNodeState.FAILED && !running) {
             LOG.warn("Ignoring demote-from-master request, as HighAvailabilityManager is no longer running");
             return;
         }
@@ -749,14 +751,24 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
         ManagementTransitionMode mode = (wasMaster ? ManagementTransitionMode.REBINDING_NO_LONGER_PRIMARY : ManagementTransitionMode.REBINDING_DESTROYED);
 
         nodeStateTransitionComplete = false;
-        nodeState = ManagementNodeState.STANDBY;
+        
+        switch (toState) {
+        case FAILED: 
+        case HOT_BACKUP:
+        case STANDBY:
+            nodeState = toState; break;
+        case HOT_STANDBY:
+            nodeState = ManagementNodeState.STANDBY; break;
+        default:
+            throw new IllegalStateException("Illegal target state: "+toState);
+        }
         onDemotionStopItems(mode);
         nodeStateTransitionComplete = true;
         publishDemotion(wasMaster);
         
-        if (hot) {
+        if (toState==ManagementNodeState.HOT_BACKUP || toState==ManagementNodeState.HOT_STANDBY) {
             nodeStateTransitionComplete = false;
-            attemptHotStandby();
+            attemptHotProxy(toState);
             nodeStateTransitionComplete = true;
             publishHealth();
         }
@@ -795,19 +807,29 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
         ((BasicBrooklynCatalog)managementContext.getCatalog()).reset(CatalogDto.newEmptyInstance("<reset-by-ha-status-change>"));
     }
     
-    /** starts hot standby, in foreground; the caller is responsible for publishing health afterwards.
-     * @return whether hot standby was possible (if not, errors should be stored elsewhere) */
+    /** @deprecated since 0.7.0, use {@link #attemptHotProxy(ManagementNodeState)} */ @Deprecated
     protected boolean attemptHotStandby() {
+        return attemptHotProxy(ManagementNodeState.HOT_STANDBY);
+    }
+    
+    /** Starts hot standby or hot backup, in foreground
+     * <p>
+     * In the case of the former, the caller is responsible for publishing health afterwards,
+     * but if it fails, this method will {@link #demoteTo(ManagementNodeState)} {@link ManagementNodeState#FAILED}.
+     * <p>
+     * @return whether the requested {@link ManagementNodeState} was possible;
+     * (if not, errors should be stored elsewhere) */
+    protected boolean attemptHotProxy(ManagementNodeState toState) {
         try {
-            Preconditions.checkState(nodeStateTransitionComplete==false, "Must be in transitioning state to go into hot standby");
-            nodeState = ManagementNodeState.HOT_STANDBY;
-            managementContext.getRebindManager().startReadOnly();
+            Preconditions.checkState(nodeStateTransitionComplete==false, "Must be in transitioning state to go into "+toState);
+            nodeState = toState;
+            managementContext.getRebindManager().startReadOnly(toState);
             
             return true;
         } catch (Exception e) {
             Exceptions.propagateIfFatal(e);
-            LOG.warn("Unable to promote "+ownNodeId+" to hot standby, switching to FAILED: "+e, e);
-            demoteToFailed();
+            LOG.warn("Unable to promote "+ownNodeId+" to "+toState+", switching to FAILED: "+e, e);
+            demoteTo(ManagementNodeState.FAILED);
             return false;
         }
     }
@@ -909,7 +931,7 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager {
         @Override
         public ManagementNodeSyncRecord apply(@Nullable ManagementNodeSyncRecord input) {
             if (input == null) return null;
-            if (!(input.getStatus() == ManagementNodeState.STANDBY || input.getStatus() == ManagementNodeState.HOT_STANDBY || input.getStatus() == ManagementNodeState.MASTER)) return input;
+            if (!(input.getStatus() == ManagementNodeState.STANDBY || input.getStatus() == ManagementNodeState.HOT_STANDBY || input.getStatus() == ManagementNodeState.MASTER || input.getStatus() == ManagementNodeState.HOT_BACKUP)) return input;
             if (isHeartbeatOk(input, referenceNode)) return input;
             return BasicManagementNodeSyncRecord.builder()
                     .from(input)

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/core/src/main/java/brooklyn/management/ha/ManagementPlaneSyncRecordDeltaImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/brooklyn/management/ha/ManagementPlaneSyncRecordDeltaImpl.java b/core/src/main/java/brooklyn/management/ha/ManagementPlaneSyncRecordDeltaImpl.java
index 6b026bc..5e07b19 100644
--- a/core/src/main/java/brooklyn/management/ha/ManagementPlaneSyncRecordDeltaImpl.java
+++ b/core/src/main/java/brooklyn/management/ha/ManagementPlaneSyncRecordDeltaImpl.java
@@ -25,7 +25,6 @@ import java.util.Collection;
 
 import brooklyn.management.ha.ManagementPlaneSyncRecordPersister.Delta;
 
-import com.google.api.client.repackaged.com.google.common.base.Objects;
 import com.google.common.annotations.Beta;
 import com.google.common.collect.Sets;
 

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/core/src/main/java/brooklyn/management/ha/ManagementPlaneSyncRecordPersisterToMultiFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/brooklyn/management/ha/ManagementPlaneSyncRecordPersisterToMultiFile.java b/core/src/main/java/brooklyn/management/ha/ManagementPlaneSyncRecordPersisterToMultiFile.java
index d3fa34a..fcade26 100644
--- a/core/src/main/java/brooklyn/management/ha/ManagementPlaneSyncRecordPersisterToMultiFile.java
+++ b/core/src/main/java/brooklyn/management/ha/ManagementPlaneSyncRecordPersisterToMultiFile.java
@@ -274,10 +274,6 @@ public class ManagementPlaneSyncRecordPersisterToMultiFile implements Management
         return new File(dir, "master");
     }
 
-    private File getFileForPlaneId() {
-        return new File(dir, "plane.id");
-    }
-
     private File getFileForChangeLog() {
         return new File(dir, "change.log");
     }

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/core/src/main/java/brooklyn/management/internal/NonDeploymentManagementContext.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/brooklyn/management/internal/NonDeploymentManagementContext.java b/core/src/main/java/brooklyn/management/internal/NonDeploymentManagementContext.java
index 3d0e4d5..2771475 100644
--- a/core/src/main/java/brooklyn/management/internal/NonDeploymentManagementContext.java
+++ b/core/src/main/java/brooklyn/management/internal/NonDeploymentManagementContext.java
@@ -491,7 +491,7 @@ public class NonDeploymentManagementContext implements ManagementContextInternal
         }
 
         @Override
-        public void startReadOnly() {
+        public void startReadOnly(ManagementNodeState state) {
             throw new IllegalStateException("Non-deployment context "+NonDeploymentManagementContext.this+" is not valid for this operation.");
         }
         

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/core/src/test/java/brooklyn/entity/rebind/RebindManagerSorterTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/brooklyn/entity/rebind/RebindManagerSorterTest.java b/core/src/test/java/brooklyn/entity/rebind/RebindManagerSorterTest.java
index 991a000..f986c7e 100644
--- a/core/src/test/java/brooklyn/entity/rebind/RebindManagerSorterTest.java
+++ b/core/src/test/java/brooklyn/entity/rebind/RebindManagerSorterTest.java
@@ -142,7 +142,7 @@ public class RebindManagerSorterTest {
     private Map<String, EntityMemento> toMementos(Iterable<? extends Entity> entities) {
         Map<String, EntityMemento> result = Maps.newLinkedHashMap();
         for (Entity entity : entities) {
-            result.put(entity.getId(), MementosGenerators.newEntityMemento(entity));
+            result.put(entity.getId(), MementosGenerators.newEntityMemento(Entities.deproxy(entity)));
         }
         return result;
     }

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/usage/cli/src/main/java/brooklyn/cli/Main.java
----------------------------------------------------------------------
diff --git a/usage/cli/src/main/java/brooklyn/cli/Main.java b/usage/cli/src/main/java/brooklyn/cli/Main.java
index b1b933c..3eb7fc0 100644
--- a/usage/cli/src/main/java/brooklyn/cli/Main.java
+++ b/usage/cli/src/main/java/brooklyn/cli/Main.java
@@ -313,17 +313,22 @@ public class Main extends AbstractMain {
         protected final static String HA_OPTION_MASTER = "master";
         protected final static String HA_OPTION_STANDBY = "standby";
         protected final static String HA_OPTION_HOT_STANDBY = "hot_standby";
-        static { Enums.checkAllEnumeratedIgnoreCase(HighAvailabilityMode.class, HA_OPTION_AUTO, HA_OPTION_DISABLED, HA_OPTION_MASTER, HA_OPTION_STANDBY, HA_OPTION_HOT_STANDBY); }
+        protected final static String HA_OPTION_HOT_BACKUP = "hot_backup";
+        static { Enums.checkAllEnumeratedIgnoreCase(HighAvailabilityMode.class, HA_OPTION_AUTO, HA_OPTION_DISABLED, HA_OPTION_MASTER, HA_OPTION_STANDBY, HA_OPTION_HOT_STANDBY, HA_OPTION_HOT_BACKUP); }
         
-        @Option(name = { HA_OPTION }, allowedValues = { HA_OPTION_DISABLED, HA_OPTION_AUTO, HA_OPTION_MASTER, HA_OPTION_STANDBY, HA_OPTION_HOT_STANDBY },
+        @Option(name = { HA_OPTION }, allowedValues = { HA_OPTION_DISABLED, HA_OPTION_AUTO, HA_OPTION_MASTER, HA_OPTION_STANDBY, HA_OPTION_HOT_STANDBY, HA_OPTION_HOT_BACKUP },
                 title = "high availability mode",
                 description =
                         "The high availability mode. Possible values are: \n"+
                         "disabled: management node works in isolation - will not cooperate with any other standby/master nodes in management plane; \n"+
                         "auto: will look for other management nodes, and will allocate itself as standby or master based on other nodes' states; \n"+
                         "master: will startup as master - if there is already a master then fails immediately; \n"+
-                        "standby: will start up as lukewarm standby - if there is not already a master then fails immediately; \n"+
-                        "hot_standby: will start up as hot standby - if there is not already a master then fails immediately")
+                        "standby: will start up as lukewarm standby with no state - if there is not already a master then fails immediately, "
+                        + "and if there is a master which subsequently fails, this node can promote itself; \n"+
+                        "hot_standby: will start up as hot standby in read-only mode - if there is not already a master then fails immediately, "
+                        + "and if there is a master which subseuqently fails, this node can promote itself; \n"+
+                        "hot_backup: will start up as hot backup in read-only mode - no master is required, and this node will not become a master"
+                        )
         public String highAvailability = HA_OPTION_AUTO;
 
         @VisibleForTesting
@@ -472,7 +477,10 @@ public class Main extends AbstractMain {
                     if (highAvailabilityMode.get() == HighAvailabilityMode.AUTO)
                         return HighAvailabilityMode.DISABLED;
                     throw new FatalConfigurationRuntimeException("Cannot specify highAvailability when persistence is disabled");
-                } else if (persistMode == PersistMode.CLEAN && (highAvailabilityMode.get() == HighAvailabilityMode.STANDBY || highAvailabilityMode.get() == HighAvailabilityMode.HOT_STANDBY)) {
+                } else if (persistMode == PersistMode.CLEAN && 
+                        (highAvailabilityMode.get() == HighAvailabilityMode.STANDBY 
+                        || highAvailabilityMode.get() == HighAvailabilityMode.HOT_STANDBY
+                        || highAvailabilityMode.get() == HighAvailabilityMode.HOT_BACKUP)) {
                     throw new FatalConfigurationRuntimeException("Cannot specify highAvailability "+highAvailabilityMode.get()+" when persistence is CLEAN");
                 }
             }

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9dd1a957/usage/launcher/src/main/java/brooklyn/launcher/BrooklynLauncher.java
----------------------------------------------------------------------
diff --git a/usage/launcher/src/main/java/brooklyn/launcher/BrooklynLauncher.java b/usage/launcher/src/main/java/brooklyn/launcher/BrooklynLauncher.java
index 00162a8..976db4c 100644
--- a/usage/launcher/src/main/java/brooklyn/launcher/BrooklynLauncher.java
+++ b/usage/launcher/src/main/java/brooklyn/launcher/BrooklynLauncher.java
@@ -160,8 +160,9 @@ public class BrooklynLauncher {
     private String persistenceDir;
     private String persistenceLocation;
     private Duration persistPeriod = Duration.ONE_SECOND;
-    private Duration haHeartbeatTimeout = Duration.THIRTY_SECONDS;
-    private Duration haHeartbeatPeriod = Duration.ONE_SECOND;
+    // these default values come from config in HighAvailabilityManagerImpl
+    private Duration haHeartbeatTimeoutOverride = null;
+    private Duration haHeartbeatPeriodOverride = null;
     
     private volatile BrooklynWebServer webServer;
     private CampPlatform campPlatform;
@@ -433,7 +434,7 @@ public class BrooklynLauncher {
     }
 
     public BrooklynLauncher haHeartbeatTimeout(Duration val) {
-        this.haHeartbeatTimeout = val;
+        this.haHeartbeatTimeoutOverride = val;
         return this;
     }
 
@@ -446,7 +447,7 @@ public class BrooklynLauncher {
      * Controls both the frequency of heartbeats, and the frequency of checking the health of other nodes.
      */
     public BrooklynLauncher haHeartbeatPeriod(Duration val) {
-        this.haHeartbeatPeriod = val;
+        this.haHeartbeatPeriodOverride = val;
         return this;
     }
 
@@ -770,8 +771,8 @@ public class BrooklynLauncher {
             ManagementPlaneSyncRecordPersister persister =
                 new ManagementPlaneSyncRecordPersisterToObjectStore(managementContext,
                     objectStore, managementContext.getCatalog().getRootClassLoader());
-            ((HighAvailabilityManagerImpl)haManager).setHeartbeatTimeout(haHeartbeatTimeout);
-            ((HighAvailabilityManagerImpl)haManager).setPollPeriod(haHeartbeatPeriod);
+            ((HighAvailabilityManagerImpl)haManager).setHeartbeatTimeout(haHeartbeatTimeoutOverride);
+            ((HighAvailabilityManagerImpl)haManager).setPollPeriod(haHeartbeatPeriodOverride);
             haManager.setPersister(persister);
         }
     }
@@ -790,19 +791,20 @@ public class BrooklynLauncher {
             // Let the HA manager decide when objectstore.prepare and rebindmgr.rebind need to be called 
             // (based on whether other nodes in plane are already running).
             
-            HighAvailabilityMode startMode;
+            HighAvailabilityMode startMode=null;
             switch (highAvailabilityMode) {
                 case AUTO:
                 case MASTER:
                 case STANDBY:
                 case HOT_STANDBY:
+                case HOT_BACKUP:
                     startMode = highAvailabilityMode;
                     break;
                 case DISABLED:
                     throw new IllegalStateException("Unexpected code-branch for high availability mode "+highAvailabilityMode);
-                default:       
-                    throw new IllegalStateException("Unexpected high availability mode "+highAvailabilityMode);
             }
+            if (startMode==null)
+                throw new IllegalStateException("Unexpected high availability mode "+highAvailabilityMode);
             
             LOG.debug("Management node (with HA) starting");
             HighAvailabilityManager haManager = managementContext.getHighAvailabilityManager();