You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by st...@apache.org on 2015/09/22 14:35:24 UTC

svn commit: r1704604 [1/2] - in /sling/trunk/bundles/extensions/discovery/impl/src: main/java/org/apache/sling/discovery/impl/ main/java/org/apache/sling/discovery/impl/cluster/ main/java/org/apache/sling/discovery/impl/cluster/voting/ main/java/org/ap...

Author: stefanegli
Date: Tue Sep 22 12:35:19 2015
New Revision: 1704604

URL: http://svn.apache.org/viewvc?rev=1704604&view=rev
Log:
SLING-5030 : better handling of pseudo-network-partitioning : replace isolated mode with (larger) TOPOLOGY_CHANGING phase - PLUS SLING-4959 : remove config option delayInitEventUntilVoted (as that is not considered correct anymore)

Added:
    sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/common/heartbeat/
    sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatTest.java   (with props)
Modified:
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/Config.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/TopologyWebConsolePlugin.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewService.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewServiceImpl.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHandler.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHelper.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingView.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/ViewHelper.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorClient.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorServlet.java
    sling/trunk/bundles/extensions/discovery/impl/src/main/resources/OSGI-INF/metatype/metatype.properties
    sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterLoadTest.java
    sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterTest.java
    sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/SingleInstanceTest.java
    sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/TopologyEventTest.java
    sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/setup/Instance.java
    sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/topology/LargeTopologyWithHubTest.java
    sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/topology/TopologyTest.java
    sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/topology/TopologyTestHelper.java

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/Config.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/Config.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/Config.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/Config.java Tue Sep 22 12:35:19 2015
@@ -116,15 +116,6 @@ public class Config {
     private String discoveryResourcePath = DEFAULT_DISCOVERY_RESOURCE_PATH;
 
     /**
-     * If set to true the TOPOLOGY_INIT event will be sent only once the cluster view was established.
-     * This can mean there is a delay until the voting in the cluster was finished.
-     * But the advantage of delaying the INIT event is to make sure no two instances see themselves
-     * as leader at startup. (see SLING-3750).
-     */
-    @Property(boolValue=true)
-    private static final String DELAY_INIT_EVENT_UNTIL_VOTED = "delayInitEventUntilVoted";
-
-    /**
      * If set to true, local-loops of topology connectors are automatically stopped when detected so.
      */
     @Property(boolValue=false)
@@ -183,9 +174,6 @@ public class Config {
     /** True when auto-stop of a local-loop is enabled. Default is false. **/
     private boolean autoStopLocalLoopEnabled;
     
-    /** True to make sure the INIT delay is only sent once there is (the first) established view in the cluster **/
-    private boolean delayInitEventUntilVoted = true; /* default: true */
-    
     /**
      * True when the hmac is enabled and signing is disabled.
      */
@@ -317,7 +305,6 @@ public class Config {
         logger.debug("configure: invertRepositoryDescriptor='{}'",
                 this.invertRepositoryDescriptor);
 
-        delayInitEventUntilVoted = PropertiesUtil.toBoolean(properties.get(DELAY_INIT_EVENT_UNTIL_VOTED), true);
         autoStopLocalLoopEnabled = PropertiesUtil.toBoolean(properties.get(AUTO_STOP_LOCAL_LOOP_ENABLED), false);
         gzipConnectorRequestsEnabled = PropertiesUtil.toBoolean(properties.get(GZIP_CONNECTOR_REQUESTS_ENABLED), false);
         
@@ -494,14 +481,6 @@ public class Config {
     }
     
     /**
-     * @return true to make sure the INIT event is only sent to topology listeners once
-     * there is (eg the first) an established cluster view
-     */
-    public boolean isDelayInitEventUntilVoted() {
-        return delayInitEventUntilVoted;
-    }
-    
-    /**
      * @return true if the auto-stopping of local-loop topology connectors is enabled.
      */
     public boolean isAutoStopLocalLoopEnabled() {

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java Tue Sep 22 12:35:19 2015
@@ -33,6 +33,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
+import java.util.UUID;
 
 import org.apache.felix.scr.annotations.Activate;
 import org.apache.felix.scr.annotations.Component;
@@ -57,8 +58,10 @@ import org.apache.sling.discovery.Topolo
 import org.apache.sling.discovery.TopologyEventListener;
 import org.apache.sling.discovery.TopologyView;
 import org.apache.sling.discovery.impl.cluster.ClusterViewService;
+import org.apache.sling.discovery.impl.cluster.UndefinedClusterViewException;
+import org.apache.sling.discovery.impl.common.DefaultClusterViewImpl;
+import org.apache.sling.discovery.impl.common.DefaultInstanceDescriptionImpl;
 import org.apache.sling.discovery.impl.common.heartbeat.HeartbeatHandler;
-import org.apache.sling.discovery.impl.common.resource.IsolatedInstanceDescription;
 import org.apache.sling.discovery.impl.common.resource.ResourceHelper;
 import org.apache.sling.discovery.impl.topology.TopologyViewImpl;
 import org.apache.sling.discovery.impl.topology.announcement.AnnouncementRegistry;
@@ -230,6 +233,9 @@ public class DiscoveryServiceImpl implem
     @Reference(cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE, policy = ReferencePolicy.DYNAMIC, referenceInterface = TopologyEventListener.class)
     private TopologyEventListener[] eventListeners = new TopologyEventListener[0];
 
+    /** SLING-5030 : this map contains the type of the event last sent to each listener, to prevent duplicate CHANGING events when the scheduler is broken **/
+    private Map<TopologyEventListener,TopologyEvent.Type> lastEventMap = new HashMap<TopologyEventListener, TopologyEvent.Type>();
+    
     /**
      * All property providers.
      */
@@ -281,6 +287,9 @@ public class DiscoveryServiceImpl implem
      * synchronized(lock) (which would be deadlock-prone). (introduced with SLING-4638).
      **/
     private volatile boolean delayedEventPending = false;
+    
+    /** used to continue functioning when scheduler is broken **/
+    private volatile boolean delayedEventPendingFailed = false;
 
     private ServiceRegistration mbeanRegistration;
 
@@ -308,6 +317,14 @@ public class DiscoveryServiceImpl implem
             logger.warn("registerMBean: Unable to register DiscoveryServiceImpl MBean", t);
         }
     }
+    
+    private void setOldView(TopologyViewImpl view) {
+        if (view==null) {
+            throw new IllegalArgumentException("view must not be null");
+        }
+        oldView = view;
+    }
+    
     /**
      * Activate this service
      */
@@ -324,15 +341,32 @@ public class DiscoveryServiceImpl implem
 
         slingId = settingsService.getSlingId();
 
-        oldView = (TopologyViewImpl) getTopology();
+        final String isolatedClusterId = UUID.randomUUID().toString();
+        {
+            // create a pre-voting/isolated topologyView which would be used
+            // until the first voting has finished.
+            // this way for the single-instance case the clusterId can
+            // remain the same between a getTopology() that is invoked before
+            // the first TOPOLOGY_INIT and afterwards
+            DefaultClusterViewImpl isolatedCluster = new DefaultClusterViewImpl(isolatedClusterId);
+            Map<String, String> emptyProperties = new HashMap<String, String>();
+            DefaultInstanceDescriptionImpl isolatedInstance = 
+                    new DefaultInstanceDescriptionImpl(isolatedCluster, true, true, slingId, emptyProperties);
+            Collection<InstanceDescription> col = new ArrayList<InstanceDescription>();
+            col.add(isolatedInstance);
+            final TopologyViewImpl topology = new TopologyViewImpl();
+            topology.addInstances(col);
+            topology.markOld();
+            setOldView(topology);
+        }
+        setOldView((TopologyViewImpl) getTopology());
         oldView.markOld();
 
         // make sure the first heartbeat is issued as soon as possible - which
         // is right after this service starts. since the two (discoveryservice
         // and heartbeatHandler need to know each other, the discoveryservice
         // is passed on to the heartbeatHandler in this initialize call).
-        heartbeatHandler.initialize(this,
-                clusterViewService.getIsolatedClusterViewId());
+        heartbeatHandler.initialize(this, isolatedClusterId);
 
         final TopologyEventListener[] registeredServices;
         synchronized (lock) {
@@ -350,16 +384,11 @@ public class DiscoveryServiceImpl implem
             doUpdateProperties();
 
             TopologyViewImpl newView = (TopologyViewImpl) getTopology();
-            final boolean isIsolatedView = isIsolated(newView);
-            if (config.isDelayInitEventUntilVoted() && isIsolatedView) {
+            if (!newView.isCurrent()) {
                 // SLING-3750: just issue a log.info about the delaying
                 logger.info("activate: this instance is in isolated mode and must yet finish voting before it can send out TOPOLOGY_INIT.");
                 initEventDelayed = true;
             } else {
-                if (isIsolatedView) {
-                    // SLING-3750: issue a log.info about not-delaying even though isolated
-                    logger.info("activate: this instance is in isolated mode and likely should delay TOPOLOGY_INIT - but corresponding config ('delayInitEventUntilVoted') is disabled.");
-                }
                 final TopologyEvent event = new TopologyEvent(Type.TOPOLOGY_INIT, null,
                         newView);
                 for (final TopologyEventListener da : registeredServices) {
@@ -367,7 +396,7 @@ public class DiscoveryServiceImpl implem
                 }
             }
             activated = true;
-            oldView = newView;
+            setOldView(newView);
         }
 
         URL[] topologyConnectorURLs = config.getTopologyConnectorURLs();
@@ -390,14 +419,6 @@ public class DiscoveryServiceImpl implem
         logger.debug("DiscoveryServiceImpl activated.");
     }
 
-    private boolean isIsolated(TopologyViewImpl view) {
-        final InstanceDescription localInstance = view.getLocalInstance();
-        // 'instanceof' is not so nice here - but anything else requires 
-        // excessive changing (introducing new classes/interfaces)
-        // which is an overkill in and of itself.. thus: 'instanceof'
-        return localInstance instanceof IsolatedInstanceDescription;
-    }
-
     private void enqueueAsyncTopologyEvent(final TopologyEventListener da, final TopologyEvent event) {
         if (logger.isDebugEnabled()) {
             logger.debug("enqueueAsyncTopologyEvent: sending topologyEvent {}, to {}", event, da);
@@ -408,7 +429,13 @@ public class DiscoveryServiceImpl implem
             logger.warn("enqueueAsyncTopologyEvent: asyncEventSender is null, cannot send event ({}, {})!", da, event);
             return;
         }
+        if (lastEventMap.get(da)==event.getType() && event.getType()==Type.TOPOLOGY_CHANGING) {
+            // don't send TOPOLOGY_CHANGING twice
+            logger.debug("enqueueAsyncTopologyEvent: listener already got TOPOLOGY_CHANGING: {}", da);
+            return;
+        }
         asyncEventSender.enqueue(da, event);
+        lastEventMap.put(da, event.getType());
         if (logger.isDebugEnabled()) {
             logger.debug("enqueueAsyncTopologyEvent: sending topologyEvent {}, to {}", event, da);
         }
@@ -651,7 +678,18 @@ public class DiscoveryServiceImpl implem
         // create a new topology view
         final TopologyViewImpl topology = new TopologyViewImpl();
 
-        final ClusterView localClusterView = clusterViewService.getClusterView();
+        ClusterView localClusterView = null;
+        try {
+            localClusterView = clusterViewService.getClusterView();
+        } catch (UndefinedClusterViewException e) {
+            // SLING-5030 : when we're cut off from the local cluster we also
+            // treat it as being cut off from the entire topology, ie we don't
+            // update the announcements but just return
+            // the previous oldView marked as !current
+            logger.info("getTopology: undefined cluster view: "+e.getClass().getSimpleName()+": "+e);
+            oldView.markOld();
+            return oldView;
+        }
 
         final List<InstanceDescription> localInstances = localClusterView.getInstances();
         topology.addInstances(localInstances);
@@ -660,11 +698,6 @@ public class DiscoveryServiceImpl implem
                 .listInstances(localClusterView);
         topology.addInstances(attachedInstances);
 
-        // SLING-4638: set 'current' correctly
-        if (isIsolated(topology) || delayedEventPending) {
-            topology.markOld();
-        }
-
         return topology;
     }
 
@@ -695,112 +728,151 @@ public class DiscoveryServiceImpl implem
             logger.debug("handlePotentialTopologyChange: ignoring early change before activate finished.");
             return;
         }
-        if (delayedEventPending) {
+        if (delayedEventPending && !delayedEventPendingFailed) {
             logger.debug("handlePotentialTopologyChange: ignoring potential change since a delayed event is pending.");
             return;
         }
-        if (oldView == null) {
-            throw new IllegalStateException("oldView must not be null");
-        }
         TopologyViewImpl newView = (TopologyViewImpl) getTopology();
-        TopologyViewImpl oldView = this.oldView;
-        
         if (initEventDelayed) {
-            if (isIsolated(newView)) {
+            // this means activate could not yet send a TOPOLOGY_INIT event
+            // (which can happen frequently) - so we have to do this now
+            // that we potentially have a valid view
+            if (!newView.isCurrent()) {
                 // we cannot proceed until we're out of the isolated mode..
                 // SLING-4535 : while this has warning character, it happens very frequently,
                 //              eg also when binding a PropertyProvider (so normal processing)
                 //              hence lowering to info for now
                 logger.info("handlePotentialTopologyChange: still in isolated mode - cannot send TOPOLOGY_INIT yet.");
-                return;
+            } else {
+                logger.info("handlePotentialTopologyChange: new view is no longer isolated sending delayed TOPOLOGY_INIT now.");
+                // SLING-4638: OK: newView is current==true as we're just coming out of initEventDelayed first time.
+                enqueueForAll(Type.TOPOLOGY_INIT, null, newView);
+                initEventDelayed = false;
             }
-            logger.info("handlePotentialTopologyChange: new view is no longer isolated sending delayed TOPOLOGY_INIT now.");
-            final TopologyEvent initEvent = new TopologyEvent(Type.TOPOLOGY_INIT, null,
-                    newView); // SLING-4638: OK: newView is current==true as we're just coming out of initEventDelayed first time.
-            for (final TopologyEventListener da : eventListeners) {
-                enqueueAsyncTopologyEvent(da, initEvent);
-            }
-            // now after having sent INIT events, we need to set oldView to what we've
-            // just sent out - which is newView. This makes sure that we don't send
-            // out any CHANGING/CHANGED event afterwards based on an 'isolated-oldView'
-            // (which would be wrong). Hence:
-            this.oldView = newView;
-            oldView = newView;
-
-            initEventDelayed = false;
-        }
-
-        Type difference = newView.compareTopology(oldView);
-        if (difference == null) {
-            // then dont send any event then
-            logger.debug("handlePotentialTopologyChange: identical views. not informing listeners");
             return;
+        }
+
+        TopologyViewImpl oldView = this.oldView;
+        Type difference;
+        if (!newView.isCurrent()) {
+            difference = Type.TOPOLOGY_CHANGING;
         } else {
-            if (logger.isDebugEnabled()) {
-                logger.debug("handlePotentialTopologyChange: difference: {}, oldView={}, newView={}",
-                        new Object[] {difference, oldView, newView});
+            difference = newView.compareTopology(oldView);
+        }
+        if (difference == null) { // indicating: equals
+            if (delayedEventPendingFailed) {
+                // when the delayed event handling for some very odd reason could
+                // not re-spawn itself (via runAfter) - in that case we now
+                // have listeners in CHANGING state .. which we should wake up
+                enqueueForAll(Type.TOPOLOGY_CHANGED, oldView, newView);
+                delayedEventPendingFailed = false;
+                delayedEventPending = false;
+            } else {
+                // identical views: don't send any event
+                logger.debug("handlePotentialTopologyChange: identical views. not informing listeners");
             }
+            return;
+        } else if (difference == Type.PROPERTIES_CHANGED) {
+            enqueueForAll(Type.PROPERTIES_CHANGED, oldView, newView);
+            return;
         }
+        delayedEventPendingFailed = false;
+        delayedEventPending = false;
 
+        // else: TOPOLOGY_CHANGING or CHANGED
+        if (logger.isDebugEnabled()) {
+            logger.debug("handlePotentialTopologyChange: difference: {}, oldView={}, newView={}",
+                    new Object[] {difference, oldView, newView});
+        }
+
+    	// send a TOPOLOGY_CHANGING first
+        logger.info("handlePotentialTopologyChange: sending "+Type.TOPOLOGY_CHANGING+
+                " to all listeners (that have not gotten one yet) (oldView={}).", oldView);
         oldView.markOld();
-        if (difference!=Type.TOPOLOGY_CHANGED) {
-            for (final TopologyEventListener da : eventListeners) {
-                enqueueAsyncTopologyEvent(da, new TopologyEvent(difference, oldView,
-                        newView));
-            }
-        } else { // TOPOLOGY_CHANGED
-
-        	// send a TOPOLOGY_CHANGING first
-            for (final TopologyEventListener da : eventListeners) {
-                enqueueAsyncTopologyEvent(da, new TopologyEvent(Type.TOPOLOGY_CHANGING, oldView,
-                        null));
-            }
-
-        	if (config.getMinEventDelay()>0) {
-                // then delay the sending of the next event
-                logger.debug("handlePotentialTopologyChange: delaying event sending to avoid event flooding");
-
-                if (runAfter(config.getMinEventDelay() /*seconds*/ , new Runnable() {
-
-                    public void run() {
-                        synchronized(lock) {
-                        	delayedEventPending = false;
-                        	logger.debug("handlePotentialTopologyChange: sending delayed event now");
-                        	if (!activated) {
-                        		logger.debug("handlePotentialTopologyChange: no longer activated. not sending delayed event");
-                        		return;
-                        	}
-                            final TopologyViewImpl newView = (TopologyViewImpl) getTopology();
-                            // irrespective of the difference, send the latest topology
-                            // via a topology_changed event (since we already sent a changing)
-                            for (final TopologyEventListener da : eventListeners) {
-                                enqueueAsyncTopologyEvent(da, new TopologyEvent(Type.TOPOLOGY_CHANGED,
-                                        DiscoveryServiceImpl.this.oldView, newView));
-                            }
-                            DiscoveryServiceImpl.this.oldView = newView;
-                        }
-                        if (heartbeatHandler!=null) {
-                            // trigger a heartbeat 'now' to pass it on to the topology asap
-                            heartbeatHandler.triggerHeartbeat();
+        for (final TopologyEventListener da : eventListeners) {
+            enqueueAsyncTopologyEvent(da, new TopologyEvent(Type.TOPOLOGY_CHANGING, oldView,
+                    null));
+        }
+
+        int minEventDelay = config.getMinEventDelay();
+        if ((!newView.isCurrent()) && minEventDelay<=0) {
+            // if newView is isolated
+            // then we should not send a TOPOLOGY_CHANGED yet - but instead
+            // wait until the view gets resolved. that is achieved by
+            // going into event-delaying and retrying that way.
+            // and if minEventDelay is not configured (<=0), then fall back
+            // to a minEventDelay of 1 second:
+            minEventDelay=1;
+        }
+        
+        if (minEventDelay<=0) {
+            // otherwise, send the TOPOLOGY_CHANGED now
+            enqueueForAll(Type.TOPOLOGY_CHANGED, oldView, newView);
+            return;
+        }
+
+        // then delay the sending of the next event
+        logger.debug("handlePotentialTopologyChange: delaying event sending to avoid event flooding");
+
+        if (runAfter(minEventDelay /*seconds*/ , new Runnable() {
+
+            public void run() {
+                logger.debug("handlePotentialTopologyChange: acquiring synchronized(lock)...");
+                synchronized(lock) {
+                	logger.debug("handlePotentialTopologyChange: sending delayed event now");
+                	if (!activated) {
+                	    delayedEventPending = false;
+                		logger.debug("handlePotentialTopologyChange: no longer activated. not sending delayed event");
+                		return;
+                	}
+                    final TopologyViewImpl newView = (TopologyViewImpl) getTopology();
+                    // irrespective of the difference, send the latest topology
+                    // via a topology_changed event (since we already sent a changing)
+                    if (!newView.isCurrent()) {
+                        // if the newView is isolated at this stage we have sent
+                        // TOPOLOGY_CHANGING to the listeners, and they are now waiting
+                        // for TOPOLOGY_CHANGED. But we can't send them that yet..
+                        // we must do a loop via the minEventDelay mechanism and log 
+                        // accordingly
+                        if (runAfter(1/*sec*/, this)) {
+                            logger.warn("handlePotentialTopologyChange: local instance is isolated from topology. Waiting for rejoining...");
+                            return;
                         }
+                        // otherwise re-scheduling failed (unexpected!): flag it so that a later
+                        // handlePotentialTopologyChange call can proceed despite delayedEventPending
+                        delayedEventPendingFailed = true;
+                        logger.warn("handlePotentialTopologyChange: local instance is isolated from topology but failed to trigger delay-job");
+                        return;
                     }
-                })) {
-                	delayedEventPending = true;
-                    logger.debug("handlePotentialTopologyChange: delaying of event triggered.");
-                    return;
-                } else {
-                	logger.debug("handlePotentialTopologyChange: delaying did not work for some reason.");
-                }
-        	}
 
-        	// otherwise, send the TOPOLOGY_CHANGED now
-            for (final TopologyEventListener da : eventListeners) {
-                enqueueAsyncTopologyEvent(da, new TopologyEvent(Type.TOPOLOGY_CHANGED, oldView,
-                        newView));
+                    enqueueForAll(Type.TOPOLOGY_CHANGED, DiscoveryServiceImpl.this.oldView, newView);
+                    delayedEventPending = false;
+                }
             }
+        })) {
+        	delayedEventPending = true;
+            logger.debug("handlePotentialTopologyChange: delayed event triggering.");
+            return;
+        } else {
+        	logger.debug("handlePotentialTopologyChange: delaying event triggering did not work for some reason. "
+        	        + "Will be retriggered lazily via later heartbeat.");
+        	delayedEventPending = true;
+        	delayedEventPendingFailed = true;
+        	return;
+        }
+    }
+    
+    private void enqueueForAll(Type eventType, TopologyViewImpl oldView, TopologyViewImpl newView) {
+        if (oldView!=null) {
+            oldView.markOld();
+        }
+        logger.info("enqueueForAll: sending "+eventType+" to all listeners (oldView={}, newView={}).", oldView, newView);
+        for (final TopologyEventListener da : eventListeners) {
+            enqueueAsyncTopologyEvent(da, new TopologyEvent(eventType, oldView, newView));
+        }
+        if (eventType!=Type.TOPOLOGY_CHANGING) {
+            setOldView(newView);
         }
-
-        this.oldView = newView;
         if (heartbeatHandler!=null) {
             // trigger a heartbeat 'now' to pass it on to the topology asap
             heartbeatHandler.triggerHeartbeat();

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/TopologyWebConsolePlugin.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/TopologyWebConsolePlugin.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/TopologyWebConsolePlugin.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/TopologyWebConsolePlugin.java Tue Sep 22 12:35:19 2015
@@ -55,6 +55,7 @@ import org.apache.sling.discovery.Topolo
 import org.apache.sling.discovery.TopologyEventListener;
 import org.apache.sling.discovery.TopologyView;
 import org.apache.sling.discovery.impl.cluster.ClusterViewService;
+import org.apache.sling.discovery.impl.cluster.UndefinedClusterViewException;
 import org.apache.sling.discovery.impl.topology.announcement.Announcement;
 import org.apache.sling.discovery.impl.topology.announcement.AnnouncementRegistry;
 import org.apache.sling.discovery.impl.topology.announcement.CachedAnnouncement;
@@ -239,16 +240,20 @@ public class TopologyWebConsolePlugin ex
         Set<ClusterView> clusters = topology.getClusterViews();
         ClusterView myCluster = topology.getLocalInstance().getClusterView();
         boolean odd = true;
-        renderCluster(pw, myCluster, myCluster, odd);
-
-        for (Iterator<ClusterView> it = clusters.iterator(); it.hasNext();) {
-            ClusterView clusterView = it.next();
-            if (clusterView.equals(myCluster)) {
-                // skip - I already rendered that
-                continue;
+        try{
+            renderCluster(pw, myCluster, myCluster, odd);
+    
+            for (Iterator<ClusterView> it = clusters.iterator(); it.hasNext();) {
+                ClusterView clusterView = it.next();
+                if (clusterView.equals(myCluster)) {
+                    // skip - I already rendered that
+                    continue;
+                }
+                odd = !odd;
+                renderCluster(pw, clusterView, myCluster, odd);
             }
-            odd = !odd;
-            renderCluster(pw, clusterView, myCluster, odd);
+        } catch(UndefinedClusterViewException e) {
+            pw.println("<tr><td>No ClusterView available at the moment, either isolated or not yet voted atm! ("+e+")</td></tr>");
         }
 
         pw.println("</tbody>");
@@ -283,8 +288,10 @@ public class TopologyWebConsolePlugin ex
 
     /**
      * Render a particular cluster (into table rows)
+     * @throws UndefinedClusterViewException 
      */
-    private void renderCluster(final PrintWriter pw, final ClusterView renderCluster, final ClusterView localCluster, final boolean odd) {
+    private void renderCluster(final PrintWriter pw, final ClusterView renderCluster, final ClusterView localCluster, final boolean odd) 
+            throws UndefinedClusterViewException {
         final Collection<Announcement> announcements = announcementRegistry.listAnnouncementsInSameCluster(localCluster);
 
         for (Iterator<InstanceDescription> it = renderCluster.getInstances()
@@ -755,15 +762,19 @@ public class TopologyWebConsolePlugin ex
 
         final Set<ClusterView> clusters = topology.getClusterViews();
         final ClusterView myCluster = topology.getLocalInstance().getClusterView();
-        printCluster(pw, myCluster, myCluster);
-
-        for (Iterator<ClusterView> it = clusters.iterator(); it.hasNext();) {
-            ClusterView clusterView = it.next();
-            if (clusterView.equals(myCluster)) {
-                // skip - I already rendered that
-                continue;
+        try{
+            printCluster(pw, myCluster, myCluster);
+    
+            for (Iterator<ClusterView> it = clusters.iterator(); it.hasNext();) {
+                ClusterView clusterView = it.next();
+                if (clusterView.equals(myCluster)) {
+                    // skip - I already rendered that
+                    continue;
+                }
+                printCluster(pw, clusterView, myCluster);
             }
-            printCluster(pw, clusterView, myCluster);
+        } catch (UndefinedClusterViewException e) {
+            pw.println("No ClusterView available at the moment, either isolated or not yet voted atm! ("+e+")");
         }
 
         pw.println();
@@ -876,8 +887,10 @@ public class TopologyWebConsolePlugin ex
 
     /**
      * Render a particular cluster
+     * @throws UndefinedClusterViewException if no cluster view is available at the moment
      */
-    private void printCluster(final PrintWriter pw, final ClusterView renderCluster, final ClusterView localCluster) {
+    private void printCluster(final PrintWriter pw, final ClusterView renderCluster, final ClusterView localCluster)
+            throws UndefinedClusterViewException {
         final Collection<Announcement> announcements = announcementRegistry.listAnnouncementsInSameCluster(localCluster);
 
         for(final InstanceDescription instanceDescription : renderCluster.getInstances() ) {

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewService.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewService.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewService.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewService.java Tue Sep 22 12:35:19 2015
@@ -32,19 +32,18 @@ public interface ClusterViewService {
     /** the sling id of the local instance **/
     String getSlingId();
 
-    /** the current cluster view **/
-    ClusterView getClusterView();
-
     /**
-     * the view id of the cluster view when isolated - ie before any view is
-     * established
-     **/
-    String getIsolatedClusterViewId();
+     * the current cluster view
+     * @return the current cluster view - never returns null
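+     * @throws UndefinedClusterViewException if no cluster view is established at the moment (e.g. the local instance is not part of an established view)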
+     */
+    ClusterView getClusterView() throws UndefinedClusterViewException;
 
     /** checks whether the cluster view contains a particular sling id **/
-    boolean contains(String slingId);
+    boolean contains(String slingId) throws UndefinedClusterViewException;
 
     /** checks whether the cluster contains any of the provided instances **/
-    boolean containsAny(Collection<InstanceDescription> listInstances);
+    boolean containsAny(Collection<InstanceDescription> listInstances) 
+            throws UndefinedClusterViewException;
 
 }

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewServiceImpl.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewServiceImpl.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewServiceImpl.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewServiceImpl.java Tue Sep 22 12:35:19 2015
@@ -21,13 +21,11 @@ package org.apache.sling.discovery.impl.
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
-import java.util.UUID;
 
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.Reference;
 import org.apache.felix.scr.annotations.Service;
 import org.apache.sling.api.resource.LoginException;
-import org.apache.sling.api.resource.Resource;
 import org.apache.sling.api.resource.ResourceResolver;
 import org.apache.sling.api.resource.ResourceResolverFactory;
 import org.apache.sling.discovery.ClusterView;
@@ -36,7 +34,6 @@ import org.apache.sling.discovery.impl.C
 import org.apache.sling.discovery.impl.common.View;
 import org.apache.sling.discovery.impl.common.ViewHelper;
 import org.apache.sling.discovery.impl.common.resource.EstablishedClusterView;
-import org.apache.sling.discovery.impl.common.resource.IsolatedInstanceDescription;
 import org.apache.sling.settings.SlingSettingsService;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -62,34 +59,6 @@ public class ClusterViewServiceImpl impl
     @Reference
     private Config config;
 
-    /** the cluster view id of the isolated cluster view */
-    private String isolatedClusterViewId = UUID.randomUUID().toString();
-
-    public String getIsolatedClusterViewId() {
-        return isolatedClusterViewId;
-    }
-
-    private ClusterView getIsolatedClusterView() {
-        ResourceResolver resourceResolver = null;
-        try {
-            resourceResolver = resourceResolverFactory
-                    .getAdministrativeResourceResolver(null);
-            Resource instanceResource = resourceResolver
-                    .getResource(config.getClusterInstancesPath() + "/"
-                            + getSlingId());
-            IsolatedInstanceDescription ownInstance = new IsolatedInstanceDescription(instanceResource,
-                    isolatedClusterViewId, getSlingId());
-            return ownInstance.getClusterView();
-        } catch (LoginException e) {
-            logger.error("Could not do a login: " + e, e);
-            throw new RuntimeException("Could not do a login", e);
-        } finally {
-            if (resourceResolver != null) {
-                resourceResolver.close();
-            }
-        }
-    }
-
     public String getSlingId() {
     	if (settingsService==null) {
     		return null;
@@ -97,9 +66,8 @@ public class ClusterViewServiceImpl impl
         return settingsService.getSlingId();
     }
 
-    public boolean contains(final String slingId) {
-        List<InstanceDescription> localInstances = getClusterView()
-                .getInstances();
+    public boolean contains(final String slingId) throws UndefinedClusterViewException {
+        List<InstanceDescription> localInstances = getClusterView().getInstances();
         for (Iterator<InstanceDescription> it = localInstances.iterator(); it
                 .hasNext();) {
             InstanceDescription aLocalInstance = it.next();
@@ -111,7 +79,8 @@ public class ClusterViewServiceImpl impl
         return false;
     }
 
-    public boolean containsAny(Collection<InstanceDescription> listInstances) {
+    public boolean containsAny(Collection<InstanceDescription> listInstances) 
+            throws UndefinedClusterViewException{
         for (Iterator<InstanceDescription> it = listInstances.iterator(); it
                 .hasNext();) {
             InstanceDescription instanceDescription = it.next();
@@ -122,10 +91,10 @@ public class ClusterViewServiceImpl impl
         return false;
     }
 
-    public ClusterView getClusterView() {
+    public ClusterView getClusterView() throws UndefinedClusterViewException {
     	if (resourceResolverFactory==null) {
     		logger.warn("getClusterView: no resourceResolverFactory set at the moment.");
-    		return null;
+    		throw new UndefinedClusterViewException("no resourceResolverFactory set");
     	}
         ResourceResolver resourceResolver = null;
         try {
@@ -135,7 +104,7 @@ public class ClusterViewServiceImpl impl
             View view = ViewHelper.getEstablishedView(resourceResolver, config);
             if (view == null) {
                 logger.debug("getClusterView: no view established at the moment. isolated mode");
-                return getIsolatedClusterView();
+                throw new UndefinedClusterViewException("no established view at the moment");
             }
 
             EstablishedClusterView clusterViewImpl = new EstablishedClusterView(
@@ -155,12 +124,12 @@ public class ClusterViewServiceImpl impl
                 logger.info("getClusterView: the existing established view does not incude the local instance ("+getSlingId()+") yet! Assuming isolated mode. "
                         + "If this occurs at runtime - other than at startup - it could cause a pseudo-network-partition, see SLING-3432. "
                         + "Consider increasing heartbeatTimeout then!");
-                return getIsolatedClusterView();
+                throw new UndefinedClusterViewException("established view does not include local instance - isolated");
             }
         } catch (LoginException e) {
             logger.error(
                     "handleEvent: could not log in administratively: " + e, e);
-            return null;
+            throw new UndefinedClusterViewException("could not log in administratively: "+e);
         } finally {
             if (resourceResolver != null) {
                 resourceResolver.close();

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHandler.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHandler.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHandler.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHandler.java Tue Sep 22 12:35:19 2015
@@ -389,10 +389,8 @@ public class VotingHandler implements Ev
         // 3b: move the result under /established
         final String newEstablishedViewPath = establishedViewsResource.getPath()
                 + "/" + winningVoteResource.getName();
-    	if (logger.isDebugEnabled()) {
-	        logger.debug("promote: promote to new established node "
+        logger.info("promote: promote to new established node "
 	                + newEstablishedViewPath);
-    	}
         ResourceHelper.moveResource(winningVoteResource, newEstablishedViewPath);
 
         // step 4: delete all ongoing votings...

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHelper.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHelper.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHelper.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHelper.java Tue Sep 22 12:35:19 2015
@@ -58,7 +58,9 @@ public class VotingHelper {
         final Resource ongoingVotingsResource = resourceResolver
                 .getResource(ongoingVotingsPath);
         if (ongoingVotingsResource == null) {
-            logger.info("listOpenNonWinningVotings: no ongoing votings parent resource found"); // TOOD - is this expected?
+            // it is legal that at this stage there is no ongoingvotings node yet 
+            // for example when there was never a voting yet
+            logger.debug("listOpenNonWinningVotings: no ongoing votings parent resource found");
             return new ArrayList<VotingView>();
         }
         final Iterable<Resource> children = ongoingVotingsResource.getChildren();
@@ -150,7 +152,9 @@ public class VotingHelper {
         Resource ongoingVotingsResource = resourceResolver
                 .getResource(ongoingVotingsPath);
         if (ongoingVotingsResource == null) {
-            logger.info("getWinningVoting: no ongoing votings parent resource found"); // TOOD - is this expected?
+            // it is legal that at this stage there is no ongoingvotings node yet 
+            // for example when there was never a voting yet
+            logger.debug("getWinningVoting: no ongoing votings parent resource found");
             return null;
         }
         Iterable<Resource> children = ongoingVotingsResource.getChildren();

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingView.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingView.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingView.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingView.java Tue Sep 22 12:35:19 2015
@@ -115,6 +115,7 @@ public class VotingView extends View {
             resourceResolver.create(membersResource, memberId, properties);
         }
         resourceResolver.commit();
+        logger.info("newVoting: new voting started: newViewId="+newViewId+", resource="+votingResource+", #members: "+liveInstances.size()+", members: "+liveInstances);
         return new VotingView(votingResource);
     }
 

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/ViewHelper.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/ViewHelper.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/ViewHelper.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/ViewHelper.java Tue Sep 22 12:35:19 2015
@@ -88,14 +88,17 @@ public class ViewHelper {
         final Resource establishedParent = resourceResolver
                 .getResource(config.getEstablishedViewPath());
         if (establishedParent == null) {
+            logger.debug("getEstablishedView: no established view found: {}", config.getEstablishedViewPath());
             return null;
         }
         final Iterable<Resource> children = establishedParent.getChildren();
         if (children == null) {
+            logger.debug("getEstablishedView: no children found of {}", establishedParent);
             return null;
         }
         final Iterator<Resource> it = children.iterator();
         if (!it.hasNext()) {
+            logger.debug("getEstablishedView: no it of children of {}", establishedParent);
             return null;
         }
         Resource establishedView = it.next();

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java Tue Sep 22 12:35:19 2015
@@ -209,14 +209,20 @@ public class HeartbeatHandler implements
      * a reference on us - but we cant have circular references in osgi).
      * <p>
      * The initialVotingId is used to avoid an unnecessary topologyChanged event
-     * when switching form isolated to established view but with only the local
-     * instance in the view.
+     * when starting up an instance in a 1-node cluster: the instance
+     * will wait until the first voting has been finished to send
+     * the TOPOLOGY_INIT event - BUT even before that the API method
+     * getTopology() is open - so if anyone asks for the topology
+     * BEFORE the first voting in a 1-node cluster is done, it gets
+     * a particular clusterId - that one we aim to reuse for the first
+     * voting.
      */
     public void initialize(final DiscoveryServiceImpl discoveryService,
             final String initialVotingId) {
         synchronized(lock) {
         	this.discoveryService = discoveryService;
         	this.nextVotingId = initialVotingId;
+        	logger.info("initialize: nextVotingId="+nextVotingId);
             issueHeartbeat();
         }
 
@@ -281,7 +287,7 @@ public class HeartbeatHandler implements
      * and then a remote heartbeat (to all the topology connectors
      * which announce this part of the topology to others)
      */
-    private void issueHeartbeat() {
+    void issueHeartbeat() {
         if (discoveryService == null) {
             logger.error("issueHeartbeat: discoveryService is null");
         } else {
@@ -462,7 +468,7 @@ public class HeartbeatHandler implements
     /** Check whether the established view matches the reality, ie matches the
      * heartbeats
      */
-    private void checkView() {
+    void checkView() {
         // check the remotes first
         if (announcementRegistry == null) {
             logger.error("announcementRegistry is null");

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorClient.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorClient.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorClient.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorClient.java Tue Sep 22 12:35:19 2015
@@ -49,6 +49,7 @@ import org.apache.sling.discovery.Cluste
 import org.apache.sling.discovery.InstanceDescription;
 import org.apache.sling.discovery.impl.Config;
 import org.apache.sling.discovery.impl.cluster.ClusterViewService;
+import org.apache.sling.discovery.impl.cluster.UndefinedClusterViewException;
 import org.apache.sling.discovery.impl.topology.announcement.Announcement;
 import org.apache.sling.discovery.impl.topology.announcement.AnnouncementFilter;
 import org.apache.sling.discovery.impl.topology.announcement.AnnouncementRegistry;
@@ -181,8 +182,15 @@ public class TopologyConnectorClient imp
             Announcement topologyAnnouncement = new Announcement(
                     clusterViewService.getSlingId());
             topologyAnnouncement.setServerInfo(serverInfo);
-            final ClusterView clusterView = clusterViewService
-                    .getClusterView();
+            final ClusterView clusterView;
+            try {
+                clusterView = clusterViewService
+                        .getClusterView();
+            } catch (UndefinedClusterViewException e) {
+                // SLING-5030 : then we cannot ping
+                logger.warn("ping: no clusterView available at the moment, cannot ping others now: "+e);
+                return;
+            }
             topologyAnnouncement.setLocalCluster(clusterView);
             if (force) {
                 logger.debug("ping: sending a resetBackoff");
@@ -194,7 +202,7 @@ public class TopologyConnectorClient imp
                     // filter out announcements that are of old cluster instances
                     // which I dont really have in my cluster view at the moment
                     final Iterator<InstanceDescription> it = 
-                            clusterViewService.getClusterView().getInstances().iterator();
+                            clusterView.getInstances().iterator();
                     while(it.hasNext()) {
                         final InstanceDescription instance = it.next();
                         if (instance.getSlingId().equals(receivingSlingId)) {

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorServlet.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorServlet.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorServlet.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorServlet.java Tue Sep 22 12:35:19 2015
@@ -42,6 +42,7 @@ import org.apache.sling.commons.json.JSO
 import org.apache.sling.discovery.ClusterView;
 import org.apache.sling.discovery.impl.Config;
 import org.apache.sling.discovery.impl.cluster.ClusterViewService;
+import org.apache.sling.discovery.impl.cluster.UndefinedClusterViewException;
 import org.apache.sling.discovery.impl.topology.announcement.Announcement;
 import org.apache.sling.discovery.impl.topology.announcement.AnnouncementFilter;
 import org.apache.sling.discovery.impl.topology.announcement.AnnouncementRegistry;
@@ -322,6 +323,9 @@ public class TopologyConnectorServlet ex
         } catch (JSONException e) {
             logger.error("doPost: Got a JSONException: " + e, e);
             response.sendError(500);
+        } catch (UndefinedClusterViewException e) {
+            logger.warn("doPost: no clusterView available at the moment - cannot handle connectors now: "+e);
+            response.sendError(503); // "please retry, but atm I can't help since I'm isolated"
         }
 
     }

Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/resources/OSGI-INF/metatype/metatype.properties Tue Sep 22 12:35:19 2015
@@ -114,9 +114,3 @@ backoffStableFactor.description = When a
 backoffStandbyFactor.name = Backoff factor for standby connectors
 backoffStandbyFactor.description = When a topology connector is in standby mode (ie when it is redundant), the heartbeat \
  frequency is lowered, ie the heartbeatInterval for this connector is increased , at maximum by the backoffStandbyFactor
-
-delayInitEventUntilVoted.name = Delay first INIT event until voted
-delayInitEventUntilVoted.description = Delay sending the TOPOLOGY_INIT event until the instance has finished \
- an initial round of voting within the local cluster to make sure the view and leader are established. Avoids \
- duplicate leaders on startup.
-

Modified: sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterLoadTest.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterLoadTest.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterLoadTest.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterLoadTest.java Tue Sep 22 12:35:19 2015
@@ -1,6 +1,7 @@
 package org.apache.sling.discovery.impl.cluster;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.fail;
 
 import java.util.Iterator;
@@ -9,7 +10,6 @@ import java.util.List;
 import java.util.Random;
 
 import org.apache.sling.discovery.impl.common.resource.EstablishedInstanceDescription;
-import org.apache.sling.discovery.impl.common.resource.IsolatedInstanceDescription;
 import org.apache.sling.discovery.impl.setup.Instance;
 import org.apache.sling.discovery.impl.setup.WithholdingAppender;
 import org.apache.sling.testing.tools.retry.RetryLoop;
@@ -66,9 +66,15 @@ public class ClusterLoadTest {
     	Thread.sleep(2000);
     	// without any heartbeat action, the discovery service reports its local instance
     	// in so called 'isolated' mode - lets test for that
-        assertEquals(IsolatedInstanceDescription.class, firstInstance
-                .getClusterViewService().getClusterView().getInstances().get(0)
-                .getClass());
+//        assertEquals(IsolatedInstanceDescription.class, firstInstance
+//                .getClusterViewService().getClusterView().getInstances().get(0)
+//                .getClass());
+    	try{
+    	    firstInstance.getClusterViewService().getClusterView();
+    	    fail("should complain");
+    	} catch(UndefinedClusterViewException e) {
+    	    // SLING-5030: expected - without any heartbeat there is no established view yet
+    	}
         firstInstance.startHeartbeats(1);
         Thread.sleep(4000);
         // after a heartbeat and letting it settle, the discovery service must have

Modified: sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterTest.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterTest.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterTest.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterTest.java Tue Sep 22 12:35:19 2015
@@ -169,11 +169,19 @@ public class ClusterTest {
         assertNotNull(instance1);
         assertNotNull(instance2);
 
-        // the two instances are still isolated - so in a cluster of size 1
-        assertEquals(1, instance1.getClusterViewService().getClusterView().getInstances().size());
-        assertEquals(1, instance2.getClusterViewService().getClusterView().getInstances().size());
-        assertTrue(instance1.getLocalInstanceDescription().isLeader());
-        assertTrue(instance2.getLocalInstanceDescription().isLeader());
+        // the two instances are still isolated - hence they throw an exception
+        try{
+            instance1.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
+        try{
+            instance2.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
 
         // let the sync/voting happen
         for(int m=0; m<4; m++) {
@@ -456,7 +464,11 @@ public class ClusterTest {
 
         // join the instances to form a cluster by sending out heartbeats
         runHeartbeatOnceWith(instance1, instance2, instance3, instance5);
+        Thread.sleep(500);
+        runHeartbeatOnceWith(instance1, instance2, instance3, instance5);
+        Thread.sleep(500);
         runHeartbeatOnceWith(instance1, instance2, instance3, instance5);
+        Thread.sleep(500);
 
         assertSameTopology(new SimpleClusterView(instance1, instance2));
         assertSameTopology(new SimpleClusterView(instance3));
@@ -541,7 +553,7 @@ public class ClusterTest {
         logger.info("testDuplicateInstance3726: end");
     }
 
-    private void assertSameTopology(SimpleClusterView... clusters) {
+    private void assertSameTopology(SimpleClusterView... clusters) throws UndefinedClusterViewException {
         if (clusters==null) {
             return;
         }
@@ -590,10 +602,16 @@ public class ClusterTest {
         
         // start instance4 in a separate cluster
         instance4 = Instance.newStandaloneInstance("/var/discovery/implremote4/", "remoteInstance4", false, Integer.MAX_VALUE /* no timeout */, 1);
-        assertNotSameClusterIds(instance2, instance4);
-        assertNotSameClusterIds(instance3, instance4);
+        try{
+            instance4.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
         
         // instead, now start a connector from instance3 to instance2
+        instance4.runHeartbeatOnce();
+        instance4.runHeartbeatOnce();
         pingConnector(instance3, instance4);
         
         // start instance 1
@@ -674,10 +692,23 @@ public class ClusterTest {
         // now launch the remote instance
         instance3 = Instance.newStandaloneInstance("/var/discovery/implremote/", "remoteInstance", false, Integer.MAX_VALUE /* no timeout */, 1);
         assertSameClusterIds(instance1, instance2);
-        assertNotSameClusterIds(instance1, instance3);
+        try{
+            instance3.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException ue) {
+            // ok
+        }
+        assertEquals(0, instance1.getAnnouncementRegistry().listLocalAnnouncements().size());
+        assertEquals(0, instance1.getAnnouncementRegistry().listLocalIncomingAnnouncements().size());
+        assertEquals(0, instance2.getAnnouncementRegistry().listLocalAnnouncements().size());
+        assertEquals(0, instance2.getAnnouncementRegistry().listLocalIncomingAnnouncements().size());
+        assertEquals(0, instance3.getAnnouncementRegistry().listLocalAnnouncements().size());
+        assertEquals(0, instance3.getAnnouncementRegistry().listLocalIncomingAnnouncements().size());
         
         // create a topology connector from instance3 to instance1
         // -> corresponds to starting to ping
+        instance3.runHeartbeatOnce();
+        instance3.runHeartbeatOnce();
         pingConnector(instance3, instance1);
         // make asserts on the topology
         instance1.dumpRepo();
@@ -708,7 +739,7 @@ public class ClusterTest {
 		return instance1SlingId;
 	}
     
-    private void assertNotSameClusterIds(Instance... instances) {
+    private void assertNotSameClusterIds(Instance... instances) throws UndefinedClusterViewException {
     	if (instances==null) {
     		fail("must not pass empty set of instances here");
     	}
@@ -730,7 +761,7 @@ public class ClusterTest {
         }
 	}
 
-	private void assertSameClusterIds(Instance... instances) {
+	private void assertSameClusterIds(Instance... instances) throws UndefinedClusterViewException {
     	if (instances==null) {
             // then there is nothing to compare
             return;
@@ -827,7 +858,7 @@ public class ClusterTest {
 		return true;
 	}
 
-	private boolean pingConnector(final Instance from, final Instance to) {
+	private boolean pingConnector(final Instance from, final Instance to) throws UndefinedClusterViewException {
 	    final Announcement fromAnnouncement = createFromAnnouncement(from);
 	    Announcement replyAnnouncement = null;
 	    try{
@@ -835,7 +866,10 @@ public class ClusterTest {
 	    } catch(AssertionError e) {
 	        logger.warn("pingConnector: ping failed, assertionError: "+e);
 	        return false;
-	    }
+	    } catch (UndefinedClusterViewException e) {
+            logger.warn("pingConnector: ping failed, currently the cluster view is undefined: "+e);
+            return false;
+        }
         registerReplyAnnouncement(from, replyAnnouncement);
         return true;
     }
@@ -860,7 +894,8 @@ public class ClusterTest {
 //        statusDetails = null;
 	}
 
-	private Announcement ping(Instance to, final Announcement incomingTopologyAnnouncement) {
+	private Announcement ping(Instance to, final Announcement incomingTopologyAnnouncement) 
+	        throws UndefinedClusterViewException {
 		final String slingId = to.slingId;
 		final ClusterViewService clusterViewService = to.getClusterViewService();
 		final AnnouncementRegistry announcementRegistry = to.getAnnouncementRegistry();
@@ -910,7 +945,7 @@ public class ClusterTest {
         }
 	}
 
-	private Announcement createFromAnnouncement(final Instance from) {
+	private Announcement createFromAnnouncement(final Instance from) throws UndefinedClusterViewException {
 		// TODO: refactor TopologyConnectorClient to avoid duplicating code from there (ping())
 		Announcement topologyAnnouncement = new Announcement(from.slingId);
         topologyAnnouncement.setServerInfo(from.slingId);
@@ -922,7 +957,7 @@ public class ClusterTest {
                 // filter out announcements that are of old cluster instances
                 // which I dont really have in my cluster view at the moment
                 final Iterator<InstanceDescription> it = 
-                        from.getClusterViewService().getClusterView().getInstances().iterator();
+                        clusterView.getInstances().iterator();
                 while(it.hasNext()) {
                     final InstanceDescription instance = it.next();
                     if (instance.getSlingId().equals(receivingSlingId)) {
@@ -954,14 +989,18 @@ public class ClusterTest {
         assertNotNull(instance1);
         assertNotNull(instance2);
 
-        String clusterId1 = instance1.getClusterViewService()
-                .getClusterView().getId();
-        String clusterId2 = instance2.getClusterViewService()
-                .getClusterView().getId();
-        // the cluster ids must differ
-        assertNotEquals(clusterId1, clusterId2);
-        assertEquals(1, instance1.getClusterViewService().getClusterView().getInstances().size());
-        assertEquals(1, instance2.getClusterViewService().getClusterView().getInstances().size());
+        try{
+            instance1.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
+        try{
+            instance2.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
 
         // let the sync/voting happen
         instance1.runHeartbeatOnce();
@@ -980,10 +1019,6 @@ public class ClusterTest {
         // both cluster ids must be the same
         assertEquals(newClusterId1, newClusterId1);
         
-        // either instance1 or instance2 must have kept the cluster id
-        if (!newClusterId1.equals(clusterId1)) {
-        	assertEquals(newClusterId2, clusterId2);
-        }
         instance1.dumpRepo();
         assertEquals(2, instance1.getClusterViewService().getClusterView().getInstances().size());
         assertEquals(2, instance2.getClusterViewService().getClusterView().getInstances().size());
@@ -1007,8 +1042,13 @@ public class ClusterTest {
         // the cluster should now have size 1
         assertEquals(1, instance1.getClusterViewService().getClusterView().getInstances().size());
         // the instance 2 should be in isolated mode as it is no longer in the established view
-        // hence also size 1
-        assertEquals(1, instance2.getClusterViewService().getClusterView().getInstances().size());
+        // hence its cluster view is undefined: getClusterView() must throw UndefinedClusterViewException
+        try{
+            instance2.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
 
         // but the cluster id must have remained stable
         instance1.dumpRepo();
@@ -1037,15 +1077,24 @@ public class ClusterTest {
         assertEquals(instance3.getSlingId(), instance3.getClusterViewService()
                 .getSlingId());
 
-        int numC1 = instance1.getClusterViewService().getClusterView()
-                .getInstances().size();
-        assertEquals(1, numC1);
-        int numC2 = instance2.getClusterViewService().getClusterView()
-                .getInstances().size();
-        assertEquals(1, numC2);
-        int numC3 = instance3.getClusterViewService().getClusterView()
-                .getInstances().size();
-        assertEquals(1, numC3);
+        try{
+            instance1.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
+        try{
+            instance2.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
+        try{
+            instance3.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
 
         instance1.dumpRepo();
 
@@ -1096,12 +1145,18 @@ public class ClusterTest {
         assertEquals(instance2.getSlingId(), instance2.getClusterViewService()
                 .getSlingId());
 
-        int numC1 = instance1.getClusterViewService().getClusterView()
-                .getInstances().size();
-        assertEquals(1, numC1);
-        int numC2 = instance2.getClusterViewService().getClusterView()
-                .getInstances().size();
-        assertEquals(1, numC2);
+        try{
+            instance1.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
+        try{
+            instance2.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
 
         instance1.runHeartbeatOnce();
         instance2.runHeartbeatOnce();
@@ -1204,19 +1259,19 @@ public class ClusterTest {
         logger.info("testPropertyProviders: end");
     }
 
-    private void assertPropertyValues() {
+    private void assertPropertyValues() throws UndefinedClusterViewException {
         assertPropertyValues(instance1.getSlingId(), property1Name,
                 property1Value);
         assertPropertyValues(instance2.getSlingId(), property2Name,
                 property2Value);
     }
 
-    private void assertPropertyValues(String slingId, String name, String value) {
+    private void assertPropertyValues(String slingId, String name, String value) throws UndefinedClusterViewException {
         assertEquals(value, getInstance(instance1, slingId).getProperty(name));
         assertEquals(value, getInstance(instance2, slingId).getProperty(name));
     }
 
-    private InstanceDescription getInstance(Instance instance, String slingId) {
+    private InstanceDescription getInstance(Instance instance, String slingId) throws UndefinedClusterViewException {
         Iterator<InstanceDescription> it = instance.getClusterViewService()
                 .getClusterView().getInstances().iterator();
         while (it.hasNext()) {

Modified: sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/SingleInstanceTest.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/SingleInstanceTest.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/SingleInstanceTest.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/SingleInstanceTest.java Tue Sep 22 12:35:19 2015
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertNot
 import static org.junit.Assert.assertNotSame;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.util.List;
 import java.util.Map;
@@ -36,7 +37,6 @@ import org.apache.sling.discovery.Topolo
 import org.apache.sling.discovery.TopologyEvent.Type;
 import org.apache.sling.discovery.impl.cluster.helpers.AssertingTopologyEventListener;
 import org.apache.sling.discovery.impl.common.resource.EstablishedInstanceDescription;
-import org.apache.sling.discovery.impl.common.resource.IsolatedInstanceDescription;
 import org.apache.sling.discovery.impl.setup.Instance;
 import org.apache.sling.discovery.impl.setup.PropertyProviderImpl;
 import org.junit.After;
@@ -63,7 +63,7 @@ public class SingleInstanceTest {
         logger.info("setup: creating new standalone instance");
         instance = Instance.newStandaloneInstance("/var/discovery/impl/", "standaloneInstance", true,
                 20, 999/*long enough heartbeat interval to prevent them to disturb the explicit heartbeats during the test*/, 
-                3, UUID.randomUUID().toString(), true);
+                3, UUID.randomUUID().toString());
         logger.info("setup: creating new standalone instance done.");
     }
 
@@ -80,12 +80,20 @@ public class SingleInstanceTest {
     }
 
     @Test
-    public void testGetters() {
+    public void testGetters() throws UndefinedClusterViewException {
         logger.info("testGetters: start");
         assertNotNull(instance);
         logger.info("sling id=" + instance.getSlingId());
-        assertNotNull(instance.getClusterViewService().getClusterView());
+        try{
+            instance.getClusterViewService().getClusterView();
+            fail("should complain"); // SLING-5030
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
 
+        instance.runHeartbeatOnce();
+        
+        assertNotNull(instance.getClusterViewService().getClusterView());
         ClusterView cv = instance.getClusterViewService().getClusterView();
         logger.info("cluster view: id=" + cv.getId());
         assertNotNull(cv.getId());
@@ -121,6 +129,7 @@ public class SingleInstanceTest {
         pp.setProperty(propertyName, propertyValue);
         instance.bindPropertyProvider(pp, propertyName);
 
+        instance.runHeartbeatOnce();
         assertEquals(propertyValue,
                 instance.getClusterViewService().getClusterView()
                         .getInstances().get(0).getProperty(propertyName));
@@ -141,6 +150,10 @@ public class SingleInstanceTest {
     @Test
     public void testInvalidProperties() throws Throwable {
         logger.info("testInvalidProperties: start");
+        
+        instance.runHeartbeatOnce();
+        instance.runHeartbeatOnce();
+        
         final String propertyValue = UUID.randomUUID().toString();
         doTestProperty(UUID.randomUUID().toString(), propertyValue, propertyValue);
 
@@ -223,9 +236,13 @@ public class SingleInstanceTest {
     @Test
     public void testBootstrap() throws Throwable {
         logger.info("testBootstrap: start");
-        ClusterView initialClusterView = instance.getClusterViewService()
-                .getClusterView();
-        assertNotNull(initialClusterView);
+        try{
+            instance.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // SLING-5030 : isolated mode is gone, replaced with exception
+            // ok
+        }
 
         // SLING-3750 : with delaying the init event, we now should NOT get any events
         // before we let the view establish (which happens via heartbeats below)
@@ -234,15 +251,13 @@ public class SingleInstanceTest {
         assertEquals(0, ada.getEvents().size());
         assertEquals(0, ada.getUnexpectedCount());
 
-        // hard assumption that the class we get is an
-        // IsolatedInstanceDescription
-        // this is because we dont have any established clusterview yet - hence
-        // still entirely isolated
-        assertEquals(IsolatedInstanceDescription.class, initialClusterView
-                .getInstances().get(0).getClass());
-        assertEquals(IsolatedInstanceDescription.class, instance
-                .getClusterViewService().getClusterView().getInstances().get(0)
-                .getClass());
+        try{
+            instance.getClusterViewService().getClusterView();
+            fail("should complain");
+        } catch(UndefinedClusterViewException e) {
+            // ok
+        }
+        
         ada.addExpected(Type.TOPOLOGY_INIT);
         instance.runHeartbeatOnce();
         Thread.sleep(1000);
@@ -253,14 +268,11 @@ public class SingleInstanceTest {
         assertEquals(1, ada.getEvents().size());
         TopologyEvent initEvent = ada.getEvents().remove(0);
         assertNotNull(initEvent);
-
-        assertEquals(initialClusterView.getId(), initEvent.getNewView()
-                .getClusterViews().iterator().next().getId());
-        assertEquals(initialClusterView.getInstances().get(0).getSlingId(),
-                initEvent.getNewView().getLocalInstance().getSlingId());
+        assertNotNull(initEvent.getNewView());
+        assertNotNull(initEvent.getNewView().getClusterViews());
 
         // after the view was established though, we expect it to be a normal
-        // ResourceInstanceDescription
+        // EstablishedInstanceDescription
         assertEquals(EstablishedInstanceDescription.class, instance
                 .getClusterViewService().getClusterView().getInstances().get(0)
                 .getClass());