You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by st...@apache.org on 2015/09/22 14:35:24 UTC
svn commit: r1704604 [1/2] - in
/sling/trunk/bundles/extensions/discovery/impl/src:
main/java/org/apache/sling/discovery/impl/
main/java/org/apache/sling/discovery/impl/cluster/
main/java/org/apache/sling/discovery/impl/cluster/voting/ main/java/org/ap...
Author: stefanegli
Date: Tue Sep 22 12:35:19 2015
New Revision: 1704604
URL: http://svn.apache.org/viewvc?rev=1704604&view=rev
Log:
SLING-5030 : better handling of pseudo-network-partitioning : replace isolated mode with (larger) TOPOLOGY_CHANGING phase - PLUS SLING-4959 : remove config option delayInitEventUntilVoted (as that is not considered correct anymore)
Added:
sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/common/heartbeat/
sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatTest.java (with props)
Modified:
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/Config.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/TopologyWebConsolePlugin.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewService.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewServiceImpl.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHandler.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHelper.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingView.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/ViewHelper.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorClient.java
sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorServlet.java
sling/trunk/bundles/extensions/discovery/impl/src/main/resources/OSGI-INF/metatype/metatype.properties
sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterLoadTest.java
sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterTest.java
sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/SingleInstanceTest.java
sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/TopologyEventTest.java
sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/setup/Instance.java
sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/topology/LargeTopologyWithHubTest.java
sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/topology/TopologyTest.java
sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/topology/TopologyTestHelper.java
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/Config.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/Config.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/Config.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/Config.java Tue Sep 22 12:35:19 2015
@@ -116,15 +116,6 @@ public class Config {
private String discoveryResourcePath = DEFAULT_DISCOVERY_RESOURCE_PATH;
/**
- * If set to true the TOPOLOGY_INIT event will be sent only once the cluster view was established.
- * This can mean there is a delay until the voting in the cluster was finished.
- * But the advantage of delaying the INIT event is to make sure no two instances see themselves
- * as leader at startup. (see SLING-3750).
- */
- @Property(boolValue=true)
- private static final String DELAY_INIT_EVENT_UNTIL_VOTED = "delayInitEventUntilVoted";
-
- /**
* If set to true, local-loops of topology connectors are automatically stopped when detected so.
*/
@Property(boolValue=false)
@@ -183,9 +174,6 @@ public class Config {
/** True when auto-stop of a local-loop is enabled. Default is false. **/
private boolean autoStopLocalLoopEnabled;
- /** True to make sure the INIT delay is only sent once there is (the first) established view in the cluster **/
- private boolean delayInitEventUntilVoted = true; /* default: true */
-
/**
* True when the hmac is enabled and signing is disabled.
*/
@@ -317,7 +305,6 @@ public class Config {
logger.debug("configure: invertRepositoryDescriptor='{}'",
this.invertRepositoryDescriptor);
- delayInitEventUntilVoted = PropertiesUtil.toBoolean(properties.get(DELAY_INIT_EVENT_UNTIL_VOTED), true);
autoStopLocalLoopEnabled = PropertiesUtil.toBoolean(properties.get(AUTO_STOP_LOCAL_LOOP_ENABLED), false);
gzipConnectorRequestsEnabled = PropertiesUtil.toBoolean(properties.get(GZIP_CONNECTOR_REQUESTS_ENABLED), false);
@@ -494,14 +481,6 @@ public class Config {
}
/**
- * @return true to make sure the INIT event is only sent to topology listeners once
- * there is (eg the first) an established cluster view
- */
- public boolean isDelayInitEventUntilVoted() {
- return delayInitEventUntilVoted;
- }
-
- /**
* @return true if the auto-stopping of local-loop topology connectors is enabled.
*/
public boolean isAutoStopLocalLoopEnabled() {
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/DiscoveryServiceImpl.java Tue Sep 22 12:35:19 2015
@@ -33,6 +33,7 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
+import java.util.UUID;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
@@ -57,8 +58,10 @@ import org.apache.sling.discovery.Topolo
import org.apache.sling.discovery.TopologyEventListener;
import org.apache.sling.discovery.TopologyView;
import org.apache.sling.discovery.impl.cluster.ClusterViewService;
+import org.apache.sling.discovery.impl.cluster.UndefinedClusterViewException;
+import org.apache.sling.discovery.impl.common.DefaultClusterViewImpl;
+import org.apache.sling.discovery.impl.common.DefaultInstanceDescriptionImpl;
import org.apache.sling.discovery.impl.common.heartbeat.HeartbeatHandler;
-import org.apache.sling.discovery.impl.common.resource.IsolatedInstanceDescription;
import org.apache.sling.discovery.impl.common.resource.ResourceHelper;
import org.apache.sling.discovery.impl.topology.TopologyViewImpl;
import org.apache.sling.discovery.impl.topology.announcement.AnnouncementRegistry;
@@ -230,6 +233,9 @@ public class DiscoveryServiceImpl implem
@Reference(cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE, policy = ReferencePolicy.DYNAMIC, referenceInterface = TopologyEventListener.class)
private TopologyEventListener[] eventListeners = new TopologyEventListener[0];
+ /** SLING-5030 : this map contains the event last sent to each listener to prevent duplicate CHANGING events when scheduler is broken**/
+ private Map<TopologyEventListener,TopologyEvent.Type> lastEventMap = new HashMap<TopologyEventListener, TopologyEvent.Type>();
+
/**
* All property providers.
*/
@@ -281,6 +287,9 @@ public class DiscoveryServiceImpl implem
* synchronized(lock) (which would be deadlock-prone). (introduced with SLING-4638).
**/
private volatile boolean delayedEventPending = false;
+
+ /** used to continue functioning when scheduler is broken **/
+ private volatile boolean delayedEventPendingFailed = false;
private ServiceRegistration mbeanRegistration;
@@ -308,6 +317,14 @@ public class DiscoveryServiceImpl implem
logger.warn("registerMBean: Unable to register DiscoveryServiceImpl MBean", t);
}
}
+
+ private void setOldView(TopologyViewImpl view) {
+ if (view==null) {
+ throw new IllegalArgumentException("view must not be null");
+ }
+ oldView = view;
+ }
+
/**
* Activate this service
*/
@@ -324,15 +341,32 @@ public class DiscoveryServiceImpl implem
slingId = settingsService.getSlingId();
- oldView = (TopologyViewImpl) getTopology();
+ final String isolatedClusterId = UUID.randomUUID().toString();
+ {
+ // create a pre-voting/isolated topologyView which would be used
+ // until the first voting has finished.
+ // this way for the single-instance case the clusterId can
+ // remain the same between a getTopology() that is invoked before
+ // the first TOPOLOGY_INIT and afterwards
+ DefaultClusterViewImpl isolatedCluster = new DefaultClusterViewImpl(isolatedClusterId);
+ Map<String, String> emptyProperties = new HashMap<String, String>();
+ DefaultInstanceDescriptionImpl isolatedInstance =
+ new DefaultInstanceDescriptionImpl(isolatedCluster, true, true, slingId, emptyProperties);
+ Collection<InstanceDescription> col = new ArrayList<InstanceDescription>();
+ col.add(isolatedInstance);
+ final TopologyViewImpl topology = new TopologyViewImpl();
+ topology.addInstances(col);
+ topology.markOld();
+ setOldView(topology);
+ }
+ setOldView((TopologyViewImpl) getTopology());
oldView.markOld();
// make sure the first heartbeat is issued as soon as possible - which
// is right after this service starts. since the two (discoveryservice
// and heartbeatHandler need to know each other, the discoveryservice
// is passed on to the heartbeatHandler in this initialize call).
- heartbeatHandler.initialize(this,
- clusterViewService.getIsolatedClusterViewId());
+ heartbeatHandler.initialize(this, isolatedClusterId);
final TopologyEventListener[] registeredServices;
synchronized (lock) {
@@ -350,16 +384,11 @@ public class DiscoveryServiceImpl implem
doUpdateProperties();
TopologyViewImpl newView = (TopologyViewImpl) getTopology();
- final boolean isIsolatedView = isIsolated(newView);
- if (config.isDelayInitEventUntilVoted() && isIsolatedView) {
+ if (!newView.isCurrent()) {
// SLING-3750: just issue a log.info about the delaying
logger.info("activate: this instance is in isolated mode and must yet finish voting before it can send out TOPOLOGY_INIT.");
initEventDelayed = true;
} else {
- if (isIsolatedView) {
- // SLING-3750: issue a log.info about not-delaying even though isolated
- logger.info("activate: this instance is in isolated mode and likely should delay TOPOLOGY_INIT - but corresponding config ('delayInitEventUntilVoted') is disabled.");
- }
final TopologyEvent event = new TopologyEvent(Type.TOPOLOGY_INIT, null,
newView);
for (final TopologyEventListener da : registeredServices) {
@@ -367,7 +396,7 @@ public class DiscoveryServiceImpl implem
}
}
activated = true;
- oldView = newView;
+ setOldView(newView);
}
URL[] topologyConnectorURLs = config.getTopologyConnectorURLs();
@@ -390,14 +419,6 @@ public class DiscoveryServiceImpl implem
logger.debug("DiscoveryServiceImpl activated.");
}
- private boolean isIsolated(TopologyViewImpl view) {
- final InstanceDescription localInstance = view.getLocalInstance();
- // 'instanceof' is not so nice here - but anything else requires
- // excessive changing (introducing new classes/interfaces)
- // which is an overkill in and of itself.. thus: 'instanceof'
- return localInstance instanceof IsolatedInstanceDescription;
- }
-
private void enqueueAsyncTopologyEvent(final TopologyEventListener da, final TopologyEvent event) {
if (logger.isDebugEnabled()) {
logger.debug("enqueueAsyncTopologyEvent: sending topologyEvent {}, to {}", event, da);
@@ -408,7 +429,13 @@ public class DiscoveryServiceImpl implem
logger.warn("enqueueAsyncTopologyEvent: asyncEventSender is null, cannot send event ({}, {})!", da, event);
return;
}
+ if (lastEventMap.get(da)==event.getType() && event.getType()==Type.TOPOLOGY_CHANGING) {
+ // don't send TOPOLOGY_CHANGING twice
+ logger.debug("enqueueAsyncTopologyEvent: listener already got TOPOLOGY_CHANGING: {}", da);
+ return;
+ }
asyncEventSender.enqueue(da, event);
+ lastEventMap.put(da, event.getType());
if (logger.isDebugEnabled()) {
logger.debug("enqueueAsyncTopologyEvent: sending topologyEvent {}, to {}", event, da);
}
@@ -651,7 +678,18 @@ public class DiscoveryServiceImpl implem
// create a new topology view
final TopologyViewImpl topology = new TopologyViewImpl();
- final ClusterView localClusterView = clusterViewService.getClusterView();
+ ClusterView localClusterView = null;
+ try {
+ localClusterView = clusterViewService.getClusterView();
+ } catch (UndefinedClusterViewException e) {
+ // SLING-5030 : when we're cut off from the local cluster we also
+ // treat it as being cut off from the entire topology, ie we don't
+ // update the announcements but just return
+ // the previous oldView marked as !current
+ logger.info("getTopology: undefined cluster view: "+e.getClass().getSimpleName()+": "+e);
+ oldView.markOld();
+ return oldView;
+ }
final List<InstanceDescription> localInstances = localClusterView.getInstances();
topology.addInstances(localInstances);
@@ -660,11 +698,6 @@ public class DiscoveryServiceImpl implem
.listInstances(localClusterView);
topology.addInstances(attachedInstances);
- // SLING-4638: set 'current' correctly
- if (isIsolated(topology) || delayedEventPending) {
- topology.markOld();
- }
-
return topology;
}
@@ -695,112 +728,151 @@ public class DiscoveryServiceImpl implem
logger.debug("handlePotentialTopologyChange: ignoring early change before activate finished.");
return;
}
- if (delayedEventPending) {
+ if (delayedEventPending && !delayedEventPendingFailed) {
logger.debug("handlePotentialTopologyChange: ignoring potential change since a delayed event is pending.");
return;
}
- if (oldView == null) {
- throw new IllegalStateException("oldView must not be null");
- }
TopologyViewImpl newView = (TopologyViewImpl) getTopology();
- TopologyViewImpl oldView = this.oldView;
-
if (initEventDelayed) {
- if (isIsolated(newView)) {
+ // this means activate could not yet send a TOPOLOGY_INIT event
+ // (which can happen frequently) - so we have to do this now
+ // that we potentially have a valid view
+ if (!newView.isCurrent()) {
// we cannot proceed until we're out of the isolated mode..
// SLING-4535 : while this has warning character, it happens very frequently,
// eg also when binding a PropertyProvider (so normal processing)
// hence lowering to info for now
logger.info("handlePotentialTopologyChange: still in isolated mode - cannot send TOPOLOGY_INIT yet.");
- return;
+ } else {
+ logger.info("handlePotentialTopologyChange: new view is no longer isolated sending delayed TOPOLOGY_INIT now.");
+ // SLING-4638: OK: newView is current==true as we're just coming out of initEventDelayed first time.
+ enqueueForAll(Type.TOPOLOGY_INIT, null, newView);
+ initEventDelayed = false;
}
- logger.info("handlePotentialTopologyChange: new view is no longer isolated sending delayed TOPOLOGY_INIT now.");
- final TopologyEvent initEvent = new TopologyEvent(Type.TOPOLOGY_INIT, null,
- newView); // SLING-4638: OK: newView is current==true as we're just coming out of initEventDelayed first time.
- for (final TopologyEventListener da : eventListeners) {
- enqueueAsyncTopologyEvent(da, initEvent);
- }
- // now after having sent INIT events, we need to set oldView to what we've
- // just sent out - which is newView. This makes sure that we don't send
- // out any CHANGING/CHANGED event afterwards based on an 'isolated-oldView'
- // (which would be wrong). Hence:
- this.oldView = newView;
- oldView = newView;
-
- initEventDelayed = false;
- }
-
- Type difference = newView.compareTopology(oldView);
- if (difference == null) {
- // then dont send any event then
- logger.debug("handlePotentialTopologyChange: identical views. not informing listeners");
return;
+ }
+
+ TopologyViewImpl oldView = this.oldView;
+ Type difference;
+ if (!newView.isCurrent()) {
+ difference = Type.TOPOLOGY_CHANGING;
} else {
- if (logger.isDebugEnabled()) {
- logger.debug("handlePotentialTopologyChange: difference: {}, oldView={}, newView={}",
- new Object[] {difference, oldView, newView});
+ difference = newView.compareTopology(oldView);
+ }
+ if (difference == null) { // indicating: equals
+ if (delayedEventPendingFailed) {
+ // when the delayed event handling for some very odd reason could
+ // not re-spawn itself (via runAfter) - in that case we now
+ // have listeners in CHANGING state .. which we should wake up
+ enqueueForAll(Type.TOPOLOGY_CHANGED, oldView, newView);
+ delayedEventPendingFailed = false;
+ delayedEventPending = false;
+ } else {
+ // then don't send any event
+ logger.debug("handlePotentialTopologyChange: identical views. not informing listeners");
}
+ return;
+ } else if (difference == Type.PROPERTIES_CHANGED) {
+ enqueueForAll(Type.PROPERTIES_CHANGED, oldView, newView);
+ return;
}
+ delayedEventPendingFailed = false;
+ delayedEventPending = false;
+ // else: TOPOLOGY_CHANGING or CHANGED
+ if (logger.isDebugEnabled()) {
+ logger.debug("handlePotentialTopologyChange: difference: {}, oldView={}, newView={}",
+ new Object[] {difference, oldView, newView});
+ }
+
+ // send a TOPOLOGY_CHANGING first
+ logger.info("handlePotentialTopologyChange: sending "+Type.TOPOLOGY_CHANGING+
+ " to all listeners (that have not gotten one yet) (oldView={}).", oldView);
oldView.markOld();
- if (difference!=Type.TOPOLOGY_CHANGED) {
- for (final TopologyEventListener da : eventListeners) {
- enqueueAsyncTopologyEvent(da, new TopologyEvent(difference, oldView,
- newView));
- }
- } else { // TOPOLOGY_CHANGED
-
- // send a TOPOLOGY_CHANGING first
- for (final TopologyEventListener da : eventListeners) {
- enqueueAsyncTopologyEvent(da, new TopologyEvent(Type.TOPOLOGY_CHANGING, oldView,
- null));
- }
-
- if (config.getMinEventDelay()>0) {
- // then delay the sending of the next event
- logger.debug("handlePotentialTopologyChange: delaying event sending to avoid event flooding");
-
- if (runAfter(config.getMinEventDelay() /*seconds*/ , new Runnable() {
-
- public void run() {
- synchronized(lock) {
- delayedEventPending = false;
- logger.debug("handlePotentialTopologyChange: sending delayed event now");
- if (!activated) {
- logger.debug("handlePotentialTopologyChange: no longer activated. not sending delayed event");
- return;
- }
- final TopologyViewImpl newView = (TopologyViewImpl) getTopology();
- // irrespective of the difference, send the latest topology
- // via a topology_changed event (since we already sent a changing)
- for (final TopologyEventListener da : eventListeners) {
- enqueueAsyncTopologyEvent(da, new TopologyEvent(Type.TOPOLOGY_CHANGED,
- DiscoveryServiceImpl.this.oldView, newView));
- }
- DiscoveryServiceImpl.this.oldView = newView;
- }
- if (heartbeatHandler!=null) {
- // trigger a heartbeat 'now' to pass it on to the topology asap
- heartbeatHandler.triggerHeartbeat();
+ for (final TopologyEventListener da : eventListeners) {
+ enqueueAsyncTopologyEvent(da, new TopologyEvent(Type.TOPOLOGY_CHANGING, oldView,
+ null));
+ }
+
+ int minEventDelay = config.getMinEventDelay();
+ if ((!newView.isCurrent()) && minEventDelay<=0) {
+ // if newView is isolated
+ // then we should not send a TOPOLOGY_CHANGED yet - but instead
+ // wait until the view gets resolved. that is achieved by
+ // going into event-delaying and retrying that way.
+ // and if minEventDelay is not configured, then auto-switch
+ // to a minEventDelay of 1 second:
+ minEventDelay=1;
+ }
+
+ if (minEventDelay<=0) {
+ // otherwise, send the TOPOLOGY_CHANGED now
+ enqueueForAll(Type.TOPOLOGY_CHANGED, oldView, newView);
+ return;
+ }
+
+ // then delay the sending of the next event
+ logger.debug("handlePotentialTopologyChange: delaying event sending to avoid event flooding");
+
+ if (runAfter(minEventDelay /*seconds*/ , new Runnable() {
+
+ public void run() {
+ logger.debug("handlePotentialTopologyChange: acquiring synchronized(lock)...");
+ synchronized(lock) {
+ logger.debug("handlePotentialTopologyChange: sending delayed event now");
+ if (!activated) {
+ delayedEventPending = false;
+ logger.debug("handlePotentialTopologyChange: no longer activated. not sending delayed event");
+ return;
+ }
+ final TopologyViewImpl newView = (TopologyViewImpl) getTopology();
+ // irrespective of the difference, send the latest topology
+ // via a topology_changed event (since we already sent a changing)
+ if (!newView.isCurrent()) {
+ // if the newView is isolated at this stage we have sent
+ // TOPOLOGY_CHANGING to the listeners, and they are now waiting
+ // for TOPOLOGY_CHANGED. But we can't send them that yet..
+ // we must do a loop via the minEventDelay mechanism and log
+ // accordingly
+ if (runAfter(1/*sec*/, this)) {
+ logger.warn("handlePotentialTopologyChange: local instance is isolated from topology. Waiting for rejoining...");
+ return;
}
+ // otherwise we have to fall back to still sending a TOPOLOGY_CHANGED
+ // but that's unexpected! (back to delayedEventPending=false..)
+ delayedEventPendingFailed = true;
+ logger.warn("handlePotentialTopologyChange: local instance is isolated from topology but failed to trigger delay-job");
+ return;
}
- })) {
- delayedEventPending = true;
- logger.debug("handlePotentialTopologyChange: delaying of event triggered.");
- return;
- } else {
- logger.debug("handlePotentialTopologyChange: delaying did not work for some reason.");
- }
- }
- // otherwise, send the TOPOLOGY_CHANGED now
- for (final TopologyEventListener da : eventListeners) {
- enqueueAsyncTopologyEvent(da, new TopologyEvent(Type.TOPOLOGY_CHANGED, oldView,
- newView));
+ enqueueForAll(Type.TOPOLOGY_CHANGED, DiscoveryServiceImpl.this.oldView, newView);
+ delayedEventPending = false;
+ }
}
+ })) {
+ delayedEventPending = true;
+ logger.debug("handlePotentialTopologyChange: delayed event triggering.");
+ return;
+ } else {
+ logger.debug("handlePotentialTopologyChange: delaying event triggering did not work for some reason. "
+ + "Will be retriggered lazily via later heartbeat.");
+ delayedEventPending = true;
+ delayedEventPendingFailed = true;
+ return;
+ }
+ }
+
+ private void enqueueForAll(Type eventType, TopologyViewImpl oldView, TopologyViewImpl newView) {
+ if (oldView!=null) {
+ oldView.markOld();
+ }
+ logger.info("enqueueForAll: sending "+eventType+" to all listeners (oldView={}, newView={}).", oldView, newView);
+ for (final TopologyEventListener da : eventListeners) {
+ enqueueAsyncTopologyEvent(da, new TopologyEvent(eventType, oldView, newView));
+ }
+ if (eventType!=Type.TOPOLOGY_CHANGING) {
+ setOldView(newView);
}
-
- this.oldView = newView;
if (heartbeatHandler!=null) {
// trigger a heartbeat 'now' to pass it on to the topology asap
heartbeatHandler.triggerHeartbeat();
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/TopologyWebConsolePlugin.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/TopologyWebConsolePlugin.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/TopologyWebConsolePlugin.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/TopologyWebConsolePlugin.java Tue Sep 22 12:35:19 2015
@@ -55,6 +55,7 @@ import org.apache.sling.discovery.Topolo
import org.apache.sling.discovery.TopologyEventListener;
import org.apache.sling.discovery.TopologyView;
import org.apache.sling.discovery.impl.cluster.ClusterViewService;
+import org.apache.sling.discovery.impl.cluster.UndefinedClusterViewException;
import org.apache.sling.discovery.impl.topology.announcement.Announcement;
import org.apache.sling.discovery.impl.topology.announcement.AnnouncementRegistry;
import org.apache.sling.discovery.impl.topology.announcement.CachedAnnouncement;
@@ -239,16 +240,20 @@ public class TopologyWebConsolePlugin ex
Set<ClusterView> clusters = topology.getClusterViews();
ClusterView myCluster = topology.getLocalInstance().getClusterView();
boolean odd = true;
- renderCluster(pw, myCluster, myCluster, odd);
-
- for (Iterator<ClusterView> it = clusters.iterator(); it.hasNext();) {
- ClusterView clusterView = it.next();
- if (clusterView.equals(myCluster)) {
- // skip - I already rendered that
- continue;
+ try{
+ renderCluster(pw, myCluster, myCluster, odd);
+
+ for (Iterator<ClusterView> it = clusters.iterator(); it.hasNext();) {
+ ClusterView clusterView = it.next();
+ if (clusterView.equals(myCluster)) {
+ // skip - I already rendered that
+ continue;
+ }
+ odd = !odd;
+ renderCluster(pw, clusterView, myCluster, odd);
}
- odd = !odd;
- renderCluster(pw, clusterView, myCluster, odd);
+ } catch(UndefinedClusterViewException e) {
+ pw.println("<tr><td>No ClusterView available at the moment, either isolated or not yet voted atm! ("+e+")</td></tr>");
}
pw.println("</tbody>");
@@ -283,8 +288,10 @@ public class TopologyWebConsolePlugin ex
/**
* Render a particular cluster (into table rows)
+ * @throws UndefinedClusterViewException
*/
- private void renderCluster(final PrintWriter pw, final ClusterView renderCluster, final ClusterView localCluster, final boolean odd) {
+ private void renderCluster(final PrintWriter pw, final ClusterView renderCluster, final ClusterView localCluster, final boolean odd)
+ throws UndefinedClusterViewException {
final Collection<Announcement> announcements = announcementRegistry.listAnnouncementsInSameCluster(localCluster);
for (Iterator<InstanceDescription> it = renderCluster.getInstances()
@@ -755,15 +762,19 @@ public class TopologyWebConsolePlugin ex
final Set<ClusterView> clusters = topology.getClusterViews();
final ClusterView myCluster = topology.getLocalInstance().getClusterView();
- printCluster(pw, myCluster, myCluster);
-
- for (Iterator<ClusterView> it = clusters.iterator(); it.hasNext();) {
- ClusterView clusterView = it.next();
- if (clusterView.equals(myCluster)) {
- // skip - I already rendered that
- continue;
+ try{
+ printCluster(pw, myCluster, myCluster);
+
+ for (Iterator<ClusterView> it = clusters.iterator(); it.hasNext();) {
+ ClusterView clusterView = it.next();
+ if (clusterView.equals(myCluster)) {
+ // skip - I already rendered that
+ continue;
+ }
+ printCluster(pw, clusterView, myCluster);
}
- printCluster(pw, clusterView, myCluster);
+ } catch (UndefinedClusterViewException e) {
+ pw.println("No ClusterView available at the moment, either isolated or not yet voted atm! ("+e+")");
}
pw.println();
@@ -876,8 +887,10 @@ public class TopologyWebConsolePlugin ex
/**
* Render a particular cluster
+ * @throws UndefinedClusterViewException
*/
- private void printCluster(final PrintWriter pw, final ClusterView renderCluster, final ClusterView localCluster) {
+ private void printCluster(final PrintWriter pw, final ClusterView renderCluster, final ClusterView localCluster)
+ throws UndefinedClusterViewException {
final Collection<Announcement> announcements = announcementRegistry.listAnnouncementsInSameCluster(localCluster);
for(final InstanceDescription instanceDescription : renderCluster.getInstances() ) {
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewService.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewService.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewService.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewService.java Tue Sep 22 12:35:19 2015
@@ -32,19 +32,18 @@ public interface ClusterViewService {
/** the sling id of the local instance **/
String getSlingId();
- /** the current cluster view **/
- ClusterView getClusterView();
-
/**
- * the view id of the cluster view when isolated - ie before any view is
- * established
- **/
- String getIsolatedClusterViewId();
+ * the current cluster view
+ * @return the current cluster view - never returns null
+ * @throws UndefinedClusterViewException
+ */
+ ClusterView getClusterView() throws UndefinedClusterViewException;
/** checks whether the cluster view contains a particular sling id **/
- boolean contains(String slingId);
+ boolean contains(String slingId) throws UndefinedClusterViewException;
/** checks whether the cluster contains any of the provided instances **/
- boolean containsAny(Collection<InstanceDescription> listInstances);
+ boolean containsAny(Collection<InstanceDescription> listInstances)
+ throws UndefinedClusterViewException;
}
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewServiceImpl.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewServiceImpl.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewServiceImpl.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/ClusterViewServiceImpl.java Tue Sep 22 12:35:19 2015
@@ -21,13 +21,11 @@ package org.apache.sling.discovery.impl.
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
-import java.util.UUID;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.sling.api.resource.LoginException;
-import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.apache.sling.discovery.ClusterView;
@@ -36,7 +34,6 @@ import org.apache.sling.discovery.impl.C
import org.apache.sling.discovery.impl.common.View;
import org.apache.sling.discovery.impl.common.ViewHelper;
import org.apache.sling.discovery.impl.common.resource.EstablishedClusterView;
-import org.apache.sling.discovery.impl.common.resource.IsolatedInstanceDescription;
import org.apache.sling.settings.SlingSettingsService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -62,34 +59,6 @@ public class ClusterViewServiceImpl impl
@Reference
private Config config;
- /** the cluster view id of the isolated cluster view */
- private String isolatedClusterViewId = UUID.randomUUID().toString();
-
- public String getIsolatedClusterViewId() {
- return isolatedClusterViewId;
- }
-
- private ClusterView getIsolatedClusterView() {
- ResourceResolver resourceResolver = null;
- try {
- resourceResolver = resourceResolverFactory
- .getAdministrativeResourceResolver(null);
- Resource instanceResource = resourceResolver
- .getResource(config.getClusterInstancesPath() + "/"
- + getSlingId());
- IsolatedInstanceDescription ownInstance = new IsolatedInstanceDescription(instanceResource,
- isolatedClusterViewId, getSlingId());
- return ownInstance.getClusterView();
- } catch (LoginException e) {
- logger.error("Could not do a login: " + e, e);
- throw new RuntimeException("Could not do a login", e);
- } finally {
- if (resourceResolver != null) {
- resourceResolver.close();
- }
- }
- }
-
public String getSlingId() {
if (settingsService==null) {
return null;
@@ -97,9 +66,8 @@ public class ClusterViewServiceImpl impl
return settingsService.getSlingId();
}
- public boolean contains(final String slingId) {
- List<InstanceDescription> localInstances = getClusterView()
- .getInstances();
+ public boolean contains(final String slingId) throws UndefinedClusterViewException {
+ List<InstanceDescription> localInstances = getClusterView().getInstances();
for (Iterator<InstanceDescription> it = localInstances.iterator(); it
.hasNext();) {
InstanceDescription aLocalInstance = it.next();
@@ -111,7 +79,8 @@ public class ClusterViewServiceImpl impl
return false;
}
- public boolean containsAny(Collection<InstanceDescription> listInstances) {
+ public boolean containsAny(Collection<InstanceDescription> listInstances)
+ throws UndefinedClusterViewException{
for (Iterator<InstanceDescription> it = listInstances.iterator(); it
.hasNext();) {
InstanceDescription instanceDescription = it.next();
@@ -122,10 +91,10 @@ public class ClusterViewServiceImpl impl
return false;
}
- public ClusterView getClusterView() {
+ public ClusterView getClusterView() throws UndefinedClusterViewException {
if (resourceResolverFactory==null) {
logger.warn("getClusterView: no resourceResolverFactory set at the moment.");
- return null;
+ throw new UndefinedClusterViewException("no resourceResolverFactory set");
}
ResourceResolver resourceResolver = null;
try {
@@ -135,7 +104,7 @@ public class ClusterViewServiceImpl impl
View view = ViewHelper.getEstablishedView(resourceResolver, config);
if (view == null) {
logger.debug("getClusterView: no view established at the moment. isolated mode");
- return getIsolatedClusterView();
+ throw new UndefinedClusterViewException("no established view at the moment");
}
EstablishedClusterView clusterViewImpl = new EstablishedClusterView(
@@ -155,12 +124,12 @@ public class ClusterViewServiceImpl impl
logger.info("getClusterView: the existing established view does not incude the local instance ("+getSlingId()+") yet! Assuming isolated mode. "
+ "If this occurs at runtime - other than at startup - it could cause a pseudo-network-partition, see SLING-3432. "
+ "Consider increasing heartbeatTimeout then!");
- return getIsolatedClusterView();
+ throw new UndefinedClusterViewException("established view does not include local instance - isolated");
}
} catch (LoginException e) {
logger.error(
"handleEvent: could not log in administratively: " + e, e);
- return null;
+ throw new UndefinedClusterViewException("could not log in administratively: "+e);
} finally {
if (resourceResolver != null) {
resourceResolver.close();
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHandler.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHandler.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHandler.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHandler.java Tue Sep 22 12:35:19 2015
@@ -389,10 +389,8 @@ public class VotingHandler implements Ev
// 3b: move the result under /established
final String newEstablishedViewPath = establishedViewsResource.getPath()
+ "/" + winningVoteResource.getName();
- if (logger.isDebugEnabled()) {
- logger.debug("promote: promote to new established node "
+ logger.info("promote: promote to new established node "
+ newEstablishedViewPath);
- }
ResourceHelper.moveResource(winningVoteResource, newEstablishedViewPath);
// step 4: delete all ongoing votings...
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHelper.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHelper.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHelper.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingHelper.java Tue Sep 22 12:35:19 2015
@@ -58,7 +58,9 @@ public class VotingHelper {
final Resource ongoingVotingsResource = resourceResolver
.getResource(ongoingVotingsPath);
if (ongoingVotingsResource == null) {
- logger.info("listOpenNonWinningVotings: no ongoing votings parent resource found"); // TOOD - is this expected?
+ // it is legal that at this stage there is no ongoingvotings node yet
+ // for example when there was never a voting yet
+ logger.debug("listOpenNonWinningVotings: no ongoing votings parent resource found");
return new ArrayList<VotingView>();
}
final Iterable<Resource> children = ongoingVotingsResource.getChildren();
@@ -150,7 +152,9 @@ public class VotingHelper {
Resource ongoingVotingsResource = resourceResolver
.getResource(ongoingVotingsPath);
if (ongoingVotingsResource == null) {
- logger.info("getWinningVoting: no ongoing votings parent resource found"); // TOOD - is this expected?
+ // it is legal that at this stage there is no ongoingvotings node yet
+ // for example when there was never a voting yet
+ logger.debug("getWinningVoting: no ongoing votings parent resource found");
return null;
}
Iterable<Resource> children = ongoingVotingsResource.getChildren();
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingView.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingView.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingView.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/cluster/voting/VotingView.java Tue Sep 22 12:35:19 2015
@@ -115,6 +115,7 @@ public class VotingView extends View {
resourceResolver.create(membersResource, memberId, properties);
}
resourceResolver.commit();
+ logger.info("newVoting: new voting started: newViewId="+newViewId+", resource="+votingResource+", #members: "+liveInstances.size()+", members: "+liveInstances);
return new VotingView(votingResource);
}
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/ViewHelper.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/ViewHelper.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/ViewHelper.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/ViewHelper.java Tue Sep 22 12:35:19 2015
@@ -88,14 +88,17 @@ public class ViewHelper {
final Resource establishedParent = resourceResolver
.getResource(config.getEstablishedViewPath());
if (establishedParent == null) {
+ logger.debug("getEstablishedView: no established view found: {}", config.getEstablishedViewPath());
return null;
}
final Iterable<Resource> children = establishedParent.getChildren();
if (children == null) {
+ logger.debug("getEstablishedView: no children found of {}", establishedParent);
return null;
}
final Iterator<Resource> it = children.iterator();
if (!it.hasNext()) {
+ logger.debug("getEstablishedView: no it of children of {}", establishedParent);
return null;
}
Resource establishedView = it.next();
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/common/heartbeat/HeartbeatHandler.java Tue Sep 22 12:35:19 2015
@@ -209,14 +209,20 @@ public class HeartbeatHandler implements
* a reference on us - but we cant have circular references in osgi).
* <p>
* The initialVotingId is used to avoid an unnecessary topologyChanged event
- * when switching form isolated to established view but with only the local
- * instance in the view.
+ * when starting up an instance in a 1-node cluster: the instance
+ * will wait until the first voting has been finished to send
+ * the TOPOLOGY_INIT event - BUT even before that the API method
+ * getTopology() is open - so if anyone asks for the topology
+ * BEFORE the first voting in a 1-node cluster is done, it gets
+ * a particular clusterId - that one we aim to reuse for the first
+ * voting.
*/
public void initialize(final DiscoveryServiceImpl discoveryService,
final String initialVotingId) {
synchronized(lock) {
this.discoveryService = discoveryService;
this.nextVotingId = initialVotingId;
+ logger.info("initialize: nextVotingId="+nextVotingId);
issueHeartbeat();
}
@@ -281,7 +287,7 @@ public class HeartbeatHandler implements
* and then a remote heartbeat (to all the topology connectors
* which announce this part of the topology to others)
*/
- private void issueHeartbeat() {
+ void issueHeartbeat() {
if (discoveryService == null) {
logger.error("issueHeartbeat: discoveryService is null");
} else {
@@ -462,7 +468,7 @@ public class HeartbeatHandler implements
/** Check whether the established view matches the reality, ie matches the
* heartbeats
*/
- private void checkView() {
+ void checkView() {
// check the remotes first
if (announcementRegistry == null) {
logger.error("announcementRegistry is null");
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorClient.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorClient.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorClient.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorClient.java Tue Sep 22 12:35:19 2015
@@ -49,6 +49,7 @@ import org.apache.sling.discovery.Cluste
import org.apache.sling.discovery.InstanceDescription;
import org.apache.sling.discovery.impl.Config;
import org.apache.sling.discovery.impl.cluster.ClusterViewService;
+import org.apache.sling.discovery.impl.cluster.UndefinedClusterViewException;
import org.apache.sling.discovery.impl.topology.announcement.Announcement;
import org.apache.sling.discovery.impl.topology.announcement.AnnouncementFilter;
import org.apache.sling.discovery.impl.topology.announcement.AnnouncementRegistry;
@@ -181,8 +182,15 @@ public class TopologyConnectorClient imp
Announcement topologyAnnouncement = new Announcement(
clusterViewService.getSlingId());
topologyAnnouncement.setServerInfo(serverInfo);
- final ClusterView clusterView = clusterViewService
- .getClusterView();
+ final ClusterView clusterView;
+ try {
+ clusterView = clusterViewService
+ .getClusterView();
+ } catch (UndefinedClusterViewException e) {
+ // SLING-5030 : then we cannot ping
+ logger.warn("ping: no clusterView available at the moment, cannot ping others now: "+e);
+ return;
+ }
topologyAnnouncement.setLocalCluster(clusterView);
if (force) {
logger.debug("ping: sending a resetBackoff");
@@ -194,7 +202,7 @@ public class TopologyConnectorClient imp
// filter out announcements that are of old cluster instances
// which I dont really have in my cluster view at the moment
final Iterator<InstanceDescription> it =
- clusterViewService.getClusterView().getInstances().iterator();
+ clusterView.getInstances().iterator();
while(it.hasNext()) {
final InstanceDescription instance = it.next();
if (instance.getSlingId().equals(receivingSlingId)) {
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorServlet.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorServlet.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorServlet.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/java/org/apache/sling/discovery/impl/topology/connector/TopologyConnectorServlet.java Tue Sep 22 12:35:19 2015
@@ -42,6 +42,7 @@ import org.apache.sling.commons.json.JSO
import org.apache.sling.discovery.ClusterView;
import org.apache.sling.discovery.impl.Config;
import org.apache.sling.discovery.impl.cluster.ClusterViewService;
+import org.apache.sling.discovery.impl.cluster.UndefinedClusterViewException;
import org.apache.sling.discovery.impl.topology.announcement.Announcement;
import org.apache.sling.discovery.impl.topology.announcement.AnnouncementFilter;
import org.apache.sling.discovery.impl.topology.announcement.AnnouncementRegistry;
@@ -322,6 +323,9 @@ public class TopologyConnectorServlet ex
} catch (JSONException e) {
logger.error("doPost: Got a JSONException: " + e, e);
response.sendError(500);
+ } catch (UndefinedClusterViewException e) {
+ logger.warn("doPost: no clusterView available at the moment - cannot handle connectors now: "+e);
+ response.sendError(503); // "please retry, but atm I can't help since I'm isolated"
}
}
Modified: sling/trunk/bundles/extensions/discovery/impl/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/main/resources/OSGI-INF/metatype/metatype.properties Tue Sep 22 12:35:19 2015
@@ -114,9 +114,3 @@ backoffStableFactor.description = When a
backoffStandbyFactor.name = Backoff factor for standby connectors
backoffStandbyFactor.description = When a topology connector is in standby mode (ie when it is redundant), the heartbeat \
frequency is lowered, ie the heartbeatInterval for this connector is increased , at maximum by the backoffStandbyFactor
-
-delayInitEventUntilVoted.name = Delay first INIT event until voted
-delayInitEventUntilVoted.description = Delay sending the TOPOLOGY_INIT event until the instance has finished \
- an initial round of voting within the local cluster to make sure the view and leader are established. Avoids \
- duplicate leaders on startup.
-
Modified: sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterLoadTest.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterLoadTest.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterLoadTest.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterLoadTest.java Tue Sep 22 12:35:19 2015
@@ -1,6 +1,7 @@
package org.apache.sling.discovery.impl.cluster;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import static org.junit.Assert.fail;
import java.util.Iterator;
@@ -9,7 +10,6 @@ import java.util.List;
import java.util.Random;
import org.apache.sling.discovery.impl.common.resource.EstablishedInstanceDescription;
-import org.apache.sling.discovery.impl.common.resource.IsolatedInstanceDescription;
import org.apache.sling.discovery.impl.setup.Instance;
import org.apache.sling.discovery.impl.setup.WithholdingAppender;
import org.apache.sling.testing.tools.retry.RetryLoop;
@@ -66,9 +66,15 @@ public class ClusterLoadTest {
Thread.sleep(2000);
// without any heartbeat action, the discovery service reports its local instance
// in so called 'isolated' mode - lets test for that
- assertEquals(IsolatedInstanceDescription.class, firstInstance
- .getClusterViewService().getClusterView().getInstances().get(0)
- .getClass());
+// assertEquals(IsolatedInstanceDescription.class, firstInstance
+// .getClusterViewService().getClusterView().getInstances().get(0)
+// .getClass());
+ try{
+ firstInstance.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // SLING-5030: expected - without any heartbeat there is no established view yet, so getClusterView() throws
+ }
firstInstance.startHeartbeats(1);
Thread.sleep(4000);
// after a heartbeat and letting it settle, the discovery service must have
Modified: sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterTest.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterTest.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterTest.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/ClusterTest.java Tue Sep 22 12:35:19 2015
@@ -169,11 +169,19 @@ public class ClusterTest {
assertNotNull(instance1);
assertNotNull(instance2);
- // the two instances are still isolated - so in a cluster of size 1
- assertEquals(1, instance1.getClusterViewService().getClusterView().getInstances().size());
- assertEquals(1, instance2.getClusterViewService().getClusterView().getInstances().size());
- assertTrue(instance1.getLocalInstanceDescription().isLeader());
- assertTrue(instance2.getLocalInstanceDescription().isLeader());
+ // the two instances are still isolated - hence they throw an exception
+ try{
+ instance1.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
+ try{
+ instance2.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
// let the sync/voting happen
for(int m=0; m<4; m++) {
@@ -456,7 +464,11 @@ public class ClusterTest {
// join the instances to form a cluster by sending out heartbeats
runHeartbeatOnceWith(instance1, instance2, instance3, instance5);
+ Thread.sleep(500);
+ runHeartbeatOnceWith(instance1, instance2, instance3, instance5);
+ Thread.sleep(500);
runHeartbeatOnceWith(instance1, instance2, instance3, instance5);
+ Thread.sleep(500);
assertSameTopology(new SimpleClusterView(instance1, instance2));
assertSameTopology(new SimpleClusterView(instance3));
@@ -541,7 +553,7 @@ public class ClusterTest {
logger.info("testDuplicateInstance3726: end");
}
- private void assertSameTopology(SimpleClusterView... clusters) {
+ private void assertSameTopology(SimpleClusterView... clusters) throws UndefinedClusterViewException {
if (clusters==null) {
return;
}
@@ -590,10 +602,16 @@ public class ClusterTest {
// start instance4 in a separate cluster
instance4 = Instance.newStandaloneInstance("/var/discovery/implremote4/", "remoteInstance4", false, Integer.MAX_VALUE /* no timeout */, 1);
- assertNotSameClusterIds(instance2, instance4);
- assertNotSameClusterIds(instance3, instance4);
+ try{
+ instance4.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
// instead, now start a connector from instance3 to instance2
+ instance4.runHeartbeatOnce();
+ instance4.runHeartbeatOnce();
pingConnector(instance3, instance4);
// start instance 1
@@ -674,10 +692,23 @@ public class ClusterTest {
// now launch the remote instance
instance3 = Instance.newStandaloneInstance("/var/discovery/implremote/", "remoteInstance", false, Integer.MAX_VALUE /* no timeout */, 1);
assertSameClusterIds(instance1, instance2);
- assertNotSameClusterIds(instance1, instance3);
+ try{
+ instance3.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException ue) {
+ // ok
+ }
+ assertEquals(0, instance1.getAnnouncementRegistry().listLocalAnnouncements().size());
+ assertEquals(0, instance1.getAnnouncementRegistry().listLocalIncomingAnnouncements().size());
+ assertEquals(0, instance2.getAnnouncementRegistry().listLocalAnnouncements().size());
+ assertEquals(0, instance2.getAnnouncementRegistry().listLocalIncomingAnnouncements().size());
+ assertEquals(0, instance3.getAnnouncementRegistry().listLocalAnnouncements().size());
+ assertEquals(0, instance3.getAnnouncementRegistry().listLocalIncomingAnnouncements().size());
// create a topology connector from instance3 to instance1
// -> corresponds to starting to ping
+ instance3.runHeartbeatOnce();
+ instance3.runHeartbeatOnce();
pingConnector(instance3, instance1);
// make asserts on the topology
instance1.dumpRepo();
@@ -708,7 +739,7 @@ public class ClusterTest {
return instance1SlingId;
}
- private void assertNotSameClusterIds(Instance... instances) {
+ private void assertNotSameClusterIds(Instance... instances) throws UndefinedClusterViewException {
if (instances==null) {
fail("must not pass empty set of instances here");
}
@@ -730,7 +761,7 @@ public class ClusterTest {
}
}
- private void assertSameClusterIds(Instance... instances) {
+ private void assertSameClusterIds(Instance... instances) throws UndefinedClusterViewException {
if (instances==null) {
// then there is nothing to compare
return;
@@ -827,7 +858,7 @@ public class ClusterTest {
return true;
}
- private boolean pingConnector(final Instance from, final Instance to) {
+ private boolean pingConnector(final Instance from, final Instance to) throws UndefinedClusterViewException {
final Announcement fromAnnouncement = createFromAnnouncement(from);
Announcement replyAnnouncement = null;
try{
@@ -835,7 +866,10 @@ public class ClusterTest {
} catch(AssertionError e) {
logger.warn("pingConnector: ping failed, assertionError: "+e);
return false;
- }
+ } catch (UndefinedClusterViewException e) {
+ logger.warn("pingConnector: ping failed, currently the cluster view is undefined: "+e);
+ return false;
+ }
registerReplyAnnouncement(from, replyAnnouncement);
return true;
}
@@ -860,7 +894,8 @@ public class ClusterTest {
// statusDetails = null;
}
- private Announcement ping(Instance to, final Announcement incomingTopologyAnnouncement) {
+ private Announcement ping(Instance to, final Announcement incomingTopologyAnnouncement)
+ throws UndefinedClusterViewException {
final String slingId = to.slingId;
final ClusterViewService clusterViewService = to.getClusterViewService();
final AnnouncementRegistry announcementRegistry = to.getAnnouncementRegistry();
@@ -910,7 +945,7 @@ public class ClusterTest {
}
}
- private Announcement createFromAnnouncement(final Instance from) {
+ private Announcement createFromAnnouncement(final Instance from) throws UndefinedClusterViewException {
// TODO: refactor TopologyConnectorClient to avoid duplicating code from there (ping())
Announcement topologyAnnouncement = new Announcement(from.slingId);
topologyAnnouncement.setServerInfo(from.slingId);
@@ -922,7 +957,7 @@ public class ClusterTest {
// filter out announcements that are of old cluster instances
// which I dont really have in my cluster view at the moment
final Iterator<InstanceDescription> it =
- from.getClusterViewService().getClusterView().getInstances().iterator();
+ clusterView.getInstances().iterator();
while(it.hasNext()) {
final InstanceDescription instance = it.next();
if (instance.getSlingId().equals(receivingSlingId)) {
@@ -954,14 +989,18 @@ public class ClusterTest {
assertNotNull(instance1);
assertNotNull(instance2);
- String clusterId1 = instance1.getClusterViewService()
- .getClusterView().getId();
- String clusterId2 = instance2.getClusterViewService()
- .getClusterView().getId();
- // the cluster ids must differ
- assertNotEquals(clusterId1, clusterId2);
- assertEquals(1, instance1.getClusterViewService().getClusterView().getInstances().size());
- assertEquals(1, instance2.getClusterViewService().getClusterView().getInstances().size());
+ try{
+ instance1.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
+ try{
+ instance2.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
// let the sync/voting happen
instance1.runHeartbeatOnce();
@@ -980,10 +1019,6 @@ public class ClusterTest {
// both cluster ids must be the same
assertEquals(newClusterId1, newClusterId1);
- // either instance1 or instance2 must have kept the cluster id
- if (!newClusterId1.equals(clusterId1)) {
- assertEquals(newClusterId2, clusterId2);
- }
instance1.dumpRepo();
assertEquals(2, instance1.getClusterViewService().getClusterView().getInstances().size());
assertEquals(2, instance2.getClusterViewService().getClusterView().getInstances().size());
@@ -1007,8 +1042,13 @@ public class ClusterTest {
// the cluster should now have size 1
assertEquals(1, instance1.getClusterViewService().getClusterView().getInstances().size());
// the instance 2 should be in isolated mode as it is no longer in the established view
- // hence also size 1
- assertEquals(1, instance2.getClusterViewService().getClusterView().getInstances().size());
+ // hence getClusterView() throws an UndefinedClusterViewException
+ try{
+ instance2.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
// but the cluster id must have remained stable
instance1.dumpRepo();
@@ -1037,15 +1077,24 @@ public class ClusterTest {
assertEquals(instance3.getSlingId(), instance3.getClusterViewService()
.getSlingId());
- int numC1 = instance1.getClusterViewService().getClusterView()
- .getInstances().size();
- assertEquals(1, numC1);
- int numC2 = instance2.getClusterViewService().getClusterView()
- .getInstances().size();
- assertEquals(1, numC2);
- int numC3 = instance3.getClusterViewService().getClusterView()
- .getInstances().size();
- assertEquals(1, numC3);
+ try{
+ instance1.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
+ try{
+ instance2.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
+ try{
+ instance3.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
instance1.dumpRepo();
@@ -1096,12 +1145,18 @@ public class ClusterTest {
assertEquals(instance2.getSlingId(), instance2.getClusterViewService()
.getSlingId());
- int numC1 = instance1.getClusterViewService().getClusterView()
- .getInstances().size();
- assertEquals(1, numC1);
- int numC2 = instance2.getClusterViewService().getClusterView()
- .getInstances().size();
- assertEquals(1, numC2);
+ try{
+ instance1.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
+ try{
+ instance2.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
instance1.runHeartbeatOnce();
instance2.runHeartbeatOnce();
@@ -1204,19 +1259,19 @@ public class ClusterTest {
logger.info("testPropertyProviders: end");
}
- private void assertPropertyValues() {
+ private void assertPropertyValues() throws UndefinedClusterViewException {
assertPropertyValues(instance1.getSlingId(), property1Name,
property1Value);
assertPropertyValues(instance2.getSlingId(), property2Name,
property2Value);
}
- private void assertPropertyValues(String slingId, String name, String value) {
+ private void assertPropertyValues(String slingId, String name, String value) throws UndefinedClusterViewException {
assertEquals(value, getInstance(instance1, slingId).getProperty(name));
assertEquals(value, getInstance(instance2, slingId).getProperty(name));
}
- private InstanceDescription getInstance(Instance instance, String slingId) {
+ private InstanceDescription getInstance(Instance instance, String slingId) throws UndefinedClusterViewException {
Iterator<InstanceDescription> it = instance.getClusterViewService()
.getClusterView().getInstances().iterator();
while (it.hasNext()) {
Modified: sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/SingleInstanceTest.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/SingleInstanceTest.java?rev=1704604&r1=1704603&r2=1704604&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/SingleInstanceTest.java (original)
+++ sling/trunk/bundles/extensions/discovery/impl/src/test/java/org/apache/sling/discovery/impl/cluster/SingleInstanceTest.java Tue Sep 22 12:35:19 2015
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertNot
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
import java.util.List;
import java.util.Map;
@@ -36,7 +37,6 @@ import org.apache.sling.discovery.Topolo
import org.apache.sling.discovery.TopologyEvent.Type;
import org.apache.sling.discovery.impl.cluster.helpers.AssertingTopologyEventListener;
import org.apache.sling.discovery.impl.common.resource.EstablishedInstanceDescription;
-import org.apache.sling.discovery.impl.common.resource.IsolatedInstanceDescription;
import org.apache.sling.discovery.impl.setup.Instance;
import org.apache.sling.discovery.impl.setup.PropertyProviderImpl;
import org.junit.After;
@@ -63,7 +63,7 @@ public class SingleInstanceTest {
logger.info("setup: creating new standalone instance");
instance = Instance.newStandaloneInstance("/var/discovery/impl/", "standaloneInstance", true,
20, 999/*long enough heartbeat interval to prevent them to disturb the explicit heartbeats during the test*/,
- 3, UUID.randomUUID().toString(), true);
+ 3, UUID.randomUUID().toString());
logger.info("setup: creating new standalone instance done.");
}
@@ -80,12 +80,20 @@ public class SingleInstanceTest {
}
@Test
- public void testGetters() {
+ public void testGetters() throws UndefinedClusterViewException {
logger.info("testGetters: start");
assertNotNull(instance);
logger.info("sling id=" + instance.getSlingId());
- assertNotNull(instance.getClusterViewService().getClusterView());
+ try{
+ instance.getClusterViewService().getClusterView();
+ fail("should complain"); // SLING-5030
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
+ instance.runHeartbeatOnce();
+
+ assertNotNull(instance.getClusterViewService().getClusterView());
ClusterView cv = instance.getClusterViewService().getClusterView();
logger.info("cluster view: id=" + cv.getId());
assertNotNull(cv.getId());
@@ -121,6 +129,7 @@ public class SingleInstanceTest {
pp.setProperty(propertyName, propertyValue);
instance.bindPropertyProvider(pp, propertyName);
+ instance.runHeartbeatOnce();
assertEquals(propertyValue,
instance.getClusterViewService().getClusterView()
.getInstances().get(0).getProperty(propertyName));
@@ -141,6 +150,10 @@ public class SingleInstanceTest {
@Test
public void testInvalidProperties() throws Throwable {
logger.info("testInvalidProperties: start");
+
+ instance.runHeartbeatOnce();
+ instance.runHeartbeatOnce();
+
final String propertyValue = UUID.randomUUID().toString();
doTestProperty(UUID.randomUUID().toString(), propertyValue, propertyValue);
@@ -223,9 +236,13 @@ public class SingleInstanceTest {
@Test
public void testBootstrap() throws Throwable {
logger.info("testBootstrap: start");
- ClusterView initialClusterView = instance.getClusterViewService()
- .getClusterView();
- assertNotNull(initialClusterView);
+ try{
+ instance.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // SLING-5030 : isolated mode is gone, replaced with exception
+ // ok
+ }
// SLING-3750 : with delaying the init event, we now should NOT get any events
// before we let the view establish (which happens via heartbeats below)
@@ -234,15 +251,13 @@ public class SingleInstanceTest {
assertEquals(0, ada.getEvents().size());
assertEquals(0, ada.getUnexpectedCount());
- // hard assumption that the class we get is an
- // IsolatedInstanceDescription
- // this is because we dont have any established clusterview yet - hence
- // still entirely isolated
- assertEquals(IsolatedInstanceDescription.class, initialClusterView
- .getInstances().get(0).getClass());
- assertEquals(IsolatedInstanceDescription.class, instance
- .getClusterViewService().getClusterView().getInstances().get(0)
- .getClass());
+ try{
+ instance.getClusterViewService().getClusterView();
+ fail("should complain");
+ } catch(UndefinedClusterViewException e) {
+ // ok
+ }
+
ada.addExpected(Type.TOPOLOGY_INIT);
instance.runHeartbeatOnce();
Thread.sleep(1000);
@@ -253,14 +268,11 @@ public class SingleInstanceTest {
assertEquals(1, ada.getEvents().size());
TopologyEvent initEvent = ada.getEvents().remove(0);
assertNotNull(initEvent);
-
- assertEquals(initialClusterView.getId(), initEvent.getNewView()
- .getClusterViews().iterator().next().getId());
- assertEquals(initialClusterView.getInstances().get(0).getSlingId(),
- initEvent.getNewView().getLocalInstance().getSlingId());
+ assertNotNull(initEvent.getNewView());
+ assertNotNull(initEvent.getNewView().getClusterViews());
// after the view was established though, we expect it to be a normal
- // ResourceInstanceDescription
+ // EstablishedInstanceDescription
assertEquals(EstablishedInstanceDescription.class, instance
.getClusterViewService().getClusterView().getInstances().get(0)
.getClass());