You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@qpid.apache.org by kw...@apache.org on 2014/11/13 17:24:06 UTC
svn commit: r1639380 -
/qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java
Author: kwall
Date: Thu Nov 13 16:24:06 2014
New Revision: 1639380
URL: http://svn.apache.org/r1639380
Log:
QPID-6225: [Java Broker] Reduce the frequency with which the failure to ping a remote node is reported
Modified:
qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java
Modified: qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java?rev=1639380&r1=1639379&r2=1639380&view=diff
==============================================================================
--- qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java (original)
+++ qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java Thu Nov 13 16:24:06 2014
@@ -1496,6 +1496,8 @@ public class ReplicatedEnvironmentFacade
private class RemoteNodeStateLearner implements Callable<Void>
{
+ private static final long TIMEOUT_WARN_GAP = 1000 * 60 * 5;
+ private final Map<ReplicationNode, Long> _currentlyTimedOutNodes = new HashMap<>();
private Map<String, ReplicatedEnvironment.State> _previousGroupState = Collections.emptyMap();
private boolean _previousDesignatedPrimary;
private int _previousElectableGroupOverride;
@@ -1624,7 +1626,7 @@ public class ReplicatedEnvironmentFacade
private Map<ReplicationNode, NodeState> discoverNodeStates(Collection<ReplicationNode> electableNodes)
{
final Map<ReplicationNode, NodeState> nodeStates = new HashMap<ReplicationNode, NodeState>();
- Set<Future<Void>> futures = new HashSet<Future<Void>>();
+ Map<ReplicationNode, Future<Void>> futureMap = new HashMap<ReplicationNode, Future<Void>>();
for (final ReplicationNode node : electableNodes)
{
@@ -1649,14 +1651,24 @@ public class ReplicatedEnvironmentFacade
return null;
}
});
- futures.add(future);
+ futureMap.put(node, future);
}
- for (Future<Void> future : futures)
+ boolean atLeastOneNodeTimesOut = false;
+
+ for (Map.Entry<ReplicationNode, Future<Void>> entry : futureMap.entrySet())
{
+ ReplicationNode node = entry.getKey();
+ String nodeName = node.getName();
+ Future<Void> future = entry.getValue();
try
{
future.get(_remoteNodeMonitorInterval, TimeUnit.MILLISECONDS);
+ if (_currentlyTimedOutNodes.remove(node) != null)
+ {
+ LOGGER.warn("Node '" + nodeName + "' from group " + _configuration.getGroupName()
+ + " is responding again.");
+ }
}
catch (InterruptedException e)
{
@@ -1664,14 +1676,34 @@ public class ReplicatedEnvironmentFacade
}
catch (ExecutionException e)
{
- LOGGER.warn("Cannot update node state for group " + _configuration.getGroupName(), e.getCause());
+ LOGGER.warn("Cannot determine state for node '" + nodeName + "' from group "
+ + _configuration.getGroupName(), e.getCause());
}
catch (TimeoutException e)
{
- LOGGER.warn("Timeout whilst updating node state for group " + _configuration.getGroupName());
+ atLeastOneNodeTimesOut = true;
+ if (! _currentlyTimedOutNodes.containsKey(node))
+ {
+ LOGGER.warn("Timeout whilst determining state for node '" + nodeName + "' from group "
+ + _configuration.getGroupName());
+ _currentlyTimedOutNodes.put(node, System.currentTimeMillis());
+ }
+ else if (_currentlyTimedOutNodes.get(node) > (System.currentTimeMillis() + TIMEOUT_WARN_GAP))
+ {
+ LOGGER.warn("Node '" + nodeName + "' from group "
+ + _configuration.getGroupName()
+ + " is still timing out.");
+ _currentlyTimedOutNodes.put(node, System.currentTimeMillis());
+ }
+
future.cancel(true);
}
}
+
+ if (!atLeastOneNodeTimesOut)
+ {
+ _currentlyTimedOutNodes.clear();
+ }
return nodeStates;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@qpid.apache.org
For additional commands, e-mail: commits-help@qpid.apache.org