You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@qpid.apache.org by kw...@apache.org on 2014/11/13 17:24:06 UTC

svn commit: r1639380 - /qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java

Author: kwall
Date: Thu Nov 13 16:24:06 2014
New Revision: 1639380

URL: http://svn.apache.org/r1639380
Log:
QPID-6225: [Java Broker] Reduce the frequency with which the failure to ping a remote node is reported

Modified:
    qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java

Modified: qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java?rev=1639380&r1=1639379&r2=1639380&view=diff
==============================================================================
--- qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java (original)
+++ qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java Thu Nov 13 16:24:06 2014
@@ -1496,6 +1496,8 @@ public class ReplicatedEnvironmentFacade
 
     private class RemoteNodeStateLearner implements Callable<Void>
     {
+        private static final long TIMEOUT_WARN_GAP = 1000 * 60 * 5;
+        private final Map<ReplicationNode, Long> _currentlyTimedOutNodes = new HashMap<>();
         private Map<String, ReplicatedEnvironment.State> _previousGroupState = Collections.emptyMap();
         private boolean _previousDesignatedPrimary;
         private int _previousElectableGroupOverride;
@@ -1624,7 +1626,7 @@ public class ReplicatedEnvironmentFacade
         private Map<ReplicationNode, NodeState> discoverNodeStates(Collection<ReplicationNode> electableNodes)
         {
             final Map<ReplicationNode, NodeState> nodeStates = new HashMap<ReplicationNode, NodeState>();
-            Set<Future<Void>> futures = new HashSet<Future<Void>>();
+            Map<ReplicationNode, Future<Void>> futureMap = new HashMap<ReplicationNode, Future<Void>>();
 
             for (final ReplicationNode node : electableNodes)
             {
@@ -1649,14 +1651,24 @@ public class ReplicatedEnvironmentFacade
                         return null;
                     }
                 });
-                futures.add(future);
+                futureMap.put(node, future);
             }
 
-            for (Future<Void> future : futures)
+            boolean atLeastOneNodeTimesOut = false;
+
+            for (Map.Entry<ReplicationNode, Future<Void>> entry : futureMap.entrySet())
             {
+                ReplicationNode node = entry.getKey();
+                String nodeName = node.getName();
+                Future<Void> future = entry.getValue();
                 try
                 {
                     future.get(_remoteNodeMonitorInterval, TimeUnit.MILLISECONDS);
+                    if (_currentlyTimedOutNodes.remove(node) != null)
+                    {
+                        LOGGER.warn("Node '" + nodeName + "' from group " + _configuration.getGroupName()
+                                    + " is responding again.");
+                    }
                 }
                 catch (InterruptedException e)
                 {
@@ -1664,14 +1676,34 @@ public class ReplicatedEnvironmentFacade
                 }
                 catch (ExecutionException e)
                 {
-                    LOGGER.warn("Cannot update node state for group " + _configuration.getGroupName(), e.getCause());
+                    LOGGER.warn("Cannot determine state for node '" + nodeName + "' from group "
+                                + _configuration.getGroupName(), e.getCause());
                 }
                 catch (TimeoutException e)
                 {
-                    LOGGER.warn("Timeout whilst updating node state for group " + _configuration.getGroupName());
+                    atLeastOneNodeTimesOut = true;
+                    if (! _currentlyTimedOutNodes.containsKey(node))
+                    {
+                        LOGGER.warn("Timeout whilst determining state for node '" + nodeName + "' from group "
+                                    + _configuration.getGroupName());
+                        _currentlyTimedOutNodes.put(node, System.currentTimeMillis());
+                    }
+                    else if (_currentlyTimedOutNodes.get(node) > (System.currentTimeMillis() + TIMEOUT_WARN_GAP))
+                    {
+                        LOGGER.warn("Node '" + nodeName + "' from group "
+                                    + _configuration.getGroupName()
+                                    + " is still timing out.");
+                        _currentlyTimedOutNodes.put(node, System.currentTimeMillis());
+                    }
+
                     future.cancel(true);
                 }
             }
+
+            if (!atLeastOneNodeTimesOut)
+            {
+                _currentlyTimedOutNodes.clear();
+            }
             return nodeStates;
         }
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@qpid.apache.org
For additional commands, e-mail: commits-help@qpid.apache.org