You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by se...@apache.org on 2015/08/03 17:16:10 UTC

[19/32] incubator-ignite git commit: Squashed commit of the following:

Squashed commit of the following:

commit f55a17f71ec97513a6968b1ea3c359bc6238cc5e
Author: Yakov Zhdanov <yz...@gridgain.com>
Date:   Fri Jul 31 13:32:32 2015 +0300

    review

commit 58ca345f622dbadfba7ef2d3dce850c4baa1f319
Merge: 5f921f6 7ed4d15
Author: Yakov Zhdanov <yz...@gridgain.com>
Date:   Fri Jul 31 13:24:51 2015 +0300

    Merge branches 'ignite-752-2' and 'master' of https://git-wip-us.apache.org/repos/asf/incubator-ignite into ignite-752-2

commit 5f921f62dd6563a88b2ecdde92a2b2ee8218ec95
Author: Denis Magda <dm...@gridgain.com>
Date:   Wed Jul 29 10:40:44 2015 +0300

    ignite-752-2: added info on the lowest failure detection timeout to the documentation

commit 55f0eb56967d2cc9bdf62c3fb665521a59ddaf33
Author: Denis Magda <dm...@gridgain.com>
Date:   Wed Jul 29 09:15:29 2015 +0300

    ignite-752-2: supported connection check frequency even for cases when failure timeout is ignored; performance optimizations


Project: http://git-wip-us.apache.org/repos/asf/incubator-ignite/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ignite/commit/44072f80
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ignite/tree/44072f80
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ignite/diff/44072f80

Branch: refs/heads/ignite-1142
Commit: 44072f806d8d14d716475a3665d0afdf004c6db2
Parents: 7ed4d15
Author: Denis Magda <dm...@gridgain.com>
Authored: Fri Jul 31 13:35:46 2015 +0300
Committer: Yakov Zhdanov <yz...@gridgain.com>
Committed: Fri Jul 31 13:35:46 2015 +0300

----------------------------------------------------------------------
 .../ignite/spi/discovery/tcp/ServerImpl.java    | 42 +++++++++++---------
 .../spi/discovery/tcp/TcpDiscoverySpi.java      |  2 +-
 2 files changed, 24 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/44072f80/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
index 547347c..47ba8e6 100644
--- a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
+++ b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
@@ -1787,6 +1787,9 @@ class ServerImpl extends TcpDiscoveryImpl {
         /** Connection check frequency. */
         private long connCheckFreq;
 
+        /** Connection check threshold. */
+        private long connCheckThreshold;
+
         /**
          */
         protected RingMessageWorker() {
@@ -1799,19 +1802,22 @@ class ServerImpl extends TcpDiscoveryImpl {
          * Initializes connection check frequency. Used only when failure detection timeout is enabled.
          */
         private void initConnectionCheckFrequency() {
-            if (spi.failureDetectionTimeoutEnabled()) {
-                for (int i = 3; i > 0; i--) {
-                    connCheckFreq = spi.failureDetectionTimeout() / i;
-
-                    if (connCheckFreq > 0)
-                        break;
-                }
+            if (spi.failureDetectionTimeoutEnabled())
+                connCheckThreshold = spi.failureDetectionTimeout();
+            else
+                connCheckThreshold = Math.min(spi.getSocketTimeout(), spi.getHeartbeatFrequency());
 
-                assert connCheckFreq > 0;
+            for (int i = 3; i > 0; i--) {
+                connCheckFreq = connCheckThreshold / i;
 
-                if (log.isDebugEnabled())
-                    log.debug("Connection check frequency is calculated: " + connCheckFreq);
+                if (connCheckFreq > 10)
+                    break;
             }
+
+            assert connCheckFreq > 0;
+
+            if (log.isDebugEnabled())
+                log.debug("Connection check frequency is calculated: " + connCheckFreq);
         }
 
         /**
@@ -2306,9 +2312,9 @@ class ServerImpl extends TcpDiscoveryImpl {
 
                             // If node existed on connection initialization we should check
                             // whether it has not gone yet.
-                            if (nextNodeExists && pingNode(next))
-                                U.error(log, "Failed to send message to next node [msg=" + msg +
-                                    ", next=" + next + ']', err);
+                            if (nextNodeExists)
+                                U.warn(log, "Failed to send message to next node [msg=" + msg + ", next=" + next +
+                                    ", errMsg=" + (err != null ? err.getMessage() : "N/A") + ']');
                             else if (log.isDebugEnabled())
                                 log.debug("Failed to send message to next node [msg=" + msg + ", next=" + next +
                                     ", errMsg=" + (err != null ? err.getMessage() : "N/A") + ']');
@@ -4025,7 +4031,7 @@ class ServerImpl extends TcpDiscoveryImpl {
 
         /**
          * Check the last time a heartbeat message received. If the time is bigger than {@code hbCheckTimeout} than
-         * {@link TcpDiscoveryStatusCheckMessage} is sent accros the ring.
+         * {@link TcpDiscoveryStatusCheckMessage} is sent across the ring.
          */
         private void checkHeartbeatsReceiving() {
             if (lastTimeStatusMsgSent < locNode.lastUpdateTime())
@@ -4045,11 +4051,9 @@ class ServerImpl extends TcpDiscoveryImpl {
          * Check connection aliveness status.
          */
         private void checkConnection() {
-            if (!spi.failureDetectionTimeoutEnabled())
-                return;
-
-            if (!failureThresholdReached && U.currentTimeMillis() - locNode.lastDataReceivedTime()
-                >= spi.failureDetectionTimeout() && ring.hasRemoteNodes() && spiStateCopy() == CONNECTED) {
+            if (spi.failureDetectionTimeoutEnabled() && !failureThresholdReached &&
+                U.currentTimeMillis() - locNode.lastDataReceivedTime() >= connCheckThreshold &&
+                ring.hasRemoteNodes() && spiStateCopy() == CONNECTED) {
 
                 log.info("Local node seems to be disconnected from topology (failure detection timeout " +
                     "is reached): [failureDetectionTimeout=" + spi.failureDetectionTimeout() +

http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/44072f80/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoverySpi.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoverySpi.java b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoverySpi.java
index 09690dc..3216166 100644
--- a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoverySpi.java
+++ b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoverySpi.java
@@ -74,7 +74,7 @@ import java.util.concurrent.atomic.*;
  * {@link IgniteConfiguration#setFailureDetectionTimeout(long)}. This failure timeout automatically controls the
  * following parameters: {@link #getSocketTimeout()}, {@link #getAckTimeout()}, {@link #getMaxAckTimeout()},
  * {@link #getReconnectCount()}. If any of those parameters is set explicitly, then the failure timeout setting will be
- * ignored.
+ * ignored. As an example, for stable low-latency networks the failure detection timeout may be set to ~120 ms.
  * <p>
  * If it's required to perform advanced settings of failure detection and
  * {@link IgniteConfiguration#getFailureDetectionTimeout()} is unsuitable then various {@code TcpDiscoverySpi}