You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by am...@apache.org on 2019/06/05 11:08:06 UTC

[ignite] 03/31: GG-18877 additional heartbeat to prevent FailureProcessor from treating tcp-comm-worker as blocked

This is an automated email from the ASF dual-hosted git repository.

amashenkov pushed a commit to branch gg-19225
in repository https://gitbox.apache.org/repos/asf/ignite.git

commit 4cc49e4a1f30e6c64d9aac22c81db7ce7794422b
Author: Sergey Chugunov <se...@gmail.com>
AuthorDate: Wed May 29 18:34:40 2019 +0300

    GG-18877 additional heartbeat to prevent FailureProcessor from treating tcp-comm-worker as blocked
    
    Signed-off-by: Dmitriy Govorukhin <dm...@gmail.com>
    
    (cherry-picked from commit #8905c3f)
---
 .../client/suite/IgniteClientTestSuite.java        |   3 +
 .../spi/communication/tcp/TcpCommunicationSpi.java |   5 +
 .../ignite/internal/IgniteClientFailuresTest.java  | 160 +++++++++++++++++++++
 3 files changed, 168 insertions(+)

diff --git a/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java b/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java
index e0c3249..075e61c 100644
--- a/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java
+++ b/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java
@@ -18,6 +18,7 @@ package org.apache.ignite.internal.client.suite;
 
 import junit.framework.JUnit4TestAdapter;
 import junit.framework.TestSuite;
+import org.apache.ignite.internal.IgniteClientFailuresTest;
 import org.apache.ignite.internal.TaskEventSubjectIdSelfTest;
 import org.apache.ignite.internal.client.ClientDefaultCacheSelfTest;
 import org.apache.ignite.internal.client.ClientReconnectionSelfTest;
@@ -171,6 +172,8 @@ public class IgniteClientTestSuite extends TestSuite {
         // SSL params.
             suite.addTest(new JUnit4TestAdapter(ClientSslParametersTest.class));
 
+            suite.addTest(new JUnit4TestAdapter(IgniteClientFailuresTest.class));
+
         return suite;
     }
 }
diff --git a/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java b/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java
index 0776a5d..2a9fb9a 100755
--- a/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java
+++ b/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java
@@ -3551,6 +3551,11 @@ public class TcpCommunicationSpi extends IgniteSpiAdapter implements Communicati
                         break;
                     }
                 }
+
+                CommunicationWorker commWorker0 = commWorker;
+
+                if (commWorker0 != null && commWorker0.runner() == Thread.currentThread())
+                    commWorker0.updateHeartbeat();
             }
 
             if (client != null)
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/IgniteClientFailuresTest.java b/modules/core/src/test/java/org/apache/ignite/internal/IgniteClientFailuresTest.java
new file mode 100644
index 0000000..82522ae
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/internal/IgniteClientFailuresTest.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2019 GridGain Systems, Inc. and Contributors.
+ *
+ * Licensed under the GridGain Community Edition License (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.gridgain.com/products/software/community-edition/gridgain-community-edition-license
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ignite.internal;
+
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.internal.cluster.IgniteClusterEx;
+import org.apache.ignite.internal.managers.GridManagerAdapter;
+import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi;
+import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
+import org.apache.ignite.testframework.GridStringLogger;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+import org.apache.ignite.testframework.junits.logger.GridTestLog4jLogger;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests how a server node reacts to failed client nodes.
+ */
+public class IgniteClientFailuresTest extends GridCommonAbstractTest {
+    /** Client mode flag written into every configuration built by {@link #getConfiguration(String)}. */
+    private boolean clientMode;
+
+    /** Logger passed as grid logger to non-client nodes; a test inspects the output it has accumulated. */
+    private GridStringLogger inMemoryLog;
+
+    /** {@inheritDoc} */
+    @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+        IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
+
+        cfg.setClientMode(clientMode);
+
+        if (!clientMode) {
+            cfg.setClientFailureDetectionTimeout(10_000);
+
+            cfg.setSystemWorkerBlockedTimeout(5_000);
+
+            cfg.setGridLogger(inMemoryLog);
+        }
+
+        return cfg;
+    }
+
+    /** Stops all grids before each test. */
+    @Before
+    public void setupClientFailuresTest() {
+        stopAllGrids();
+    }
+
+    /** Stops all grids after each test. */
+    @After
+    public void tearDownClientFailuresTest() {
+        stopAllGrids();
+    }
+
+    /**
+     * Verifies that FailureProcessor doesn't treat the tcp-comm-worker thread as blocked while that thread
+     * handles a failed client node, and therefore doesn't print a full thread dump into the logs.
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testNoMessagesFromFailureProcessor() throws Exception {
+        inMemoryLog = new GridStringLogger(false, new GridTestLog4jLogger());
+
+        inMemoryLog.logLength(1024 * 1024);
+
+        IgniteEx srv = startGrid(0);
+
+        clientMode = true;
+
+        IgniteEx client00 = startGrid("client00");
+
+        client00.getOrCreateCache(new CacheConfiguration<>("cache0"));
+
+        breakClient(client00);
+
+        boolean waitRes = GridTestUtils.waitForCondition(() -> {
+            IgniteClusterEx cl = srv.cluster();
+
+            return (cl.topology(cl.topologyVersion()).size() == 1);
+        }, 30_000);
+
+        assertTrue(waitRes);
+
+        assertFalse(inMemoryLog.toString().contains("name=tcp-comm-worker"));
+    }
+
+    /**
+     * Verifies that a failed client node which is not yet removed from topology (because the
+     * {@link IgniteConfiguration#setClientFailureDetectionTimeout client failure detection timeout} has not elapsed) doesn't affect a new client connected from the same address.
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testFailedClientLeavesTopologyAfterTimeout() throws Exception {
+        IgniteEx srv0 = startGrid(0);
+
+        clientMode = true;
+
+        IgniteEx client00 = startGrid("client00");
+
+        Thread.sleep(5_000);
+
+        client00.getOrCreateCache(new CacheConfiguration<>("cache0"));
+
+        breakClient(client00);
+
+        final IgniteClusterEx cl = srv0.cluster();
+
+        assertEquals(2, cl.topology(cl.topologyVersion()).size());
+
+        IgniteEx client01 = startGrid("client01");
+
+        assertEquals(3, cl.topology(cl.topologyVersion()).size());
+
+        boolean waitRes = GridTestUtils.waitForCondition(() -> (cl.topology(cl.topologyVersion()).size() == 2),
+            20_000);
+
+        checkCacheOperations(client01.cache("cache0"));
+
+        assertTrue(waitRes);
+    }
+
+    /** Puts keys 0..99 (each mapped to itself) into the given cache, then asserts every key reads back its value. */
+    private void checkCacheOperations(IgniteCache cache) {
+        for (int i = 0; i < 100; i++)
+            cache.put(i, i);
+
+        for (int i = 0; i < 100; i++)
+            assertEquals(i, cache.get(i));
+    }
+
+    /** Calls simulateNodeFailure() on the client's first communication SPI and then on its first discovery SPI. */
+    private void breakClient(IgniteEx client) {
+        Object discoSpi = ((Object[])GridTestUtils.getFieldValue(client.context().discovery(), GridManagerAdapter.class, "spis"))[0];
+
+        Object commSpi = ((Object[])GridTestUtils.getFieldValue(client.context().io(), GridManagerAdapter.class, "spis"))[0];
+
+        ((TcpCommunicationSpi)commSpi).simulateNodeFailure();
+
+        ((TcpDiscoverySpi)discoSpi).simulateNodeFailure();
+    }
+}