You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ignite.apache.org by am...@apache.org on 2019/06/05 11:08:06 UTC
[ignite] 03/31: GG-18877 additional heartbeat to prevent
FailureProcessor from treating tcp-comm-worker as blocked
This is an automated email from the ASF dual-hosted git repository.
amashenkov pushed a commit to branch gg-19225
in repository https://gitbox.apache.org/repos/asf/ignite.git
commit 4cc49e4a1f30e6c64d9aac22c81db7ce7794422b
Author: Sergey Chugunov <se...@gmail.com>
AuthorDate: Wed May 29 18:34:40 2019 +0300
GG-18877 additional heartbeat to prevent FailureProcessor from treating tcp-comm-worker as blocked
Signed-off-by: Dmitriy Govorukhin <dm...@gmail.com>
(cherry-picked from commit #8905c3f)
---
.../client/suite/IgniteClientTestSuite.java | 3 +
.../spi/communication/tcp/TcpCommunicationSpi.java | 5 +
.../ignite/internal/IgniteClientFailuresTest.java | 160 +++++++++++++++++++++
3 files changed, 168 insertions(+)
diff --git a/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java b/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java
index e0c3249..075e61c 100644
--- a/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java
+++ b/modules/clients/src/test/java/org/apache/ignite/internal/client/suite/IgniteClientTestSuite.java
@@ -18,6 +18,7 @@ package org.apache.ignite.internal.client.suite;
import junit.framework.JUnit4TestAdapter;
import junit.framework.TestSuite;
+import org.apache.ignite.internal.IgniteClientFailuresTest;
import org.apache.ignite.internal.TaskEventSubjectIdSelfTest;
import org.apache.ignite.internal.client.ClientDefaultCacheSelfTest;
import org.apache.ignite.internal.client.ClientReconnectionSelfTest;
@@ -171,6 +172,8 @@ public class IgniteClientTestSuite extends TestSuite {
// SSL params.
suite.addTest(new JUnit4TestAdapter(ClientSslParametersTest.class));
+ suite.addTest(new JUnit4TestAdapter(IgniteClientFailuresTest.class));
+
return suite;
}
}
diff --git a/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java b/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java
index 0776a5d..2a9fb9a 100755
--- a/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java
+++ b/modules/core/src/main/java/org/apache/ignite/spi/communication/tcp/TcpCommunicationSpi.java
@@ -3551,6 +3551,11 @@ public class TcpCommunicationSpi extends IgniteSpiAdapter implements Communicati
break;
}
}
+
+ CommunicationWorker commWorker0 = commWorker;
+
+ if (commWorker0 != null && commWorker0.runner() == Thread.currentThread())
+ commWorker0.updateHeartbeat();
}
if (client != null)
diff --git a/modules/core/src/test/java/org/apache/ignite/internal/IgniteClientFailuresTest.java b/modules/core/src/test/java/org/apache/ignite/internal/IgniteClientFailuresTest.java
new file mode 100644
index 0000000..82522ae
--- /dev/null
+++ b/modules/core/src/test/java/org/apache/ignite/internal/IgniteClientFailuresTest.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2019 GridGain Systems, Inc. and Contributors.
+ *
+ * Licensed under the GridGain Community Edition License (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.gridgain.com/products/software/community-edition/gridgain-community-edition-license
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ignite.internal;
+
+import org.apache.ignite.IgniteCache;
+import org.apache.ignite.configuration.CacheConfiguration;
+import org.apache.ignite.configuration.IgniteConfiguration;
+import org.apache.ignite.internal.cluster.IgniteClusterEx;
+import org.apache.ignite.internal.managers.GridManagerAdapter;
+import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi;
+import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
+import org.apache.ignite.testframework.GridStringLogger;
+import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
+import org.apache.ignite.testframework.junits.logger.GridTestLog4jLogger;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests how a server node behaves after a client node is broken by simulating failure of its communication and discovery SPIs.
+ */
+public class IgniteClientFailuresTest extends GridCommonAbstractTest {
+ /** When {@code true}, {@link #getConfiguration(String)} configures the node being started as a client. */
+ private boolean clientMode;
+
+ /** Logger set on non-client nodes; assigned only in {@link #testNoMessagesFromFailureProcessor()}, {@code null} otherwise. */
+ private GridStringLogger inMemoryLog;
+
+ /** {@inheritDoc} */
+ @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
+ IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
+
+ cfg.setClientMode(clientMode);
+
+ if (!clientMode) {
+ cfg.setClientFailureDetectionTimeout(10_000);
+
+ cfg.setSystemWorkerBlockedTimeout(5_000);
+
+ cfg.setGridLogger(inMemoryLog);
+ }
+
+ return cfg;
+ }
+
+ /** Calls {@code stopAllGrids()} before each test. */
+ @Before
+ public void setupClientFailuresTest() {
+ stopAllGrids();
+ }
+
+ /** Calls {@code stopAllGrids()} after each test. */
+ @After
+ public void tearDownClientFailuresTest() {
+ stopAllGrids();
+ }
+
+ /**
+ * Verifies that FailureProcessor doesn't treat the tcp-comm-worker thread as blocked while
+ * the thread handles a failed client node, and thus doesn't print a full thread dump into the logs.
+ *
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testNoMessagesFromFailureProcessor() throws Exception {
+ inMemoryLog = new GridStringLogger(false, new GridTestLog4jLogger());
+
+ inMemoryLog.logLength(1024 * 1024);
+
+ IgniteEx srv = startGrid(0);
+
+ clientMode = true;
+
+ IgniteEx client00 = startGrid("client00");
+
+ client00.getOrCreateCache(new CacheConfiguration<>("cache0"));
+
+ breakClient(client00);
+
+ boolean waitRes = GridTestUtils.waitForCondition(() -> {
+ IgniteClusterEx cl = srv.cluster();
+
+ return (cl.topology(cl.topologyVersion()).size() == 1);
+ }, 30_000);
+
+ assertTrue(waitRes);
+
+ assertFalse(inMemoryLog.toString().contains("name=tcp-comm-worker"));
+ }
+
+ /**
+ * Verifies that a failed client node still present in topology (its {@link IgniteConfiguration#clientFailureDetectionTimeout} has not elapsed yet)
+ * doesn't affect a new client connecting from the same address.
+ *
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testFailedClientLeavesTopologyAfterTimeout() throws Exception {
+ IgniteEx srv0 = startGrid(0);
+
+ clientMode = true;
+
+ IgniteEx client00 = startGrid("client00");
+
+ Thread.sleep(5_000);
+
+ client00.getOrCreateCache(new CacheConfiguration<>("cache0"));
+
+ breakClient(client00);
+
+ final IgniteClusterEx cl = srv0.cluster();
+
+ assertEquals(2, cl.topology(cl.topologyVersion()).size());
+
+ IgniteEx client01 = startGrid("client01");
+
+ assertEquals(3, cl.topology(cl.topologyVersion()).size());
+
+ boolean waitRes = GridTestUtils.waitForCondition(() -> (cl.topology(cl.topologyVersion()).size() == 2),
+ 20_000);
+
+ checkCacheOperations(client01.cache("cache0"));
+
+ assertTrue(waitRes);
+ }
+
+ /** Puts keys 0..99 mapped to themselves into the given cache, then asserts that each key reads back its own value. */
+ private void checkCacheOperations(IgniteCache cache) {
+ for (int i = 0; i < 100; i++)
+ cache.put(i, i);
+
+ for (int i = 0; i < 100; i++)
+ assertEquals(i, cache.get(i));
+ }
+
+ /** Calls {@code simulateNodeFailure()} on the client's first communication SPI and then on its first discovery SPI. */
+ private void breakClient(IgniteEx client) {
+ Object discoSpi = ((Object[])GridTestUtils.getFieldValue(client.context().discovery(), GridManagerAdapter.class, "spis"))[0];
+
+ Object commSpi = ((Object[])GridTestUtils.getFieldValue(client.context().io(), GridManagerAdapter.class, "spis"))[0];
+
+ ((TcpCommunicationSpi)commSpi).simulateNodeFailure();
+
+ ((TcpDiscoverySpi)discoSpi).simulateNodeFailure();
+ }
+}