You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@activemq.apache.org by ni...@apache.org on 2021/08/03 12:44:20 UTC
[activemq-artemis] branch main updated: ARTEMIS-3402 Split Brain
detection should reject bad member updates
This is an automated email from the ASF dual-hosted git repository.
nigrofranz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/activemq-artemis.git
The following commit(s) were added to refs/heads/main by this push:
new 2694f8b ARTEMIS-3402 Split Brain detection should reject bad member updates
new 1883801 This closes #3672
2694f8b is described below
commit 2694f8b52e6628b6a894cbd4cc006533c4ee6894
Author: franz1981 <ni...@gmail.com>
AuthorDate: Mon Jul 26 18:55:45 2021 +0200
ARTEMIS-3402 Split Brain detection should reject bad member updates
---
.../artemis/core/server/ActiveMQServerLogger.java | 6 ++
.../server/cluster/impl/ClusterConnectionImpl.java | 12 ++-
.../tests/smoke/dnsswitch/DNSSwitchTest.java | 102 +++++++++++++++++++++
3 files changed, 116 insertions(+), 4 deletions(-)
diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServerLogger.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServerLogger.java
index bc415ed..1baf9ca 100644
--- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServerLogger.java
+++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServerLogger.java
@@ -1706,6 +1706,12 @@ public interface ActiveMQServerLogger extends BasicLogger {
"**************************************************************************************************************************************************************************************************************************************************************", format = Message.Format.MESSAGE_FORMAT)
void possibleSplitBrain(String nodeID, String connectionPairInformation);
+ @LogMessage(level = Logger.Level.WARN) // I really want emphasis on this logger, so adding the stars
+ @Message(id = 222295, value = "\n**************************************************************************************************************************************************************************************************************************************************************\n" +
+ "There is a possible split brain on nodeID {0}. Topology update ignored.\n" +
+ "**************************************************************************************************************************************************************************************************************************************************************", format = Message.Format.MESSAGE_FORMAT)
+ void possibleSplitBrain(String nodeID);
+
@LogMessage(level = Logger.Level.WARN)
@Message(id = 222296, value = "Unable to deploy Hawtio MBeam, console client side RBAC not available",
diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/impl/ClusterConnectionImpl.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/impl/ClusterConnectionImpl.java
index 42bd1e7..7a8130d 100644
--- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/impl/ClusterConnectionImpl.java
+++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/impl/ClusterConnectionImpl.java
@@ -521,10 +521,14 @@ public final class ClusterConnectionImpl implements ClusterConnection, AfterConn
@Override
public boolean updateMember(long uniqueEventID, String nodeId, TopologyMemberImpl memberInput) {
if (splitBrainDetection && nodeId.equals(nodeManager.getNodeId().toString())) {
- if (memberInput.getLive() != null && !memberInput.getLive().isSameParams(connector)) {
- ActiveMQServerLogger.LOGGER.possibleSplitBrain(nodeId, memberInput.toString());
+ if (memberInput.getLive() != null) {
+ if (!memberInput.getLive().isSameParams(connector)) {
+ ActiveMQServerLogger.LOGGER.possibleSplitBrain(nodeId, memberInput.toString());
+ return false;
+ }
+ } else {
+ memberInput.setLive(connector);
}
- memberInput.setLive(connector);
}
return true;
}
@@ -538,7 +542,7 @@ public final class ClusterConnectionImpl implements ClusterConnection, AfterConn
@Override
public boolean removeMember(final long uniqueEventID, final String nodeId) {
if (splitBrainDetection && nodeId.equals(nodeManager.getNodeId().toString())) {
- ActiveMQServerLogger.LOGGER.possibleSplitBrain(nodeId, nodeId);
+ ActiveMQServerLogger.LOGGER.possibleSplitBrain(nodeId);
return false;
}
return true;
diff --git a/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/dnsswitch/DNSSwitchTest.java b/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/dnsswitch/DNSSwitchTest.java
index 0667aba..f837fba 100644
--- a/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/dnsswitch/DNSSwitchTest.java
+++ b/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/dnsswitch/DNSSwitchTest.java
@@ -43,8 +43,12 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
+import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.management.ActiveMQServerControl;
import org.apache.activemq.artemis.api.core.management.ObjectNameBuilder;
+import org.apache.activemq.artemis.core.client.impl.Topology;
+import org.apache.activemq.artemis.core.client.impl.TopologyMemberImpl;
+import org.apache.activemq.artemis.core.remoting.impl.netty.NettyConnectorFactory;
import org.apache.activemq.artemis.jms.client.ActiveMQConnectionFactory;
import org.apache.activemq.artemis.tests.smoke.common.SmokeTestBase;
import org.apache.activemq.artemis.util.ServerUtil;
@@ -912,6 +916,104 @@ public class DNSSwitchTest extends SmokeTestBase {
validateIP("test", "192.0.0.3");
}
+ @Test
+ public void testSplitBrainDetection() throws Throwable {
+ spawnRun(serverLocation, "testSplitBrainDetection", getServerLocation(SERVER_LIVE), getServerLocation(SERVER_BACKUP));
+ }
+
+ /**
+ * args[0] = constant "testSplitBrainDetection", used to select this method via reflection through main(String[] args)
+ * args[1] = location of the live server
+ * args[2] = location of the backup server
+ */
+ public static void testSplitBrainDetection(String[] args) throws Throwable {
+ NetUtil.netUp(FIRST_IP, "lo:first");
+ NetUtil.netUp(SECOND_IP, "lo:second");
+
+ // notice there's no THIRD_IP anywhere
+ saveConf(hostsFile, FIRST_IP, "FIRST", SECOND_IP, "SECOND");
+
+ Process serverLive = null;
+ Process serverBackup = null;
+
+ try {
+ serverLive = ServerUtil.startServer(args[1], "live", "tcp://FIRST:61616", 0);
+ ActiveMQServerControl liveControl = getServerControl(liveURI, liveNameBuilder, 20_000);
+
+ Wait.assertTrue(liveControl::isStarted);
+
+ // notice the first server does not know about this server at all
+ serverBackup = ServerUtil.startServer(args[2], "backup", "tcp://SECOND:61716", 0);
+ ActiveMQServerControl backupControl = getServerControl(backupURI, backupNameBuilder, 20_000);
+
+ Wait.assertTrue(backupControl::isStarted);
+ Wait.assertTrue(backupControl::isReplicaSync);
+
+ logger.debug("shutdown the Network now");
+
+ // this will remove all the DNS information
+ // I need the pingers to stop responding.
+ // That will only happen if I stop both devices on Linux.
+ // On mac that works regardless
+ NetUtil.netDown(FIRST_IP, "lo:first", false);
+ NetUtil.netDown(SECOND_IP, "lo:second", false);
+ saveConf(hostsFile);
+
+ Wait.assertTrue(backupControl::isActive);
+
+ logger.debug("Starting the network");
+
+ NetUtil.netUp(FIRST_IP, "lo:first");
+ NetUtil.netUp(SECOND_IP, "lo:second");
+ saveConf(hostsFile, FIRST_IP, "FIRST", SECOND_IP, "SECOND");
+
+ logger.debug("Waiting until live is not replicated anymore");
+ Wait.assertTrue(() -> !liveControl.isReplicaSync());
+
+ logger.debug("Waiting enough to let live spread its topology around");
+ try (ActiveMQConnectionFactory firstCf = new ActiveMQConnectionFactory("tcp://FIRST:61616?ha=false");
+ Connection ignored = firstCf.createConnection()) {
+ waitForTopology(firstCf.getServerLocator().getTopology(), 60_000, 1, 1);
+ final Topology topology = firstCf.getServerLocator().getTopology();
+ final TopologyMemberImpl member = topology.getMember(liveControl.getNodeID());
+ Assert.assertNotNull(member.getBackup());
+ Assert.assertNotNull(member.getLive());
+ final TransportConfiguration live = member.getLive();
+ Assert.assertEquals("artemis", live.getName());
+ Assert.assertEquals(NettyConnectorFactory.class.getName(), live.getFactoryClassName());
+ Assert.assertEquals("FIRST", live.getParams().get("host"));
+ Assert.assertEquals("61616", live.getParams().get("port"));
+ final TransportConfiguration backup = member.getBackup();
+ Assert.assertEquals("artemis", backup.getName());
+ Assert.assertEquals(NettyConnectorFactory.class.getName(), backup.getFactoryClassName());
+ Assert.assertEquals("SECOND", backup.getParams().get("host"));
+ Assert.assertEquals("61716", backup.getParams().get("port"));
+ }
+ try (ActiveMQConnectionFactory secondCf = new ActiveMQConnectionFactory("tcp://SECOND:61716?ha=false");
+ Connection ignored = secondCf.createConnection()) {
+ logger.debug("Waiting until second broker topology has just a single live node");
+ waitForTopology(secondCf.getServerLocator().getTopology(), 60_000, 1, 0);
+ final Topology topology = secondCf.getServerLocator().getTopology();
+ final TopologyMemberImpl member = topology.getMember(liveControl.getNodeID());
+ Assert.assertNull(member.getBackup());
+ Assert.assertNotNull(member.getLive());
+ final TransportConfiguration live = member.getLive();
+ Assert.assertEquals("artemis", live.getName());
+ Assert.assertEquals(NettyConnectorFactory.class.getName(), live.getFactoryClassName());
+ Assert.assertEquals("SECOND", live.getParams().get("host"));
+ Assert.assertEquals("61716", live.getParams().get("port"));
+ }
+
+ } finally {
+ if (serverBackup != null) {
+ serverBackup.destroyForcibly();
+ }
+ if (serverLive != null) {
+ serverLive.destroyForcibly();
+ }
+ }
+ }
+
/**
* it will continue the test on a spawned VM with the properties we need for this test
*/