You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@activemq.apache.org by ni...@apache.org on 2021/08/03 12:44:20 UTC

[activemq-artemis] branch main updated: ARTEMIS-3402 Split Brain detection should reject bad member updates

This is an automated email from the ASF dual-hosted git repository.

nigrofranz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/activemq-artemis.git


The following commit(s) were added to refs/heads/main by this push:
     new 2694f8b  ARTEMIS-3402 Split Brain detection should reject bad member updates
     new 1883801  This closes #3672
2694f8b is described below

commit 2694f8b52e6628b6a894cbd4cc006533c4ee6894
Author: franz1981 <ni...@gmail.com>
AuthorDate: Mon Jul 26 18:55:45 2021 +0200

    ARTEMIS-3402 Split Brain detection should reject bad member updates
---
 .../artemis/core/server/ActiveMQServerLogger.java  |   6 ++
 .../server/cluster/impl/ClusterConnectionImpl.java |  12 ++-
 .../tests/smoke/dnsswitch/DNSSwitchTest.java       | 102 +++++++++++++++++++++
 3 files changed, 116 insertions(+), 4 deletions(-)

diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServerLogger.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServerLogger.java
index bc415ed..1baf9ca 100644
--- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServerLogger.java
+++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/ActiveMQServerLogger.java
@@ -1706,6 +1706,12 @@ public interface ActiveMQServerLogger extends BasicLogger {
                                  "**************************************************************************************************************************************************************************************************************************************************************", format = Message.Format.MESSAGE_FORMAT)
    void possibleSplitBrain(String nodeID, String connectionPairInformation);
 
+   @LogMessage(level = Logger.Level.WARN) // I really want emphasis on this logger, so adding the stars
+   @Message(id = 222295, value = "\n**************************************************************************************************************************************************************************************************************************************************************\n" +
+                                 "There is a possible split brain on nodeID {0}. Topology update ignored.\n" +
+                                 "**************************************************************************************************************************************************************************************************************************************************************", format = Message.Format.MESSAGE_FORMAT)
+   void possibleSplitBrain(String nodeID);
+
 
    @LogMessage(level = Logger.Level.WARN)
    @Message(id = 222296, value = "Unable to deploy Hawtio MBeam, console client side RBAC not available",
diff --git a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/impl/ClusterConnectionImpl.java b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/impl/ClusterConnectionImpl.java
index 42bd1e7..7a8130d 100644
--- a/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/impl/ClusterConnectionImpl.java
+++ b/artemis-server/src/main/java/org/apache/activemq/artemis/core/server/cluster/impl/ClusterConnectionImpl.java
@@ -521,10 +521,14 @@ public final class ClusterConnectionImpl implements ClusterConnection, AfterConn
    @Override
    public boolean updateMember(long uniqueEventID, String nodeId, TopologyMemberImpl memberInput) {
       if (splitBrainDetection && nodeId.equals(nodeManager.getNodeId().toString())) {
-         if (memberInput.getLive() != null && !memberInput.getLive().isSameParams(connector)) {
-            ActiveMQServerLogger.LOGGER.possibleSplitBrain(nodeId, memberInput.toString());
+         if (memberInput.getLive() != null) {
+            if (!memberInput.getLive().isSameParams(connector)) {
+               ActiveMQServerLogger.LOGGER.possibleSplitBrain(nodeId, memberInput.toString());
+               return false;
+            }
+         } else {
+            memberInput.setLive(connector);
          }
-         memberInput.setLive(connector);
       }
       return true;
    }
@@ -538,7 +542,7 @@ public final class ClusterConnectionImpl implements ClusterConnection, AfterConn
    @Override
    public boolean removeMember(final long uniqueEventID, final String nodeId) {
       if (splitBrainDetection && nodeId.equals(nodeManager.getNodeId().toString())) {
-         ActiveMQServerLogger.LOGGER.possibleSplitBrain(nodeId, nodeId);
+         ActiveMQServerLogger.LOGGER.possibleSplitBrain(nodeId);
          return false;
       }
       return true;
diff --git a/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/dnsswitch/DNSSwitchTest.java b/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/dnsswitch/DNSSwitchTest.java
index 0667aba..f837fba 100644
--- a/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/dnsswitch/DNSSwitchTest.java
+++ b/tests/smoke-tests/src/test/java/org/apache/activemq/artemis/tests/smoke/dnsswitch/DNSSwitchTest.java
@@ -43,8 +43,12 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
+import org.apache.activemq.artemis.api.core.TransportConfiguration;
 import org.apache.activemq.artemis.api.core.management.ActiveMQServerControl;
 import org.apache.activemq.artemis.api.core.management.ObjectNameBuilder;
+import org.apache.activemq.artemis.core.client.impl.Topology;
+import org.apache.activemq.artemis.core.client.impl.TopologyMemberImpl;
+import org.apache.activemq.artemis.core.remoting.impl.netty.NettyConnectorFactory;
 import org.apache.activemq.artemis.jms.client.ActiveMQConnectionFactory;
 import org.apache.activemq.artemis.tests.smoke.common.SmokeTestBase;
 import org.apache.activemq.artemis.util.ServerUtil;
@@ -912,6 +916,104 @@ public class DNSSwitchTest extends SmokeTestBase {
       validateIP("test", "192.0.0.3");
    }
 
+   @Test
+   public void testSplitBrainDetection() throws Throwable {
+      spawnRun(serverLocation, "testSplitBrainDetection", getServerLocation(SERVER_LIVE), getServerLocation(SERVER_BACKUP));
+   }
+
+   /**
+    * args[0] = the constant "testSplitBrainDetection", used for reflection through main(String[] args)
+    * args[1] = location of the live server
+    * args[2] = location of the backup server
+    */
+   public static void testSplitBrainDetection(String[] args) throws Throwable {
+      NetUtil.netUp(FIRST_IP, "lo:first");
+      NetUtil.netUp(SECOND_IP, "lo:second");
+
+      // notice there's no THIRD_IP anywhere
+      saveConf(hostsFile, FIRST_IP, "FIRST", SECOND_IP, "SECOND");
+
+      Process serverLive = null;
+      Process serverBackup = null;
+
+      try {
+         serverLive = ServerUtil.startServer(args[1], "live", "tcp://FIRST:61616", 0);
+         ActiveMQServerControl liveControl = getServerControl(liveURI, liveNameBuilder, 20_000);
+
+         Wait.assertTrue(liveControl::isStarted);
+
+         // notice the first server does not know about this server at all
+         serverBackup = ServerUtil.startServer(args[2], "backup", "tcp://SECOND:61716", 0);
+         ActiveMQServerControl backupControl = getServerControl(backupURI, backupNameBuilder, 20_000);
+
+         Wait.assertTrue(backupControl::isStarted);
+         Wait.assertTrue(backupControl::isReplicaSync);
+
+         logger.debug("shutdown the Network now");
+
+         // this will remove all the DNS information
+         // I need the pingers to stop responding.
+         // That will only happen if I stop both devices on Linux.
+         // On mac that works regardless
+         NetUtil.netDown(FIRST_IP, "lo:first", false);
+         NetUtil.netDown(SECOND_IP, "lo:second", false);
+         saveConf(hostsFile);
+
+         Wait.assertTrue(backupControl::isActive);
+
+         logger.debug("Starting the network");
+
+         NetUtil.netUp(FIRST_IP, "lo:first");
+         NetUtil.netUp(SECOND_IP, "lo:second");
+         saveConf(hostsFile, FIRST_IP, "FIRST", SECOND_IP, "SECOND");
+
+         logger.debug("Waiting until live is not replicated anymore");
+         Wait.assertTrue(() -> !liveControl.isReplicaSync());
+
+         logger.debug("Waiting enough to let live spread its topology around");
+         try (ActiveMQConnectionFactory firstCf = new ActiveMQConnectionFactory("tcp://FIRST:61616?ha=false");
+              Connection ignored = firstCf.createConnection()) {
+            waitForTopology(firstCf.getServerLocator().getTopology(), 60_000, 1, 1);
+            final Topology topology = firstCf.getServerLocator().getTopology();
+            final TopologyMemberImpl member = topology.getMember(liveControl.getNodeID());
+            Assert.assertNotNull(member.getBackup());
+            Assert.assertNotNull(member.getLive());
+            final TransportConfiguration live = member.getLive();
+            Assert.assertEquals("artemis", live.getName());
+            Assert.assertEquals(NettyConnectorFactory.class.getName(), live.getFactoryClassName());
+            Assert.assertEquals("FIRST", live.getParams().get("host"));
+            Assert.assertEquals("61616", live.getParams().get("port"));
+            final TransportConfiguration backup = member.getBackup();
+            Assert.assertEquals("artemis", backup.getName());
+            Assert.assertEquals(NettyConnectorFactory.class.getName(), backup.getFactoryClassName());
+            Assert.assertEquals("SECOND", backup.getParams().get("host"));
+            Assert.assertEquals("61716", backup.getParams().get("port"));
+         }
+         try (ActiveMQConnectionFactory secondCf = new ActiveMQConnectionFactory("tcp://SECOND:61716?ha=false");
+              Connection ignored = secondCf.createConnection()) {
+            logger.debug("Waiting until second broker topology has just a single live node");
+            waitForTopology(secondCf.getServerLocator().getTopology(), 60_000, 1, 0);
+            final Topology topology = secondCf.getServerLocator().getTopology();
+            final TopologyMemberImpl member = topology.getMember(liveControl.getNodeID());
+            Assert.assertNull(member.getBackup());
+            Assert.assertNotNull(member.getLive());
+            final TransportConfiguration live = member.getLive();
+            Assert.assertEquals("artemis", live.getName());
+            Assert.assertEquals(NettyConnectorFactory.class.getName(), live.getFactoryClassName());
+            Assert.assertEquals("SECOND", live.getParams().get("host"));
+            Assert.assertEquals("61716", live.getParams().get("port"));
+         }
+
+      } finally {
+         if (serverBackup != null) {
+            serverBackup.destroyForcibly();
+         }
+         if (serverLive != null) {
+            serverLive.destroyForcibly();
+         }
+      }
+   }
+
    /**
     * it will continue the test on a spawned VM with the properties we need for this test
     */