You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jm...@apache.org on 2021/08/27 19:51:15 UTC
[cassandra] branch cassandra-4.0 updated: Tolerate missing DNS
entry when completing host replacement
This is an automated email from the ASF dual-hosted git repository.
jmckenzie pushed a commit to branch cassandra-4.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-4.0 by this push:
new f59411f Tolerate missing DNS entry when completing host replacement
f59411f is described below
commit f59411f1c985043850154971e9c4066013f355cb
Author: Chris Lohfink <cl...@apple.com>
AuthorDate: Fri Aug 13 11:07:34 2021 -0400
Tolerate missing DNS entry when completing host replacement
patch by Chris Lohfink; reviewed by Brandon Williams for CASSANDRA-16873
Co-authored by Chris Lohfink <cl...@apple.com>
Co-authored by Josh McKenzie <jm...@apache.org>
---
CHANGES.txt | 1 +
.../apache/cassandra/service/StorageService.java | 37 ++++++++++++++++++++--
.../service/StorageServiceServerTest.java | 34 +++++++++++++++++++-
3 files changed, 69 insertions(+), 3 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index b9d4aa1..4236e0c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
4.0.1
+ * Tolerate missing DNS entry when completing a host replacement (CASSANDRA-16873)
* Harden PrunableArrayQueue against Pruner implementations that might throw exceptions (CASSANDRA-16866)
* Move RepairedDataInfo to the execution controller rather than the ReadCommand to avoid unintended sharing (CASSANDRA-16721)
* Bump zstd-jni version to 1.5.0-4 (CASSANDRA-16884)
diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java
index 9e2d020..6c72682 100644
--- a/src/java/org/apache/cassandra/service/StorageService.java
+++ b/src/java/org/apache/cassandra/service/StorageService.java
@@ -2719,6 +2719,38 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
SystemKeyspace.updateTokens(endpoint, tokensToUpdateInSystemKeyspace);
}
+ @VisibleForTesting
+ public boolean isReplacingSameHostAddressAndHostId(UUID hostId)
+ {
+ try
+ {
+ return isReplacingSameAddress() &&
+ Gossiper.instance.getEndpointStateForEndpoint(DatabaseDescriptor.getReplaceAddress()) != null
+ && hostId.equals(Gossiper.instance.getHostId(DatabaseDescriptor.getReplaceAddress()));
+ }
+ catch (RuntimeException ex)
+ {
+ // If a host is decommissioned and the DNS entry is removed before the
+ // bootstrap completes, when it completes and advertises NORMAL state to other nodes, they will be unable
+ // to resolve it to an InetAddress unless it happens to be cached. This could happen on nodes
+ // storing large amounts of data or with long index rebuild times or if new instances have been added
+ // to the cluster through expansion or additional host replacement.
+ //
+ // The original host replacement must have been able to resolve the replacing address on startup
+ // when setting StorageService.replacing, so if it is impossible to resolve now it is probably
+ // decommissioned and did not have the same IP address or host id. Allow the handleStateNormal
+ // handling to proceed, otherwise gossip state will be inconsistent with some nodes believing the
+ // replacement host to be normal, and nodes unable to resolve the hostname will be left in JOINING.
+ if (ex.getCause() != null && ex.getCause().getClass() == UnknownHostException.class)
+ {
+ logger.info("Suppressed exception while checking isReplacingSameHostAddressAndHostId({}). Original host was probably decommissioned. ({})",
+ hostId, ex.getMessage());
+ return false;
+ }
+ throw ex; // otherwise rethrow
+ }
+ }
+
/**
* Handle node move to normal state. That is, node is entering token ring and participating
* in reads.
@@ -2764,9 +2796,10 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
// Order Matters, TM.updateHostID() should be called before TM.updateNormalToken(), (see CASSANDRA-4300).
UUID hostId = Gossiper.instance.getHostId(endpoint);
InetAddressAndPort existing = tokenMetadata.getEndpointForHostId(hostId);
- if (replacing && isReplacingSameAddress() && Gossiper.instance.getEndpointStateForEndpoint(DatabaseDescriptor.getReplaceAddress()) != null
- && (hostId.equals(Gossiper.instance.getHostId(DatabaseDescriptor.getReplaceAddress()))))
+ if (replacing && isReplacingSameHostAddressAndHostId(hostId))
+ {
logger.warn("Not updating token metadata for {} because I am replacing it", endpoint);
+ }
else
{
if (existing != null && !existing.equals(endpoint))
diff --git a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java
index c0071b4..9dda9f9 100644
--- a/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java
+++ b/test/unit/org/apache/cassandra/service/StorageServiceServerTest.java
@@ -24,11 +24,13 @@ import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.InetAddress;
+import java.net.UnknownHostException;
import java.util.*;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
+import org.apache.cassandra.db.SystemKeyspace;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -697,4 +699,34 @@ public class StorageServiceServerTest
assertTrue(AuditLogManager.instance.isEnabled());
StorageService.instance.disableAuditLog();
}
-}
+
+ @Test
+ public void isReplacingSameHostAddressAndHostIdTest() throws UnknownHostException
+ {
+ try
+ {
+ UUID differentHostId = UUID.randomUUID();
+ Assert.assertFalse(StorageService.instance.isReplacingSameHostAddressAndHostId(differentHostId));
+
+ final String hostAddress = FBUtilities.getBroadcastAddressAndPort().getHostAddress(false);
+ UUID localHostId = SystemKeyspace.getLocalHostId();
+ Gossiper.instance.initializeNodeUnsafe(FBUtilities.getBroadcastAddressAndPort(), localHostId, 1);
+
+ // Check that replacing the same host address with the same host id is detected
+ System.setProperty("cassandra.replace_address", hostAddress);
+ Assert.assertTrue(StorageService.instance.isReplacingSameHostAddressAndHostId(localHostId));
+
+ // Check that replacing the same host address with a different host id is not reported as a match
+ System.setProperty("cassandra.replace_address", hostAddress);
+ Assert.assertFalse(StorageService.instance.isReplacingSameHostAddressAndHostId(differentHostId));
+
+ // Check that the DNS entry going away for the replace_address is tolerated (returns false, no exception)
+ System.setProperty("cassandra.replace_address", "unresolvable.host.local.");
+ Assert.assertFalse(StorageService.instance.isReplacingSameHostAddressAndHostId(differentHostId));
+ }
+ finally
+ {
+ System.clearProperty("cassandra.replace_address");
+ }
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org