You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by al...@apache.org on 2018/11/08 02:45:27 UTC
cassandra git commit: Fix incorrect sorting of replicas in
SimpleStrategy.calculateNaturalReplicas
Repository: cassandra
Updated Branches:
refs/heads/trunk 0ad056432 -> 507e4a46a
Fix incorrect sorting of replicas in SimpleStrategy.calculateNaturalReplicas
patch by Joseph Lynch; reviewed by Benedict Elliott Smith for
CASSANDRA-14862
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/507e4a46
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/507e4a46
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/507e4a46
Branch: refs/heads/trunk
Commit: 507e4a46a166cab5322a50fbe40c80cb0d16c290
Parents: 0ad0564
Author: Joseph Lynch <jo...@gmail.com>
Authored: Wed Oct 31 20:51:34 2018 -0700
Committer: Aleksey Yeshchenko <al...@apple.com>
Committed: Wed Nov 7 18:43:30 2018 -0800
----------------------------------------------------------------------
CHANGES.txt | 1 +
cafd44c8d9ae24c953a8d82746fc89bfe2465641.patch | 180 +++++++++++++++++++
.../locator/AbstractReplicationStrategy.java | 12 +-
.../cassandra/locator/SimpleStrategy.java | 5 +-
.../cassandra/locator/SimpleStrategyTest.java | 61 ++++++-
5 files changed, 247 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/507e4a46/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 2373cb2..4081fce 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
4.0
+ * Fix incorrect sorting of replicas in SimpleStrategy.calculateNaturalReplicas (CASSANDRA-14862)
* Partitioned outbound internode TCP connections can occur when nodes restart (CASSANDRA-14358)
* Don't write to system_distributed.repair_history, system_traces.sessions, system_traces.events in mixed version 3.X/4.0 clusters (CASSANDRA-14841)
* Avoid running query to self through messaging service (CASSANDRA-14807)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/507e4a46/cafd44c8d9ae24c953a8d82746fc89bfe2465641.patch
----------------------------------------------------------------------
diff --git a/cafd44c8d9ae24c953a8d82746fc89bfe2465641.patch b/cafd44c8d9ae24c953a8d82746fc89bfe2465641.patch
new file mode 100644
index 0000000..0f7e423
--- /dev/null
+++ b/cafd44c8d9ae24c953a8d82746fc89bfe2465641.patch
@@ -0,0 +1,180 @@
+From cafd44c8d9ae24c953a8d82746fc89bfe2465641 Mon Sep 17 00:00:00 2001
+From: Joseph Lynch <jo...@gmail.com>
+Date: Wed, 31 Oct 2018 20:51:34 -0700
+Subject: [PATCH] Fixes incorrect sorting of replicas in
+ SimpleStrategy.calculateNaturalReplicas
+
+A small bug was introduced in e645b917 which would change SimpleStrategy
+primary replica sets. This patch adds a regression test that emulates
+the dtest that was broken by this change
+(TestTopology.test_size_estimates_multidc) which fails before this patch
+and succeeds after.
+---
+ .../locator/AbstractReplicationStrategy.java | 12 +++-
+ .../cassandra/locator/SimpleStrategy.java | 5 +-
+ .../cassandra/locator/SimpleStrategyTest.java | 61 +++++++++++++++++--
+ 3 files changed, 66 insertions(+), 12 deletions(-)
+
+diff --git a/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java b/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java
+index 818e20efe93..deb43c65478 100644
+--- a/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java
++++ b/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java
+@@ -132,11 +132,19 @@ public Replica getLocalReplicaFor(RingPosition searchPosition)
+ }
+
+ /**
+- * calculate the natural endpoints for the given token
++ * Calculate the natural endpoints for the given token. Endpoints are returned in the order
++ * they occur in the ring following the searchToken, as defined by the replication strategy.
++ *
++ * Note that the order of the replicas is _implicitly relied upon_ by the definition of
++ * "primary" range in
++ * {@link org.apache.cassandra.service.StorageService#getPrimaryRangesForEndpoint(String, InetAddressAndPort)}
++ * which is in turn relied on by various components like repair and size estimate calculations.
+ *
+ * @see #getNaturalReplicasForToken(org.apache.cassandra.dht.RingPosition)
+ *
+- * @param searchToken the token the natural endpoints are requested for
++ * @param tokenMetadata the token metadata used to find the searchToken, e.g. contains token to endpoint
++ * mapping information
++ * @param searchToken the token to find the natural endpoints for
+ * @return a copy of the natural endpoints for the given token
+ */
+ public abstract EndpointsForRange calculateNaturalReplicas(Token searchToken, TokenMetadata tokenMetadata);
+diff --git a/src/java/org/apache/cassandra/locator/SimpleStrategy.java b/src/java/org/apache/cassandra/locator/SimpleStrategy.java
+index 2dd835c2612..748d2d3238a 100644
+--- a/src/java/org/apache/cassandra/locator/SimpleStrategy.java
++++ b/src/java/org/apache/cassandra/locator/SimpleStrategy.java
+@@ -68,10 +68,7 @@ public EndpointsForRange calculateNaturalReplicas(Token token, TokenMetadata met
+ replicas.add(new Replica(ep, replicaRange, replicas.size() < rf.fullReplicas));
+ }
+
+- // group endpoints by DC, so that we can cheaply filter them to a given DC
+- IEndpointSnitch snitch = DatabaseDescriptor.getEndpointSnitch();
+- return replicas.build()
+- .sorted(Comparator.comparing(r -> snitch.getDatacenter(r.endpoint())));
++ return replicas.build();
+ }
+
+ public ReplicationFactor getReplicationFactor()
+diff --git a/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java b/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java
+index 338e752d407..507cc1fe5b7 100644
+--- a/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java
++++ b/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java
+@@ -27,7 +27,7 @@
+ import com.google.common.collect.HashMultimap;
+ import com.google.common.collect.Lists;
+ import com.google.common.collect.Multimap;
+-import org.junit.Assert;
++import org.junit.Before;
+ import org.junit.BeforeClass;
+ import org.junit.Test;
+
+@@ -56,15 +56,23 @@
+ public class SimpleStrategyTest
+ {
+ public static final String KEYSPACE1 = "SimpleStrategyTest";
++ public static final String MULTIDC = "MultiDCSimpleStrategyTest";
+
+ @BeforeClass
+- public static void defineSchema() throws Exception
++ public static void defineSchema()
+ {
+ SchemaLoader.prepareServer();
+ SchemaLoader.createKeyspace(KEYSPACE1, KeyspaceParams.simple(1));
++ SchemaLoader.createKeyspace(MULTIDC, KeyspaceParams.simple(3));
+ DatabaseDescriptor.setTransientReplicationEnabledUnsafe(true);
+ }
+
++ @Before
++ public void resetSnitch()
++ {
++ DatabaseDescriptor.setEndpointSnitch(new SimpleSnitch());
++ }
++
+ @Test
+ public void tryValidKeyspace()
+ {
+@@ -97,6 +105,47 @@ public void testStringEndpoints() throws UnknownHostException
+ verifyGetNaturalEndpoints(endpointTokens.toArray(new Token[0]), keyTokens.toArray(new Token[0]));
+ }
+
++ @Test
++ public void testMultiDCSimpleStrategyEndpoints() throws UnknownHostException
++ {
++ IEndpointSnitch snitch = new PropertyFileSnitch();
++ DatabaseDescriptor.setEndpointSnitch(snitch);
++
++ TokenMetadata metadata = new TokenMetadata();
++
++ AbstractReplicationStrategy strategy = getStrategy(MULTIDC, metadata, snitch);
++
++ // Topology taken directly from the topology_test.test_size_estimates_multidc dtest that regressed
++ Multimap<InetAddressAndPort, Token> dc1 = HashMultimap.create();
++ dc1.put(InetAddressAndPort.getByName("127.0.0.1"), new Murmur3Partitioner.LongToken(-6639341390736545756L));
++ dc1.put(InetAddressAndPort.getByName("127.0.0.1"), new Murmur3Partitioner.LongToken(-2688160409776496397L));
++ dc1.put(InetAddressAndPort.getByName("127.0.0.2"), new Murmur3Partitioner.LongToken(-2506475074448728501L));
++ dc1.put(InetAddressAndPort.getByName("127.0.0.2"), new Murmur3Partitioner.LongToken(8473270337963525440L));
++ metadata.updateNormalTokens(dc1);
++
++ Multimap<InetAddressAndPort, Token> dc2 = HashMultimap.create();
++ dc2.put(InetAddressAndPort.getByName("127.0.0.4"), new Murmur3Partitioner.LongToken(-3736333188524231709L));
++ dc2.put(InetAddressAndPort.getByName("127.0.0.4"), new Murmur3Partitioner.LongToken(8673615181726552074L));
++ metadata.updateNormalTokens(dc2);
++
++ Map<InetAddressAndPort, Integer> primaryCount = new HashMap<>();
++ Map<InetAddressAndPort, Integer> replicaCount = new HashMap<>();
++ for (Token t : metadata.sortedTokens())
++ {
++ EndpointsForToken replicas = strategy.getNaturalReplicasForToken(t);
++ primaryCount.compute(replicas.get(0).endpoint(), (k, v) -> (v == null) ? 1 : v + 1);
++ for (Replica replica : replicas)
++ replicaCount.compute(replica.endpoint(), (k, v) -> (v == null) ? 1 : v + 1);
++ }
++
++ // All three hosts should have 2 "primary" replica ranges and 6 total ranges with RF=3, 3 nodes and 2 DCs.
++ for (InetAddressAndPort addr : primaryCount.keySet())
++ {
++ assertEquals(2, (int) primaryCount.get(addr));
++ assertEquals(6, (int) replicaCount.get(addr));
++ }
++ }
++
+ // given a list of endpoint tokens, and a set of key tokens falling between the endpoint tokens,
+ // make sure that the Strategy picks the right endpoints for the keys.
+ private void verifyGetNaturalEndpoints(Token[] endpointTokens, Token[] keyTokens) throws UnknownHostException
+@@ -106,7 +155,7 @@ private void verifyGetNaturalEndpoints(Token[] endpointTokens, Token[] keyTokens
+ for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces())
+ {
+ tmd = new TokenMetadata();
+- strategy = getStrategy(keyspaceName, tmd);
++ strategy = getStrategy(keyspaceName, tmd, new SimpleSnitch());
+ List<InetAddressAndPort> hosts = new ArrayList<>();
+ for (int i = 0; i < endpointTokens.length; i++)
+ {
+@@ -160,7 +209,7 @@ public void testGetEndpointsDuringBootstrap() throws UnknownHostException
+ AbstractReplicationStrategy strategy = null;
+ for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces())
+ {
+- strategy = getStrategy(keyspaceName, tmd);
++ strategy = getStrategy(keyspaceName, tmd, new SimpleSnitch());
+
+ PendingRangeCalculatorService.calculatePendingRanges(strategy, keyspaceName);
+
+@@ -238,14 +287,14 @@ public void transientReplica() throws Exception
+ strategy.getNaturalReplicasForToken(tk(101)));
+ }
+
+- private AbstractReplicationStrategy getStrategy(String keyspaceName, TokenMetadata tmd)
++ private AbstractReplicationStrategy getStrategy(String keyspaceName, TokenMetadata tmd, IEndpointSnitch snitch)
+ {
+ KeyspaceMetadata ksmd = Schema.instance.getKeyspaceMetadata(keyspaceName);
+ return AbstractReplicationStrategy.createReplicationStrategy(
+ keyspaceName,
+ ksmd.params.replication.klass,
+ tmd,
+- new SimpleSnitch(),
++ snitch,
+ ksmd.params.replication.options);
+ }
+ }
http://git-wip-us.apache.org/repos/asf/cassandra/blob/507e4a46/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java b/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java
index 818e20e..deb43c6 100644
--- a/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java
+++ b/src/java/org/apache/cassandra/locator/AbstractReplicationStrategy.java
@@ -132,11 +132,19 @@ public abstract class AbstractReplicationStrategy
}
/**
- * calculate the natural endpoints for the given token
+ * Calculate the natural endpoints for the given token. Endpoints are returned in the order
+ * they occur in the ring following the searchToken, as defined by the replication strategy.
+ *
+ * Note that the order of the replicas is _implicitly relied upon_ by the definition of
+ * "primary" range in
+ * {@link org.apache.cassandra.service.StorageService#getPrimaryRangesForEndpoint(String, InetAddressAndPort)}
+ * which is in turn relied on by various components like repair and size estimate calculations.
*
* @see #getNaturalReplicasForToken(org.apache.cassandra.dht.RingPosition)
*
- * @param searchToken the token the natural endpoints are requested for
+ * @param tokenMetadata the token metadata used to find the searchToken, e.g. contains token to endpoint
+ * mapping information
+ * @param searchToken the token to find the natural endpoints for
* @return a copy of the natural endpoints for the given token
*/
public abstract EndpointsForRange calculateNaturalReplicas(Token searchToken, TokenMetadata tokenMetadata);
http://git-wip-us.apache.org/repos/asf/cassandra/blob/507e4a46/src/java/org/apache/cassandra/locator/SimpleStrategy.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/locator/SimpleStrategy.java b/src/java/org/apache/cassandra/locator/SimpleStrategy.java
index 2dd835c..748d2d3 100644
--- a/src/java/org/apache/cassandra/locator/SimpleStrategy.java
+++ b/src/java/org/apache/cassandra/locator/SimpleStrategy.java
@@ -68,10 +68,7 @@ public class SimpleStrategy extends AbstractReplicationStrategy
replicas.add(new Replica(ep, replicaRange, replicas.size() < rf.fullReplicas));
}
- // group endpoints by DC, so that we can cheaply filter them to a given DC
- IEndpointSnitch snitch = DatabaseDescriptor.getEndpointSnitch();
- return replicas.build()
- .sorted(Comparator.comparing(r -> snitch.getDatacenter(r.endpoint())));
+ return replicas.build();
}
public ReplicationFactor getReplicationFactor()
http://git-wip-us.apache.org/repos/asf/cassandra/blob/507e4a46/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java b/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java
index 338e752..507cc1f 100644
--- a/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java
+++ b/test/unit/org/apache/cassandra/locator/SimpleStrategyTest.java
@@ -27,7 +27,7 @@ import java.util.Map;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
-import org.junit.Assert;
+import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -56,15 +56,23 @@ import static org.junit.Assert.assertTrue;
public class SimpleStrategyTest
{
public static final String KEYSPACE1 = "SimpleStrategyTest";
+ public static final String MULTIDC = "MultiDCSimpleStrategyTest";
@BeforeClass
- public static void defineSchema() throws Exception
+ public static void defineSchema()
{
SchemaLoader.prepareServer();
SchemaLoader.createKeyspace(KEYSPACE1, KeyspaceParams.simple(1));
+ SchemaLoader.createKeyspace(MULTIDC, KeyspaceParams.simple(3));
DatabaseDescriptor.setTransientReplicationEnabledUnsafe(true);
}
+ @Before
+ public void resetSnitch()
+ {
+ DatabaseDescriptor.setEndpointSnitch(new SimpleSnitch());
+ }
+
@Test
public void tryValidKeyspace()
{
@@ -97,6 +105,47 @@ public class SimpleStrategyTest
verifyGetNaturalEndpoints(endpointTokens.toArray(new Token[0]), keyTokens.toArray(new Token[0]));
}
+ @Test
+ public void testMultiDCSimpleStrategyEndpoints() throws UnknownHostException
+ {
+ IEndpointSnitch snitch = new PropertyFileSnitch();
+ DatabaseDescriptor.setEndpointSnitch(snitch);
+
+ TokenMetadata metadata = new TokenMetadata();
+
+ AbstractReplicationStrategy strategy = getStrategy(MULTIDC, metadata, snitch);
+
+ // Topology taken directly from the topology_test.test_size_estimates_multidc dtest that regressed
+ Multimap<InetAddressAndPort, Token> dc1 = HashMultimap.create();
+ dc1.put(InetAddressAndPort.getByName("127.0.0.1"), new Murmur3Partitioner.LongToken(-6639341390736545756L));
+ dc1.put(InetAddressAndPort.getByName("127.0.0.1"), new Murmur3Partitioner.LongToken(-2688160409776496397L));
+ dc1.put(InetAddressAndPort.getByName("127.0.0.2"), new Murmur3Partitioner.LongToken(-2506475074448728501L));
+ dc1.put(InetAddressAndPort.getByName("127.0.0.2"), new Murmur3Partitioner.LongToken(8473270337963525440L));
+ metadata.updateNormalTokens(dc1);
+
+ Multimap<InetAddressAndPort, Token> dc2 = HashMultimap.create();
+ dc2.put(InetAddressAndPort.getByName("127.0.0.4"), new Murmur3Partitioner.LongToken(-3736333188524231709L));
+ dc2.put(InetAddressAndPort.getByName("127.0.0.4"), new Murmur3Partitioner.LongToken(8673615181726552074L));
+ metadata.updateNormalTokens(dc2);
+
+ Map<InetAddressAndPort, Integer> primaryCount = new HashMap<>();
+ Map<InetAddressAndPort, Integer> replicaCount = new HashMap<>();
+ for (Token t : metadata.sortedTokens())
+ {
+ EndpointsForToken replicas = strategy.getNaturalReplicasForToken(t);
+ primaryCount.compute(replicas.get(0).endpoint(), (k, v) -> (v == null) ? 1 : v + 1);
+ for (Replica replica : replicas)
+ replicaCount.compute(replica.endpoint(), (k, v) -> (v == null) ? 1 : v + 1);
+ }
+
+ // All three hosts should have 2 "primary" replica ranges and 6 total ranges with RF=3, 3 nodes and 2 DCs.
+ for (InetAddressAndPort addr : primaryCount.keySet())
+ {
+ assertEquals(2, (int) primaryCount.get(addr));
+ assertEquals(6, (int) replicaCount.get(addr));
+ }
+ }
+
// given a list of endpoint tokens, and a set of key tokens falling between the endpoint tokens,
// make sure that the Strategy picks the right endpoints for the keys.
private void verifyGetNaturalEndpoints(Token[] endpointTokens, Token[] keyTokens) throws UnknownHostException
@@ -106,7 +155,7 @@ public class SimpleStrategyTest
for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces())
{
tmd = new TokenMetadata();
- strategy = getStrategy(keyspaceName, tmd);
+ strategy = getStrategy(keyspaceName, tmd, new SimpleSnitch());
List<InetAddressAndPort> hosts = new ArrayList<>();
for (int i = 0; i < endpointTokens.length; i++)
{
@@ -160,7 +209,7 @@ public class SimpleStrategyTest
AbstractReplicationStrategy strategy = null;
for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces())
{
- strategy = getStrategy(keyspaceName, tmd);
+ strategy = getStrategy(keyspaceName, tmd, new SimpleSnitch());
PendingRangeCalculatorService.calculatePendingRanges(strategy, keyspaceName);
@@ -238,14 +287,14 @@ public class SimpleStrategyTest
strategy.getNaturalReplicasForToken(tk(101)));
}
- private AbstractReplicationStrategy getStrategy(String keyspaceName, TokenMetadata tmd)
+ private AbstractReplicationStrategy getStrategy(String keyspaceName, TokenMetadata tmd, IEndpointSnitch snitch)
{
KeyspaceMetadata ksmd = Schema.instance.getKeyspaceMetadata(keyspaceName);
return AbstractReplicationStrategy.createReplicationStrategy(
keyspaceName,
ksmd.params.replication.klass,
tmd,
- new SimpleSnitch(),
+ snitch,
ksmd.params.replication.options);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org