You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2014/08/12 23:31:46 UTC
[15/15] git commit: merge from 2.1
merge from 2.1
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/59600625
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/59600625
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/59600625
Branch: refs/heads/trunk
Commit: 59600625fe3c3e6bb45df8151891d518151a4ce8
Parents: d2bed56 25982d1
Author: Jonathan Ellis <jb...@apache.org>
Authored: Tue Aug 12 16:31:22 2014 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Tue Aug 12 16:31:22 2014 -0500
----------------------------------------------------------------------
CHANGES.txt | 8 ++++++++
src/java/org/apache/cassandra/locator/TokenMetadata.java | 3 ++-
src/java/org/apache/cassandra/service/StorageService.java | 3 ++-
3 files changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/59600625/CHANGES.txt
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/59600625/src/java/org/apache/cassandra/locator/TokenMetadata.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/locator/TokenMetadata.java
index 2a6a624,f848e3b..a609c67
--- a/src/java/org/apache/cassandra/locator/TokenMetadata.java
+++ b/src/java/org/apache/cassandra/locator/TokenMetadata.java
@@@ -671,109 -671,9 +671,110 @@@ public class TokenMetadat
return ranges;
}
- public void setPendingRanges(String keyspaceName, Multimap<Range<Token>, InetAddress> rangeMap)
+ /**
+ * Calculate pending ranges according to bootstrapping and leaving nodes. Reasoning is:
+ *
+ * (1) When in doubt, it is better to write too much to a node than too little. That is, if
+ * there are multiple nodes moving, calculate the biggest ranges a node could have. Cleaning
+ * up unneeded data afterwards is better than missing writes during movement.
+ * (2) When a node leaves, ranges for other nodes can only grow (a node might get additional
+ * ranges, but it will not lose any of its current ranges as a result of a leave). Therefore
+ * we will first remove _all_ leaving tokens for the sake of calculation and then check what
+ * ranges would go where if all nodes are to leave. This way we get the biggest possible
+ * ranges with regard to current leave operations, covering all subsets of possible final range
+ * values.
+ * (3) When a node bootstraps, ranges of other nodes can only get smaller. Without doing
+ * complex calculations to see if multiple bootstraps overlap, we simply base calculations
+ * on the same token ring used before (reflecting situation after all leave operations have
+ * completed). Bootstrapping nodes will be added and removed one by one to that metadata and
+ * checked what their ranges would be. This will give us the biggest possible ranges the
+ * node could have. It might be that other bootstraps make our actual final ranges smaller,
+ * but it does not matter as we can clean up the data afterwards.
+ *
+ * NOTE: This is a heavy and inefficient operation. This will be done only once when a node
+ * changes state in the cluster, so it should be manageable.
+ */
+ public void calculatePendingRanges(AbstractReplicationStrategy strategy, String keyspaceName)
{
- pendingRanges.put(keyspaceName, rangeMap);
+ lock.readLock().lock();
+ try
+ {
+ Multimap<Range<Token>, InetAddress> newPendingRanges = HashMultimap.create();
+
+ if (bootstrapTokens.isEmpty() && leavingEndpoints.isEmpty() && movingEndpoints.isEmpty())
+ {
+ if (logger.isDebugEnabled())
+ logger.debug("No bootstrapping, leaving or moving nodes -> empty pending ranges for {}", keyspaceName);
+
+ pendingRanges.put(keyspaceName, newPendingRanges);
+ return;
+ }
+
+ Multimap<InetAddress, Range<Token>> addressRanges = strategy.getAddressRanges();
+
+ // Copy of metadata reflecting the situation after all leave operations are finished.
+ TokenMetadata allLeftMetadata = cloneAfterAllLeft();
+
+ // get all ranges that will be affected by leaving nodes
+ Set<Range<Token>> affectedRanges = new HashSet<Range<Token>>();
+ for (InetAddress endpoint : leavingEndpoints)
+ affectedRanges.addAll(addressRanges.get(endpoint));
+
+ // for each of those ranges, find what new nodes will be responsible for the range when
+ // all leaving nodes are gone.
++ TokenMetadata metadata = cloneOnlyTokenMap(); // don't do this in the loop! #7758
+ for (Range<Token> range : affectedRanges)
+ {
- Set<InetAddress> currentEndpoints = ImmutableSet.copyOf(strategy.calculateNaturalEndpoints(range.right, cloneOnlyTokenMap()));
++ Set<InetAddress> currentEndpoints = ImmutableSet.copyOf(strategy.calculateNaturalEndpoints(range.right, metadata));
+ Set<InetAddress> newEndpoints = ImmutableSet.copyOf(strategy.calculateNaturalEndpoints(range.right, allLeftMetadata));
+ newPendingRanges.putAll(range, Sets.difference(newEndpoints, currentEndpoints));
+ }
+
+ // At this stage newPendingRanges has been updated according to leave operations. We can
+ // now continue the calculation by checking bootstrapping nodes.
+
+ // For each of the bootstrapping nodes, simply add and remove them one by one to
+ // allLeftMetadata and check in between what their ranges would be.
+ Multimap<InetAddress, Token> bootstrapAddresses = bootstrapTokens.inverse();
+ for (InetAddress endpoint : bootstrapAddresses.keySet())
+ {
+ Collection<Token> tokens = bootstrapAddresses.get(endpoint);
+
+ allLeftMetadata.updateNormalTokens(tokens, endpoint);
+ for (Range<Token> range : strategy.getAddressRanges(allLeftMetadata).get(endpoint))
+ newPendingRanges.put(range, endpoint);
+ allLeftMetadata.removeEndpoint(endpoint);
+ }
+
+ // At this stage newPendingRanges has been updated according to leaving and bootstrapping nodes.
+ // We can now finish the calculation by checking moving nodes.
+
+ // For each of the moving nodes, we do the same thing we did for bootstrapping:
+ // simply add and remove them one by one to allLeftMetadata and check in between what their ranges would be.
+ for (Pair<Token, InetAddress> moving : movingEndpoints)
+ {
+ InetAddress endpoint = moving.right; // address of the moving node
+
+ // moving.left is a new token of the endpoint
+ allLeftMetadata.updateNormalToken(moving.left, endpoint);
+
+ for (Range<Token> range : strategy.getAddressRanges(allLeftMetadata).get(endpoint))
+ {
+ newPendingRanges.put(range, endpoint);
+ }
+
+ allLeftMetadata.removeEndpoint(endpoint);
+ }
+
+ pendingRanges.put(keyspaceName, newPendingRanges);
+
+ if (logger.isDebugEnabled())
+ logger.debug("Pending ranges:\n{}", (pendingRanges.isEmpty() ? "<empty>" : printPendingRanges()));
+ }
+ finally
+ {
+ lock.readLock().unlock();
+ }
}
public Token getPredecessor(Token token)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/59600625/src/java/org/apache/cassandra/service/StorageService.java
----------------------------------------------------------------------