You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by pa...@apache.org on 2017/04/20 13:49:02 UTC
[4/9] cassandra git commit: Fail repair if insufficient responses received
Fail repair if insufficient responses received
Patch by Simon Zhou; Reviewed by Paulo Motta for CASSANDRA-13397
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/f5b36f12
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/f5b36f12
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/f5b36f12
Branch: refs/heads/cassandra-3.11
Commit: f5b36f12df65a780a52851207c285db7a8b4122f
Parents: 175e4f8
Author: Simon Zhou <sz...@uber.com>
Authored: Fri Mar 31 20:53:39 2017 -0700
Committer: Paulo Motta <pa...@apache.org>
Committed: Thu Apr 20 10:26:16 2017 -0300
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../cassandra/service/ActiveRepairService.java | 22 +++++++++++++-------
2 files changed, 15 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/f5b36f12/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 6a1e486..7a860fe 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
3.0.14
+ * Fail repair if insufficient responses received (CASSANDRA-13397)
* Fix SSTableLoader fail when the loaded table contains dropped columns (CASSANDRA-13276)
* Avoid name clashes in CassandraIndexTest (CASSANDRA-13427)
* Handling partially written hint files (CASSANDRA-12728)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/f5b36f12/src/java/org/apache/cassandra/service/ActiveRepairService.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java
index 11d4617..b4cea79 100644
--- a/src/java/org/apache/cassandra/service/ActiveRepairService.java
+++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java
@@ -321,30 +321,36 @@ public class ActiveRepairService implements IEndpointStateChangeSubscriber, IFai
}
else
{
- status.set(false);
- failedNodes.add(neighbour.getHostAddress());
- prepareLatch.countDown();
+ // bailout early to avoid potentially waiting for a long time.
+ failRepair(parentRepairSession, "Endpoint not alive: " + neighbour);
}
}
+
try
{
- prepareLatch.await(1, TimeUnit.HOURS);
+ // Failed repair is expensive so we wait for longer time.
+ if (!prepareLatch.await(1, TimeUnit.HOURS)) {
+ failRepair(parentRepairSession, "Did not get replies from all endpoints.");
+ }
}
catch (InterruptedException e)
{
- removeParentRepairSession(parentRepairSession);
- throw new RuntimeException("Did not get replies from all endpoints. List of failed endpoint(s): " + failedNodes.toString(), e);
+ failRepair(parentRepairSession, "Interrupted while waiting for prepare repair response.");
}
if (!status.get())
{
- removeParentRepairSession(parentRepairSession);
- throw new RuntimeException("Did not get positive replies from all endpoints. List of failed endpoint(s): " + failedNodes.toString());
+ failRepair(parentRepairSession, "Got negative replies from endpoints " + failedNodes);
}
return parentRepairSession;
}
+ private void failRepair(UUID parentRepairSession, String errorMsg) {
+ removeParentRepairSession(parentRepairSession);
+ throw new RuntimeException(errorMsg);
+ }
+
public void registerParentRepairSession(UUID parentRepairSession, InetAddress coordinator, List<ColumnFamilyStore> columnFamilyStores, Collection<Range<Token>> ranges, boolean isIncremental, long timestamp, boolean isGlobal)
{
if (!registeredForEndpointChanges)