You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by pa...@apache.org on 2017/04/20 13:49:02 UTC

[4/9] cassandra git commit: Fail repair if insufficient responses received

Fail repair if insufficient responses received

Patch by Simon Zhou; Reviewed by Paulo Motta for CASSANDRA-13397


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/f5b36f12
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/f5b36f12
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/f5b36f12

Branch: refs/heads/cassandra-3.11
Commit: f5b36f12df65a780a52851207c285db7a8b4122f
Parents: 175e4f8
Author: Simon Zhou <sz...@uber.com>
Authored: Fri Mar 31 20:53:39 2017 -0700
Committer: Paulo Motta <pa...@apache.org>
Committed: Thu Apr 20 10:26:16 2017 -0300

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../cassandra/service/ActiveRepairService.java  | 22 +++++++++++++-------
 2 files changed, 15 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/f5b36f12/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 6a1e486..7a860fe 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0.14
+ * Fail repair if insufficient responses received (CASSANDRA-13397)
  * Fix SSTableLoader fail when the loaded table contains dropped columns (CASSANDRA-13276)
  * Avoid name clashes in CassandraIndexTest (CASSANDRA-13427)
  * Handling partially written hint files (CASSANDRA-12728) 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f5b36f12/src/java/org/apache/cassandra/service/ActiveRepairService.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/ActiveRepairService.java b/src/java/org/apache/cassandra/service/ActiveRepairService.java
index 11d4617..b4cea79 100644
--- a/src/java/org/apache/cassandra/service/ActiveRepairService.java
+++ b/src/java/org/apache/cassandra/service/ActiveRepairService.java
@@ -321,30 +321,36 @@ public class ActiveRepairService implements IEndpointStateChangeSubscriber, IFai
             }
             else
             {
-                status.set(false);
-                failedNodes.add(neighbour.getHostAddress());
-                prepareLatch.countDown();
+                // bailout early to avoid potentially waiting for a long time.
+                failRepair(parentRepairSession, "Endpoint not alive: " + neighbour);
             }
         }
+
         try
         {
-            prepareLatch.await(1, TimeUnit.HOURS);
+            // Failed repair is expensive so we wait for longer time.
+            if (!prepareLatch.await(1, TimeUnit.HOURS)) {
+                failRepair(parentRepairSession, "Did not get replies from all endpoints.");
+            }
         }
         catch (InterruptedException e)
         {
-            removeParentRepairSession(parentRepairSession);
-            throw new RuntimeException("Did not get replies from all endpoints. List of failed endpoint(s): " + failedNodes.toString(), e);
+            failRepair(parentRepairSession, "Interrupted while waiting for prepare repair response.");
         }
 
         if (!status.get())
         {
-            removeParentRepairSession(parentRepairSession);
-            throw new RuntimeException("Did not get positive replies from all endpoints. List of failed endpoint(s): " + failedNodes.toString());
+            failRepair(parentRepairSession, "Got negative replies from endpoints " + failedNodes);
         }
 
         return parentRepairSession;
     }
 
+    private void failRepair(UUID parentRepairSession, String errorMsg) {
+        removeParentRepairSession(parentRepairSession);
+        throw new RuntimeException(errorMsg);
+    }
+
     public void registerParentRepairSession(UUID parentRepairSession, InetAddress coordinator, List<ColumnFamilyStore> columnFamilyStores, Collection<Range<Token>> ranges, boolean isIncremental, long timestamp, boolean isGlobal)
     {
         if (!registeredForEndpointChanges)