You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by br...@apache.org on 2015/05/13 01:40:30 UTC
[2/4] cassandra git commit: Failure detector detects and ignores local pauses
Failure detector detects and ignores local pauses
Patch by brandonwilliams, reviewed by Richard Low for CASSANDRA-9183
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/4012134f
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/4012134f
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/4012134f
Branch: refs/heads/trunk
Commit: 4012134f6e03b61bbf5ce64cc456cf7592675d67
Parents: 75e5b3b
Author: Brandon Williams <br...@apache.org>
Authored: Tue May 12 18:38:48 2015 -0500
Committer: Brandon Williams <br...@apache.org>
Committed: Tue May 12 18:38:48 2015 -0500
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/gms/FailureDetector.java | 29 ++++++++++++++++++++
2 files changed, 30 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 1643f9c..7cb0dfd 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
2.1.6
+ * Failure detector detects and ignores local pauses (CASSANDRA-9183)
* Add utility class to support for rate limiting a given log statement (CASSANDRA-9029)
* Add missing consistency levels to cassandra-stess (CASSANDRA-9361)
* Fix commitlog getCompletedTasks to not increment (CASSANDRA-9339)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java
index 0c40ae3..322aae2 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -48,6 +48,22 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
public static final String MBEAN_NAME = "org.apache.cassandra.net:type=FailureDetector";
private static final int SAMPLE_SIZE = 1000;
protected static final long INITIAL_VALUE_NANOS = TimeUnit.NANOSECONDS.convert(getInitialValue(), TimeUnit.MILLISECONDS);
+ private static final long DEFAULT_MAX_PAUSE = 5000L * 1000000L; // 5 seconds
+ private static final long MAX_LOCAL_PAUSE_IN_NANOS = getMaxLocalPause();
+ private long lastInterpret = System.nanoTime();
+ private boolean wasPaused = false;
+
+ private static long getMaxLocalPause()
+ {
+ if (System.getProperty("cassandra.max_local_pause_in_ms") != null)
+ {
+ long pause = Long.parseLong(System.getProperty("cassandra.max_local_pause_in_ms"));
+ logger.warn("Overriding max local pause time to {}ms", pause);
+ return pause * 1000000L;
+ }
+ else
+ return DEFAULT_MAX_PAUSE;
+ }
public static final IFailureDetector instance = new FailureDetector();
@@ -228,6 +244,19 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
return;
}
long now = System.nanoTime();
+ long diff = now - lastInterpret;
+ lastInterpret = now;
+ if (diff > MAX_LOCAL_PAUSE_IN_NANOS)
+ {
+ logger.warn("Not marking nodes down due to local pause of {} > {}", diff, MAX_LOCAL_PAUSE_IN_NANOS);
+ wasPaused = true;
+ return;
+ }
+ if (wasPaused)
+ {
+ wasPaused = false;
+ return;
+ }
double phi = hbWnd.phi(now);
if (logger.isTraceEnabled())
logger.trace("PHI for " + ep + " : " + phi);