You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2013/11/21 03:58:06 UTC
[1/9] git commit: Improve initial FD phi estimate when starting up
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385
Updated Branches:
refs/heads/cassandra-1.2 b3ae77d7b -> e01224ede
refs/heads/cassandra-2.0 26d1b68c7 -> ed8117ff9
refs/heads/trunk 03e3d15e9 -> 256da6385
Improve initial FD phi estimate when starting up
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e01224ed
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e01224ed
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e01224ed
Branch: refs/heads/cassandra-1.2
Commit: e01224ede63e941fffa7b9b3906c1d54fb699bea
Parents: b3ae77d
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 17:52:45 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 18:00:15 2013 -0600
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/gms/FailureDetector.java | 47 +++++++++++---------
.../apache/cassandra/gms/ArrivalWindowTest.java | 7 +--
3 files changed, 29 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index dc56d90..b3fa565 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
1.2.13
* Optimize FD phi calculation (CASSANDRA-6386)
+ * Improve initial FD phi estimate when starting up (CASSANDRA-6385)
1.2.12
http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java
index e4ffb88..ee47997 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -49,6 +49,12 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
public static final IFailureDetector instance = new FailureDetector();
private static final Logger logger = LoggerFactory.getLogger(FailureDetector.class);
+ // this is useless except to provide backwards compatibility in phi_convict_threshold,
+ // because everyone seems pretty accustomed to the default of 8, and users who have
+ // already tuned their phi_convict_threshold for their own environments won't need to
+ // change.
+ private final double PHI_FACTOR = 1.0 / Math.log(10.0); // 0.434...
+
private final Map<InetAddress, ArrivalWindow> arrivalSamples = new Hashtable<InetAddress, ArrivalWindow>();
private final List<IFailureDetectionEventListener> fdEvntListeners = new CopyOnWriteArrayList<IFailureDetectionEventListener>();
@@ -183,12 +189,17 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
logger.trace("reporting {}", ep);
long now = System.currentTimeMillis();
ArrivalWindow heartbeatWindow = arrivalSamples.get(ep);
- if ( heartbeatWindow == null )
+ if (heartbeatWindow == null)
{
+ // avoid adding an empty ArrivalWindow to the Map
heartbeatWindow = new ArrivalWindow(SAMPLE_SIZE);
+ heartbeatWindow.add(now);
arrivalSamples.put(ep, heartbeatWindow);
}
- heartbeatWindow.add(now);
+ else
+ {
+ heartbeatWindow.add(now);
+ }
}
public void interpret(InetAddress ep)
@@ -203,7 +214,7 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
if (logger.isTraceEnabled())
logger.trace("PHI for " + ep + " : " + phi);
- if (phi > getPhiConvictThreshold())
+ if (PHI_FACTOR * phi > getPhiConvictThreshold())
{
logger.trace("notifying listeners that {} is down", ep);
logger.trace("intervals: {} mean: {}", hbWnd, hbWnd.mean());
@@ -266,12 +277,6 @@ class ArrivalWindow
private double tLast = 0L;
private final BoundedStatsDeque arrivalIntervals;
- // this is useless except to provide backwards compatibility in phi_convict_threshold,
- // because everyone seems pretty accustomed to the default of 8, and users who have
- // already tuned their phi_convict_threshold for their own environments won't need to
- // change.
- private final double PHI_FACTOR = 1.0 / Math.log(10.0);
-
// in the event of a long partition, never record an interval longer than the rpc timeout,
// since if a host is regularly experiencing connectivity problems lasting this long we'd
// rather mark it down quickly instead of adapting
@@ -284,19 +289,21 @@ class ArrivalWindow
synchronized void add(double value)
{
- double interArrivalTime;
- if ( tLast > 0L )
+ if (tLast > 0L)
{
- interArrivalTime = (value - tLast);
+ double interArrivalTime = (value - tLast);
+ if (interArrivalTime <= MAX_INTERVAL_IN_MS)
+ arrivalIntervals.add(interArrivalTime);
+ else
+ logger.debug("Ignoring interval time of {}", interArrivalTime);
}
else
{
- interArrivalTime = Gossiper.intervalInMillis / 2;
+ // We use a very large initial interval since the "right" average depends on the cluster size
+ // and it's better to err high (false negatives, which will be corrected by waiting a bit longer)
+ // than low (false positives, which cause "flapping").
+ arrivalIntervals.add(Gossiper.intervalInMillis * 30);
}
- if (interArrivalTime <= MAX_INTERVAL_IN_MS)
- arrivalIntervals.add(interArrivalTime);
- else
- logger.debug("Ignoring interval time of {}", interArrivalTime);
tLast = value;
}
@@ -308,11 +315,9 @@ class ArrivalWindow
// see CASSANDRA-2597 for an explanation of the math at work here.
double phi(long tnow)
{
- int size = arrivalIntervals.size();
+ assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive
double t = tnow - tLast;
- return (size > 0)
- ? PHI_FACTOR * t / mean()
- : 0.0;
+ return t / mean();
}
public String toString()
http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
index 277cae1..46fee34 100644
--- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
+++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
@@ -27,7 +27,6 @@ import org.junit.Test;
public class ArrivalWindowTest
{
-
@Test
public void test()
{
@@ -40,11 +39,9 @@ public class ArrivalWindowTest
window.add(555);
//all good
- assertEquals(0.4342, window.phi(666), 0.01);
+ assertEquals(1.0, window.phi(666), 0.01);
//oh noes, a much higher timestamp, something went wrong!
- assertEquals(9.566, window.phi(3000), 0.01);
+ assertEquals(22.03, window.phi(3000), 0.01);
}
-
-
}
[3/9] git commit: Improve initial FD phi estimate when starting up
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385
Posted by jb...@apache.org.
Improve initial FD phi estimate when starting up
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e01224ed
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e01224ed
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e01224ed
Branch: refs/heads/trunk
Commit: e01224ede63e941fffa7b9b3906c1d54fb699bea
Parents: b3ae77d
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 17:52:45 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 18:00:15 2013 -0600
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/gms/FailureDetector.java | 47 +++++++++++---------
.../apache/cassandra/gms/ArrivalWindowTest.java | 7 +--
3 files changed, 29 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index dc56d90..b3fa565 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
1.2.13
* Optimize FD phi calculation (CASSANDRA-6386)
+ * Improve initial FD phi estimate when starting up (CASSANDRA-6385)
1.2.12
http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java
index e4ffb88..ee47997 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -49,6 +49,12 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
public static final IFailureDetector instance = new FailureDetector();
private static final Logger logger = LoggerFactory.getLogger(FailureDetector.class);
+ // this is useless except to provide backwards compatibility in phi_convict_threshold,
+ // because everyone seems pretty accustomed to the default of 8, and users who have
+ // already tuned their phi_convict_threshold for their own environments won't need to
+ // change.
+ private final double PHI_FACTOR = 1.0 / Math.log(10.0); // 0.434...
+
private final Map<InetAddress, ArrivalWindow> arrivalSamples = new Hashtable<InetAddress, ArrivalWindow>();
private final List<IFailureDetectionEventListener> fdEvntListeners = new CopyOnWriteArrayList<IFailureDetectionEventListener>();
@@ -183,12 +189,17 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
logger.trace("reporting {}", ep);
long now = System.currentTimeMillis();
ArrivalWindow heartbeatWindow = arrivalSamples.get(ep);
- if ( heartbeatWindow == null )
+ if (heartbeatWindow == null)
{
+ // avoid adding an empty ArrivalWindow to the Map
heartbeatWindow = new ArrivalWindow(SAMPLE_SIZE);
+ heartbeatWindow.add(now);
arrivalSamples.put(ep, heartbeatWindow);
}
- heartbeatWindow.add(now);
+ else
+ {
+ heartbeatWindow.add(now);
+ }
}
public void interpret(InetAddress ep)
@@ -203,7 +214,7 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
if (logger.isTraceEnabled())
logger.trace("PHI for " + ep + " : " + phi);
- if (phi > getPhiConvictThreshold())
+ if (PHI_FACTOR * phi > getPhiConvictThreshold())
{
logger.trace("notifying listeners that {} is down", ep);
logger.trace("intervals: {} mean: {}", hbWnd, hbWnd.mean());
@@ -266,12 +277,6 @@ class ArrivalWindow
private double tLast = 0L;
private final BoundedStatsDeque arrivalIntervals;
- // this is useless except to provide backwards compatibility in phi_convict_threshold,
- // because everyone seems pretty accustomed to the default of 8, and users who have
- // already tuned their phi_convict_threshold for their own environments won't need to
- // change.
- private final double PHI_FACTOR = 1.0 / Math.log(10.0);
-
// in the event of a long partition, never record an interval longer than the rpc timeout,
// since if a host is regularly experiencing connectivity problems lasting this long we'd
// rather mark it down quickly instead of adapting
@@ -284,19 +289,21 @@ class ArrivalWindow
synchronized void add(double value)
{
- double interArrivalTime;
- if ( tLast > 0L )
+ if (tLast > 0L)
{
- interArrivalTime = (value - tLast);
+ double interArrivalTime = (value - tLast);
+ if (interArrivalTime <= MAX_INTERVAL_IN_MS)
+ arrivalIntervals.add(interArrivalTime);
+ else
+ logger.debug("Ignoring interval time of {}", interArrivalTime);
}
else
{
- interArrivalTime = Gossiper.intervalInMillis / 2;
+ // We use a very large initial interval since the "right" average depends on the cluster size
+ // and it's better to err high (false negatives, which will be corrected by waiting a bit longer)
+ // than low (false positives, which cause "flapping").
+ arrivalIntervals.add(Gossiper.intervalInMillis * 30);
}
- if (interArrivalTime <= MAX_INTERVAL_IN_MS)
- arrivalIntervals.add(interArrivalTime);
- else
- logger.debug("Ignoring interval time of {}", interArrivalTime);
tLast = value;
}
@@ -308,11 +315,9 @@ class ArrivalWindow
// see CASSANDRA-2597 for an explanation of the math at work here.
double phi(long tnow)
{
- int size = arrivalIntervals.size();
+ assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive
double t = tnow - tLast;
- return (size > 0)
- ? PHI_FACTOR * t / mean()
- : 0.0;
+ return t / mean();
}
public String toString()
http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
index 277cae1..46fee34 100644
--- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
+++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
@@ -27,7 +27,6 @@ import org.junit.Test;
public class ArrivalWindowTest
{
-
@Test
public void test()
{
@@ -40,11 +39,9 @@ public class ArrivalWindowTest
window.add(555);
//all good
- assertEquals(0.4342, window.phi(666), 0.01);
+ assertEquals(1.0, window.phi(666), 0.01);
//oh noes, a much higher timestamp, something went wrong!
- assertEquals(9.566, window.phi(3000), 0.01);
+ assertEquals(22.03, window.phi(3000), 0.01);
}
-
-
}
[8/9] git commit: Merge branch 'cassandra-2.0' into trunk
Posted by jb...@apache.org.
Merge branch 'cassandra-2.0' into trunk
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/ac3afd3b
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/ac3afd3b
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/ac3afd3b
Branch: refs/heads/trunk
Commit: ac3afd3bc45481330eb3d57329512f7a593ecf19
Parents: 10a2089 ed8117f
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:57:36 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:57:36 2013 -0600
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/gms/FailureDetector.java | 41 +++++++++++++-------
.../cassandra/utils/BoundedStatsDeque.java | 2 +-
.../apache/cassandra/gms/ArrivalWindowTest.java | 9 ++---
4 files changed, 31 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ac3afd3b/CHANGES.txt
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ac3afd3b/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
[5/9] git commit: merge from 1.2
Posted by jb...@apache.org.
merge from 1.2
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/91d56bf5
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/91d56bf5
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/91d56bf5
Branch: refs/heads/trunk
Commit: 91d56bf5cadd25674e4034462f5a0a2ee2ab4132
Parents: 26d1b68 e01224e
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:55:51 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:55:51 2013 -0600
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/gms/FailureDetector.java | 41 +++++++++++++-------
.../apache/cassandra/gms/ArrivalWindowTest.java | 9 ++---
3 files changed, 30 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 00b82c0,b3fa565..6ae8e8c
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,33 -1,9 +1,34 @@@
-1.2.13
+2.0.3
+ * Fix FD leak on slice read path (CASSANDRA-6275)
+ * Cancel read meter task when closing SSTR (CASSANDRA-6358)
+ * free off-heap IndexSummary during bulk (CASSANDRA-6359)
+ * Recover from IOException in accept() thread (CASSANDRA-6349)
+ * Improve Gossip tolerance of abnormally slow tasks (CASSANDRA-6338)
+ * Fix trying to hint timed out counter writes (CASSANDRA-6322)
+ * Allow restoring specific columnfamilies from archived CL (CASSANDRA-4809)
+ * Avoid flushing compaction_history after each operation (CASSANDRA-6287)
+ * Fix repair assertion error when tombstones expire (CASSANDRA-6277)
+ * Skip loading corrupt key cache (CASSANDRA-6260)
+ * Fixes for compacting larger-than-memory rows (CASSANDRA-6274)
+ * Compact hottest sstables first and optionally omit coldest from
+ compaction entirely (CASSANDRA-6109)
+ * Fix modifying column_metadata from thrift (CASSANDRA-6182)
+ * cqlsh: fix LIST USERS output (CASSANDRA-6242)
+ * Add IRequestSink interface (CASSANDRA-6248)
+ * Update memtable size while flushing (CASSANDRA-6249)
+ * Provide hooks around CQL2/CQL3 statement execution (CASSANDRA-6252)
+ * Require Permission.SELECT for CAS updates (CASSANDRA-6247)
+ * New CQL-aware SSTableWriter (CASSANDRA-5894)
+ * Reject CAS operation when the protocol v1 is used (CASSANDRA-6270)
+ * Correctly throw error when frame too large (CASSANDRA-5981)
+ * Fix serialization bug in PagedRange with 2ndary indexes (CASSANDRA-6299)
+ * Fix CQL3 table validation in Thrift (CASSANDRA-6140)
+ * Fix bug missing results with IN clauses (CASSANDRA-6327)
+ * Fix paging with reversed slices (CASSANDRA-6343)
+ * Set minTimestamp correctly to be able to drop expired sstables (CASSANDRA-6337)
+Merged from 1.2:
* Optimize FD phi calculation (CASSANDRA-6386)
+ * Improve initial FD phi estimate when starting up (CASSANDRA-6385)
-
-
-1.2.12
* Invalidate row cache when dropping CF (CASSANDRA-6351)
* add non-jamm path for cached statements (CASSANDRA-6293)
* (Hadoop) Require CFRR batchSize to be at least 2 (CASSANDRA-6114)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/gms/FailureDetector.java
index bb07154,ee47997..4e5ba90
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@@ -284,22 -287,23 +295,24 @@@ class ArrivalWindo
arrivalIntervals = new BoundedStatsDeque(size);
}
- synchronized void add(double value)
+ synchronized void add(long value)
{
- long interArrivalTime;
- if (tLast != 0L)
++ assert tLast >= 0;
+ if (tLast > 0L)
{
- interArrivalTime = (value - tLast);
- double interArrivalTime = (value - tLast);
- if (interArrivalTime <= MAX_INTERVAL_IN_MS)
++ long interArrivalTime = (value - tLast);
++ if (interArrivalTime <= MAX_INTERVAL_IN_NANO)
+ arrivalIntervals.add(interArrivalTime);
+ else
+ logger.debug("Ignoring interval time of {}", interArrivalTime);
}
else
{
- interArrivalTime = (Gossiper.intervalInMillis * MILLI_TO_NANO) / 2;
+ // We use a very large initial interval since the "right" average depends on the cluster size
+ // and it's better to err high (false negatives, which will be corrected by waiting a bit longer)
+ // than low (false positives, which cause "flapping").
- arrivalIntervals.add(Gossiper.intervalInMillis * 30);
++ arrivalIntervals.add(Gossiper.intervalInMillis * MILLI_TO_NANO * 30);
}
-
- if (interArrivalTime <= MAX_INTERVAL_IN_NANO)
- arrivalIntervals.add(interArrivalTime);
- else
- logger.debug("Ignoring interval time of {}", interArrivalTime);
tLast = value;
}
@@@ -311,11 -315,9 +324,9 @@@
// see CASSANDRA-2597 for an explanation of the math at work here.
double phi(long tnow)
{
- int size = arrivalIntervals.size();
+ assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive
- double t = tnow - tLast;
+ long t = tnow - tLast;
- return (size > 0)
- ? PHI_FACTOR * (t / mean())
- : 0.0;
+ return t / mean();
}
public String toString()
http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
----------------------------------------------------------------------
diff --cc test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
index d05ba96,46fee34..e678d86
--- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
+++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
@@@ -28,23 -28,20 +28,20 @@@ import org.junit.Test
public class ArrivalWindowTest
{
@Test
- public void test()
+ public void testWithNanoTime()
{
- ArrivalWindow window = new ArrivalWindow(4);
- //base readings
- window.add(111);
- window.add(222);
- window.add(333);
- window.add(444);
- window.add(555);
+ final ArrivalWindow windowWithNano = new ArrivalWindow(4);
+ final long toNano = 1000000L;
- //all good
- assertEquals(1.0, window.phi(666), 0.01);
+ windowWithNano.add(111 * toNano);
+ windowWithNano.add(222 * toNano);
+ windowWithNano.add(333 * toNano);
+ windowWithNano.add(444 * toNano);
+ windowWithNano.add(555 * toNano);
-
- assertEquals(0.4342, windowWithNano.phi(666 * toNano), 0.01);
++ //all good
++ assertEquals(1.0, windowWithNano.phi(666 * toNano), 0.01);
//oh noes, a much higher timestamp, something went wrong!
- assertEquals(9.566, windowWithNano.phi(3000 * toNano), 0.01);
-
- assertEquals(22.03, window.phi(3000), 0.01);
++ assertEquals(22.03, windowWithNano.phi(3000 * toNano), 0.01);
}
-
-
}
[7/9] git commit: convert sum to double before dividing
Posted by jb...@apache.org.
convert sum to double before dividing
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/ed8117ff
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/ed8117ff
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/ed8117ff
Branch: refs/heads/cassandra-2.0
Commit: ed8117ff9b69511c0c3a715030daa7da004afe86
Parents: 91d56bf
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:57:27 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:57:27 2013 -0600
----------------------------------------------------------------------
src/java/org/apache/cassandra/utils/BoundedStatsDeque.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ed8117ff/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java b/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
index 2491e8d..3983b74 100644
--- a/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
+++ b/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
@@ -67,6 +67,6 @@ public class BoundedStatsDeque implements Iterable<Long>
public double mean()
{
- return size() > 0 ? sum() / size() : 0;
+ return size() > 0 ? ((double) sum()) / size() : 0;
}
}
[2/9] git commit: Improve initial FD phi estimate when starting up
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385
Posted by jb...@apache.org.
Improve initial FD phi estimate when starting up
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e01224ed
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e01224ed
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e01224ed
Branch: refs/heads/cassandra-2.0
Commit: e01224ede63e941fffa7b9b3906c1d54fb699bea
Parents: b3ae77d
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 17:52:45 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 18:00:15 2013 -0600
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/gms/FailureDetector.java | 47 +++++++++++---------
.../apache/cassandra/gms/ArrivalWindowTest.java | 7 +--
3 files changed, 29 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index dc56d90..b3fa565 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
1.2.13
* Optimize FD phi calculation (CASSANDRA-6386)
+ * Improve initial FD phi estimate when starting up (CASSANDRA-6385)
1.2.12
http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java
index e4ffb88..ee47997 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -49,6 +49,12 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
public static final IFailureDetector instance = new FailureDetector();
private static final Logger logger = LoggerFactory.getLogger(FailureDetector.class);
+ // this is useless except to provide backwards compatibility in phi_convict_threshold,
+ // because everyone seems pretty accustomed to the default of 8, and users who have
+ // already tuned their phi_convict_threshold for their own environments won't need to
+ // change.
+ private final double PHI_FACTOR = 1.0 / Math.log(10.0); // 0.434...
+
private final Map<InetAddress, ArrivalWindow> arrivalSamples = new Hashtable<InetAddress, ArrivalWindow>();
private final List<IFailureDetectionEventListener> fdEvntListeners = new CopyOnWriteArrayList<IFailureDetectionEventListener>();
@@ -183,12 +189,17 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
logger.trace("reporting {}", ep);
long now = System.currentTimeMillis();
ArrivalWindow heartbeatWindow = arrivalSamples.get(ep);
- if ( heartbeatWindow == null )
+ if (heartbeatWindow == null)
{
+ // avoid adding an empty ArrivalWindow to the Map
heartbeatWindow = new ArrivalWindow(SAMPLE_SIZE);
+ heartbeatWindow.add(now);
arrivalSamples.put(ep, heartbeatWindow);
}
- heartbeatWindow.add(now);
+ else
+ {
+ heartbeatWindow.add(now);
+ }
}
public void interpret(InetAddress ep)
@@ -203,7 +214,7 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
if (logger.isTraceEnabled())
logger.trace("PHI for " + ep + " : " + phi);
- if (phi > getPhiConvictThreshold())
+ if (PHI_FACTOR * phi > getPhiConvictThreshold())
{
logger.trace("notifying listeners that {} is down", ep);
logger.trace("intervals: {} mean: {}", hbWnd, hbWnd.mean());
@@ -266,12 +277,6 @@ class ArrivalWindow
private double tLast = 0L;
private final BoundedStatsDeque arrivalIntervals;
- // this is useless except to provide backwards compatibility in phi_convict_threshold,
- // because everyone seems pretty accustomed to the default of 8, and users who have
- // already tuned their phi_convict_threshold for their own environments won't need to
- // change.
- private final double PHI_FACTOR = 1.0 / Math.log(10.0);
-
// in the event of a long partition, never record an interval longer than the rpc timeout,
// since if a host is regularly experiencing connectivity problems lasting this long we'd
// rather mark it down quickly instead of adapting
@@ -284,19 +289,21 @@ class ArrivalWindow
synchronized void add(double value)
{
- double interArrivalTime;
- if ( tLast > 0L )
+ if (tLast > 0L)
{
- interArrivalTime = (value - tLast);
+ double interArrivalTime = (value - tLast);
+ if (interArrivalTime <= MAX_INTERVAL_IN_MS)
+ arrivalIntervals.add(interArrivalTime);
+ else
+ logger.debug("Ignoring interval time of {}", interArrivalTime);
}
else
{
- interArrivalTime = Gossiper.intervalInMillis / 2;
+ // We use a very large initial interval since the "right" average depends on the cluster size
+ // and it's better to err high (false negatives, which will be corrected by waiting a bit longer)
+ // than low (false positives, which cause "flapping").
+ arrivalIntervals.add(Gossiper.intervalInMillis * 30);
}
- if (interArrivalTime <= MAX_INTERVAL_IN_MS)
- arrivalIntervals.add(interArrivalTime);
- else
- logger.debug("Ignoring interval time of {}", interArrivalTime);
tLast = value;
}
@@ -308,11 +315,9 @@ class ArrivalWindow
// see CASSANDRA-2597 for an explanation of the math at work here.
double phi(long tnow)
{
- int size = arrivalIntervals.size();
+ assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive
double t = tnow - tLast;
- return (size > 0)
- ? PHI_FACTOR * t / mean()
- : 0.0;
+ return t / mean();
}
public String toString()
http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
index 277cae1..46fee34 100644
--- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
+++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
@@ -27,7 +27,6 @@ import org.junit.Test;
public class ArrivalWindowTest
{
-
@Test
public void test()
{
@@ -40,11 +39,9 @@ public class ArrivalWindowTest
window.add(555);
//all good
- assertEquals(0.4342, window.phi(666), 0.01);
+ assertEquals(1.0, window.phi(666), 0.01);
//oh noes, a much higher timestamp, something went wrong!
- assertEquals(9.566, window.phi(3000), 0.01);
+ assertEquals(22.03, window.phi(3000), 0.01);
}
-
-
}
[4/9] git commit: merge from 1.2
Posted by jb...@apache.org.
merge from 1.2
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/91d56bf5
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/91d56bf5
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/91d56bf5
Branch: refs/heads/cassandra-2.0
Commit: 91d56bf5cadd25674e4034462f5a0a2ee2ab4132
Parents: 26d1b68 e01224e
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:55:51 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:55:51 2013 -0600
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/gms/FailureDetector.java | 41 +++++++++++++-------
.../apache/cassandra/gms/ArrivalWindowTest.java | 9 ++---
3 files changed, 30 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 00b82c0,b3fa565..6ae8e8c
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,33 -1,9 +1,34 @@@
-1.2.13
+2.0.3
+ * Fix FD leak on slice read path (CASSANDRA-6275)
+ * Cancel read meter task when closing SSTR (CASSANDRA-6358)
+ * free off-heap IndexSummary during bulk (CASSANDRA-6359)
+ * Recover from IOException in accept() thread (CASSANDRA-6349)
+ * Improve Gossip tolerance of abnormally slow tasks (CASSANDRA-6338)
+ * Fix trying to hint timed out counter writes (CASSANDRA-6322)
+ * Allow restoring specific columnfamilies from archived CL (CASSANDRA-4809)
+ * Avoid flushing compaction_history after each operation (CASSANDRA-6287)
+ * Fix repair assertion error when tombstones expire (CASSANDRA-6277)
+ * Skip loading corrupt key cache (CASSANDRA-6260)
+ * Fixes for compacting larger-than-memory rows (CASSANDRA-6274)
+ * Compact hottest sstables first and optionally omit coldest from
+ compaction entirely (CASSANDRA-6109)
+ * Fix modifying column_metadata from thrift (CASSANDRA-6182)
+ * cqlsh: fix LIST USERS output (CASSANDRA-6242)
+ * Add IRequestSink interface (CASSANDRA-6248)
+ * Update memtable size while flushing (CASSANDRA-6249)
+ * Provide hooks around CQL2/CQL3 statement execution (CASSANDRA-6252)
+ * Require Permission.SELECT for CAS updates (CASSANDRA-6247)
+ * New CQL-aware SSTableWriter (CASSANDRA-5894)
+ * Reject CAS operation when the protocol v1 is used (CASSANDRA-6270)
+ * Correctly throw error when frame too large (CASSANDRA-5981)
+ * Fix serialization bug in PagedRange with 2ndary indexes (CASSANDRA-6299)
+ * Fix CQL3 table validation in Thrift (CASSANDRA-6140)
+ * Fix bug missing results with IN clauses (CASSANDRA-6327)
+ * Fix paging with reversed slices (CASSANDRA-6343)
+ * Set minTimestamp correctly to be able to drop expired sstables (CASSANDRA-6337)
+Merged from 1.2:
* Optimize FD phi calculation (CASSANDRA-6386)
+ * Improve initial FD phi estimate when starting up (CASSANDRA-6385)
-
-
-1.2.12
* Invalidate row cache when dropping CF (CASSANDRA-6351)
* add non-jamm path for cached statements (CASSANDRA-6293)
* (Hadoop) Require CFRR batchSize to be at least 2 (CASSANDRA-6114)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/gms/FailureDetector.java
index bb07154,ee47997..4e5ba90
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@@ -284,22 -287,23 +295,24 @@@ class ArrivalWindo
arrivalIntervals = new BoundedStatsDeque(size);
}
- synchronized void add(double value)
+ synchronized void add(long value)
{
- long interArrivalTime;
- if (tLast != 0L)
++ assert tLast >= 0;
+ if (tLast > 0L)
{
- interArrivalTime = (value - tLast);
- double interArrivalTime = (value - tLast);
- if (interArrivalTime <= MAX_INTERVAL_IN_MS)
++ long interArrivalTime = (value - tLast);
++ if (interArrivalTime <= MAX_INTERVAL_IN_NANO)
+ arrivalIntervals.add(interArrivalTime);
+ else
+ logger.debug("Ignoring interval time of {}", interArrivalTime);
}
else
{
- interArrivalTime = (Gossiper.intervalInMillis * MILLI_TO_NANO) / 2;
+ // We use a very large initial interval since the "right" average depends on the cluster size
+ // and it's better to err high (false negatives, which will be corrected by waiting a bit longer)
+ // than low (false positives, which cause "flapping").
- arrivalIntervals.add(Gossiper.intervalInMillis * 30);
++ arrivalIntervals.add(Gossiper.intervalInMillis * MILLI_TO_NANO * 30);
}
-
- if (interArrivalTime <= MAX_INTERVAL_IN_NANO)
- arrivalIntervals.add(interArrivalTime);
- else
- logger.debug("Ignoring interval time of {}", interArrivalTime);
tLast = value;
}
@@@ -311,11 -315,9 +324,9 @@@
// see CASSANDRA-2597 for an explanation of the math at work here.
double phi(long tnow)
{
- int size = arrivalIntervals.size();
+ assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive
- double t = tnow - tLast;
+ long t = tnow - tLast;
- return (size > 0)
- ? PHI_FACTOR * (t / mean())
- : 0.0;
+ return t / mean();
}
public String toString()
http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
----------------------------------------------------------------------
diff --cc test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
index d05ba96,46fee34..e678d86
--- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
+++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
@@@ -28,23 -28,20 +28,20 @@@ import org.junit.Test
public class ArrivalWindowTest
{
@Test
- public void test()
+ public void testWithNanoTime()
{
- ArrivalWindow window = new ArrivalWindow(4);
- //base readings
- window.add(111);
- window.add(222);
- window.add(333);
- window.add(444);
- window.add(555);
+ final ArrivalWindow windowWithNano = new ArrivalWindow(4);
+ final long toNano = 1000000L;
- //all good
- assertEquals(1.0, window.phi(666), 0.01);
+ windowWithNano.add(111 * toNano);
+ windowWithNano.add(222 * toNano);
+ windowWithNano.add(333 * toNano);
+ windowWithNano.add(444 * toNano);
+ windowWithNano.add(555 * toNano);
-
- assertEquals(0.4342, windowWithNano.phi(666 * toNano), 0.01);
++ //all good
++ assertEquals(1.0, windowWithNano.phi(666 * toNano), 0.01);
//oh noes, a much higher timestamp, something went wrong!
- assertEquals(9.566, windowWithNano.phi(3000 * toNano), 0.01);
-
- assertEquals(22.03, window.phi(3000), 0.01);
++ assertEquals(22.03, windowWithNano.phi(3000 * toNano), 0.01);
}
-
-
}
[9/9] git commit: Merge remote-tracking branch 'origin/trunk' into trunk
Posted by jb...@apache.org.
Merge remote-tracking branch 'origin/trunk' into trunk
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/256da638
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/256da638
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/256da638
Branch: refs/heads/trunk
Commit: 256da638594d84010a1cb5131db0c05cfce2d1eb
Parents: ac3afd3 03e3d15
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:57:54 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:57:54 2013 -0600
----------------------------------------------------------------------
.../cassandra/db/ColumnFamilyStoreTest.java | 56 +++++++-------------
1 file changed, 20 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
[6/9] git commit: convert sum to double before dividing
Posted by jb...@apache.org.
convert sum to double before dividing
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/ed8117ff
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/ed8117ff
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/ed8117ff
Branch: refs/heads/trunk
Commit: ed8117ff9b69511c0c3a715030daa7da004afe86
Parents: 91d56bf
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:57:27 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:57:27 2013 -0600
----------------------------------------------------------------------
src/java/org/apache/cassandra/utils/BoundedStatsDeque.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/ed8117ff/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java b/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
index 2491e8d..3983b74 100644
--- a/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
+++ b/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
@@ -67,6 +67,6 @@ public class BoundedStatsDeque implements Iterable<Long>
public double mean()
{
- return size() > 0 ? sum() / size() : 0;
+ return size() > 0 ? ((double) sum()) / size() : 0;
}
}