You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2013/11/21 03:58:06 UTC

[1/9] git commit: Improve initial FD phi estimate when starting up; patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385

Updated Branches:
  refs/heads/cassandra-1.2 b3ae77d7b -> e01224ede
  refs/heads/cassandra-2.0 26d1b68c7 -> ed8117ff9
  refs/heads/trunk 03e3d15e9 -> 256da6385


Improve initial FD phi estimate when starting up
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e01224ed
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e01224ed
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e01224ed

Branch: refs/heads/cassandra-1.2
Commit: e01224ede63e941fffa7b9b3906c1d54fb699bea
Parents: b3ae77d
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 17:52:45 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 18:00:15 2013 -0600

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 47 +++++++++++---------
 .../apache/cassandra/gms/ArrivalWindowTest.java |  7 +--
 3 files changed, 29 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index dc56d90..b3fa565 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
 1.2.13
  * Optimize FD phi calculation (CASSANDRA-6386)
+ * Improve initial FD phi estimate when starting up (CASSANDRA-6385)
 
 
 1.2.12

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java
index e4ffb88..ee47997 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -49,6 +49,12 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
     public static final IFailureDetector instance = new FailureDetector();
     private static final Logger logger = LoggerFactory.getLogger(FailureDetector.class);
 
+    // this is useless except to provide backwards compatibility in phi_convict_threshold,
+    // because everyone seems pretty accustomed to the default of 8, and users who have
+    // already tuned their phi_convict_threshold for their own environments won't need to
+    // change.
+    private final double PHI_FACTOR = 1.0 / Math.log(10.0); // 0.434...
+
     private final Map<InetAddress, ArrivalWindow> arrivalSamples = new Hashtable<InetAddress, ArrivalWindow>();
     private final List<IFailureDetectionEventListener> fdEvntListeners = new CopyOnWriteArrayList<IFailureDetectionEventListener>();
 
@@ -183,12 +189,17 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
             logger.trace("reporting {}", ep);
         long now = System.currentTimeMillis();
         ArrivalWindow heartbeatWindow = arrivalSamples.get(ep);
-        if ( heartbeatWindow == null )
+        if (heartbeatWindow == null)
         {
+            // avoid adding an empty ArrivalWindow to the Map
             heartbeatWindow = new ArrivalWindow(SAMPLE_SIZE);
+            heartbeatWindow.add(now);
             arrivalSamples.put(ep, heartbeatWindow);
         }
-        heartbeatWindow.add(now);
+        else
+        {
+            heartbeatWindow.add(now);
+        }
     }
 
     public void interpret(InetAddress ep)
@@ -203,7 +214,7 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
         if (logger.isTraceEnabled())
             logger.trace("PHI for " + ep + " : " + phi);
 
-        if (phi > getPhiConvictThreshold())
+        if (PHI_FACTOR * phi > getPhiConvictThreshold())
         {
             logger.trace("notifying listeners that {} is down", ep);
             logger.trace("intervals: {} mean: {}", hbWnd, hbWnd.mean());
@@ -266,12 +277,6 @@ class ArrivalWindow
     private double tLast = 0L;
     private final BoundedStatsDeque arrivalIntervals;
 
-    // this is useless except to provide backwards compatibility in phi_convict_threshold,
-    // because everyone seems pretty accustomed to the default of 8, and users who have
-    // already tuned their phi_convict_threshold for their own environments won't need to
-    // change.
-    private final double PHI_FACTOR = 1.0 / Math.log(10.0);
-
     // in the event of a long partition, never record an interval longer than the rpc timeout,
     // since if a host is regularly experiencing connectivity problems lasting this long we'd
     // rather mark it down quickly instead of adapting
@@ -284,19 +289,21 @@ class ArrivalWindow
 
     synchronized void add(double value)
     {
-        double interArrivalTime;
-        if ( tLast > 0L )
+        if (tLast > 0L)
         {
-            interArrivalTime = (value - tLast);
+            double interArrivalTime = (value - tLast);
+            if (interArrivalTime <= MAX_INTERVAL_IN_MS)
+                arrivalIntervals.add(interArrivalTime);
+            else
+                logger.debug("Ignoring interval time of {}", interArrivalTime);
         }
         else
         {
-            interArrivalTime = Gossiper.intervalInMillis / 2;
+            // We use a very large initial interval since the "right" average depends on the cluster size
+            // and it's better to err high (false negatives, which will be corrected by waiting a bit longer)
+            // than low (false positives, which cause "flapping").
+            arrivalIntervals.add(Gossiper.intervalInMillis * 30);
         }
-        if (interArrivalTime <= MAX_INTERVAL_IN_MS)
-            arrivalIntervals.add(interArrivalTime);
-        else
-            logger.debug("Ignoring interval time of {}", interArrivalTime);
         tLast = value;
     }
 
@@ -308,11 +315,9 @@ class ArrivalWindow
     // see CASSANDRA-2597 for an explanation of the math at work here.
     double phi(long tnow)
     {
-        int size = arrivalIntervals.size();
+        assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive
         double t = tnow - tLast;
-        return (size > 0)
-               ? PHI_FACTOR * t / mean()
-               : 0.0;
+        return t / mean();
     }
 
     public String toString()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
index 277cae1..46fee34 100644
--- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
+++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
@@ -27,7 +27,6 @@ import org.junit.Test;
 
 public class ArrivalWindowTest
 {
-
     @Test
     public void test()
     {
@@ -40,11 +39,9 @@ public class ArrivalWindowTest
         window.add(555);
 
         //all good
-        assertEquals(0.4342, window.phi(666), 0.01);
+        assertEquals(1.0, window.phi(666), 0.01);
 
         //oh noes, a much higher timestamp, something went wrong!
-        assertEquals(9.566, window.phi(3000), 0.01);
+        assertEquals(22.03, window.phi(3000), 0.01);
     }
-
-
 }


[3/9] git commit: Improve initial FD phi estimate when starting up; patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385

Posted by jb...@apache.org.
Improve initial FD phi estimate when starting up
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e01224ed
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e01224ed
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e01224ed

Branch: refs/heads/trunk
Commit: e01224ede63e941fffa7b9b3906c1d54fb699bea
Parents: b3ae77d
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 17:52:45 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 18:00:15 2013 -0600

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 47 +++++++++++---------
 .../apache/cassandra/gms/ArrivalWindowTest.java |  7 +--
 3 files changed, 29 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index dc56d90..b3fa565 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
 1.2.13
  * Optimize FD phi calculation (CASSANDRA-6386)
+ * Improve initial FD phi estimate when starting up (CASSANDRA-6385)
 
 
 1.2.12

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java
index e4ffb88..ee47997 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -49,6 +49,12 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
     public static final IFailureDetector instance = new FailureDetector();
     private static final Logger logger = LoggerFactory.getLogger(FailureDetector.class);
 
+    // this is useless except to provide backwards compatibility in phi_convict_threshold,
+    // because everyone seems pretty accustomed to the default of 8, and users who have
+    // already tuned their phi_convict_threshold for their own environments won't need to
+    // change.
+    private final double PHI_FACTOR = 1.0 / Math.log(10.0); // 0.434...
+
     private final Map<InetAddress, ArrivalWindow> arrivalSamples = new Hashtable<InetAddress, ArrivalWindow>();
     private final List<IFailureDetectionEventListener> fdEvntListeners = new CopyOnWriteArrayList<IFailureDetectionEventListener>();
 
@@ -183,12 +189,17 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
             logger.trace("reporting {}", ep);
         long now = System.currentTimeMillis();
         ArrivalWindow heartbeatWindow = arrivalSamples.get(ep);
-        if ( heartbeatWindow == null )
+        if (heartbeatWindow == null)
         {
+            // avoid adding an empty ArrivalWindow to the Map
             heartbeatWindow = new ArrivalWindow(SAMPLE_SIZE);
+            heartbeatWindow.add(now);
             arrivalSamples.put(ep, heartbeatWindow);
         }
-        heartbeatWindow.add(now);
+        else
+        {
+            heartbeatWindow.add(now);
+        }
     }
 
     public void interpret(InetAddress ep)
@@ -203,7 +214,7 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
         if (logger.isTraceEnabled())
             logger.trace("PHI for " + ep + " : " + phi);
 
-        if (phi > getPhiConvictThreshold())
+        if (PHI_FACTOR * phi > getPhiConvictThreshold())
         {
             logger.trace("notifying listeners that {} is down", ep);
             logger.trace("intervals: {} mean: {}", hbWnd, hbWnd.mean());
@@ -266,12 +277,6 @@ class ArrivalWindow
     private double tLast = 0L;
     private final BoundedStatsDeque arrivalIntervals;
 
-    // this is useless except to provide backwards compatibility in phi_convict_threshold,
-    // because everyone seems pretty accustomed to the default of 8, and users who have
-    // already tuned their phi_convict_threshold for their own environments won't need to
-    // change.
-    private final double PHI_FACTOR = 1.0 / Math.log(10.0);
-
     // in the event of a long partition, never record an interval longer than the rpc timeout,
     // since if a host is regularly experiencing connectivity problems lasting this long we'd
     // rather mark it down quickly instead of adapting
@@ -284,19 +289,21 @@ class ArrivalWindow
 
     synchronized void add(double value)
     {
-        double interArrivalTime;
-        if ( tLast > 0L )
+        if (tLast > 0L)
         {
-            interArrivalTime = (value - tLast);
+            double interArrivalTime = (value - tLast);
+            if (interArrivalTime <= MAX_INTERVAL_IN_MS)
+                arrivalIntervals.add(interArrivalTime);
+            else
+                logger.debug("Ignoring interval time of {}", interArrivalTime);
         }
         else
         {
-            interArrivalTime = Gossiper.intervalInMillis / 2;
+            // We use a very large initial interval since the "right" average depends on the cluster size
+            // and it's better to err high (false negatives, which will be corrected by waiting a bit longer)
+            // than low (false positives, which cause "flapping").
+            arrivalIntervals.add(Gossiper.intervalInMillis * 30);
         }
-        if (interArrivalTime <= MAX_INTERVAL_IN_MS)
-            arrivalIntervals.add(interArrivalTime);
-        else
-            logger.debug("Ignoring interval time of {}", interArrivalTime);
         tLast = value;
     }
 
@@ -308,11 +315,9 @@ class ArrivalWindow
     // see CASSANDRA-2597 for an explanation of the math at work here.
     double phi(long tnow)
     {
-        int size = arrivalIntervals.size();
+        assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive
         double t = tnow - tLast;
-        return (size > 0)
-               ? PHI_FACTOR * t / mean()
-               : 0.0;
+        return t / mean();
     }
 
     public String toString()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
index 277cae1..46fee34 100644
--- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
+++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
@@ -27,7 +27,6 @@ import org.junit.Test;
 
 public class ArrivalWindowTest
 {
-
     @Test
     public void test()
     {
@@ -40,11 +39,9 @@ public class ArrivalWindowTest
         window.add(555);
 
         //all good
-        assertEquals(0.4342, window.phi(666), 0.01);
+        assertEquals(1.0, window.phi(666), 0.01);
 
         //oh noes, a much higher timestamp, something went wrong!
-        assertEquals(9.566, window.phi(3000), 0.01);
+        assertEquals(22.03, window.phi(3000), 0.01);
     }
-
-
 }


[8/9] git commit: Merge branch 'cassandra-2.0' into trunk

Posted by jb...@apache.org.
Merge branch 'cassandra-2.0' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/ac3afd3b
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/ac3afd3b
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/ac3afd3b

Branch: refs/heads/trunk
Commit: ac3afd3bc45481330eb3d57329512f7a593ecf19
Parents: 10a2089 ed8117f
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:57:36 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:57:36 2013 -0600

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 41 +++++++++++++-------
 .../cassandra/utils/BoundedStatsDeque.java      |  2 +-
 .../apache/cassandra/gms/ArrivalWindowTest.java |  9 ++---
 4 files changed, 31 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/ac3afd3b/CHANGES.txt
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/cassandra/blob/ac3afd3b/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------


[5/9] git commit: merge from 1.2

Posted by jb...@apache.org.
merge from 1.2


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/91d56bf5
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/91d56bf5
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/91d56bf5

Branch: refs/heads/trunk
Commit: 91d56bf5cadd25674e4034462f5a0a2ee2ab4132
Parents: 26d1b68 e01224e
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:55:51 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:55:51 2013 -0600

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 41 +++++++++++++-------
 .../apache/cassandra/gms/ArrivalWindowTest.java |  9 ++---
 3 files changed, 30 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 00b82c0,b3fa565..6ae8e8c
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,33 -1,9 +1,34 @@@
 -1.2.13
 +2.0.3
 + * Fix FD leak on slice read path (CASSANDRA-6275)
 + * Cancel read meter task when closing SSTR (CASSANDRA-6358)
 + * free off-heap IndexSummary during bulk (CASSANDRA-6359)
 + * Recover from IOException in accept() thread (CASSANDRA-6349)
 + * Improve Gossip tolerance of abnormally slow tasks (CASSANDRA-6338)
 + * Fix trying to hint timed out counter writes (CASSANDRA-6322)
 + * Allow restoring specific columnfamilies from archived CL (CASSANDRA-4809)
 + * Avoid flushing compaction_history after each operation (CASSANDRA-6287)
 + * Fix repair assertion error when tombstones expire (CASSANDRA-6277)
 + * Skip loading corrupt key cache (CASSANDRA-6260)
 + * Fixes for compacting larger-than-memory rows (CASSANDRA-6274)
 + * Compact hottest sstables first and optionally omit coldest from
 +   compaction entirely (CASSANDRA-6109)
 + * Fix modifying column_metadata from thrift (CASSANDRA-6182)
 + * cqlsh: fix LIST USERS output (CASSANDRA-6242)
 + * Add IRequestSink interface (CASSANDRA-6248)
 + * Update memtable size while flushing (CASSANDRA-6249)
 + * Provide hooks around CQL2/CQL3 statement execution (CASSANDRA-6252)
 + * Require Permission.SELECT for CAS updates (CASSANDRA-6247)
 + * New CQL-aware SSTableWriter (CASSANDRA-5894)
 + * Reject CAS operation when the protocol v1 is used (CASSANDRA-6270)
 + * Correctly throw error when frame too large (CASSANDRA-5981)
 + * Fix serialization bug in PagedRange with 2ndary indexes (CASSANDRA-6299)
 + * Fix CQL3 table validation in Thrift (CASSANDRA-6140)
 + * Fix bug missing results with IN clauses (CASSANDRA-6327)
 + * Fix paging with reversed slices (CASSANDRA-6343)
 + * Set minTimestamp correctly to be able to drop expired sstables (CASSANDRA-6337)
 +Merged from 1.2:
   * Optimize FD phi calculation (CASSANDRA-6386)
+  * Improve initial FD phi estimate when starting up (CASSANDRA-6385)
 -
 -
 -1.2.12
   * Invalidate row cache when dropping CF (CASSANDRA-6351)
   * add non-jamm path for cached statements (CASSANDRA-6293)
   * (Hadoop) Require CFRR batchSize to be at least 2 (CASSANDRA-6114)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/gms/FailureDetector.java
index bb07154,ee47997..4e5ba90
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@@ -284,22 -287,23 +295,24 @@@ class ArrivalWindo
          arrivalIntervals = new BoundedStatsDeque(size);
      }
  
 -    synchronized void add(double value)
 +    synchronized void add(long value)
      {
-         long interArrivalTime;
-         if (tLast != 0L)
++        assert tLast >= 0;
+         if (tLast > 0L)
          {
-             interArrivalTime = (value - tLast);
 -            double interArrivalTime = (value - tLast);
 -            if (interArrivalTime <= MAX_INTERVAL_IN_MS)
++            long interArrivalTime = (value - tLast);
++            if (interArrivalTime <= MAX_INTERVAL_IN_NANO)
+                 arrivalIntervals.add(interArrivalTime);
+             else
+                 logger.debug("Ignoring interval time of {}", interArrivalTime);
          }
          else
          {
-             interArrivalTime = (Gossiper.intervalInMillis * MILLI_TO_NANO) / 2;
+             // We use a very large initial interval since the "right" average depends on the cluster size
+             // and it's better to err high (false negatives, which will be corrected by waiting a bit longer)
+             // than low (false positives, which cause "flapping").
 -            arrivalIntervals.add(Gossiper.intervalInMillis * 30);
++            arrivalIntervals.add(Gossiper.intervalInMillis * MILLI_TO_NANO * 30);
          }
- 
-         if (interArrivalTime <= MAX_INTERVAL_IN_NANO)
-             arrivalIntervals.add(interArrivalTime);
-         else
-             logger.debug("Ignoring interval time of {}", interArrivalTime);
          tLast = value;
      }
  
@@@ -311,11 -315,9 +324,9 @@@
      // see CASSANDRA-2597 for an explanation of the math at work here.
      double phi(long tnow)
      {
-         int size = arrivalIntervals.size();
+         assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive
 -        double t = tnow - tLast;
 +        long t = tnow - tLast;
-         return (size > 0)
-                ? PHI_FACTOR * (t / mean())
-                : 0.0;
+         return t / mean();
      }
  
      public String toString()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
----------------------------------------------------------------------
diff --cc test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
index d05ba96,46fee34..e678d86
--- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
+++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
@@@ -28,23 -28,20 +28,20 @@@ import org.junit.Test
  public class ArrivalWindowTest
  {
      @Test
 -    public void test()
 +    public void testWithNanoTime()
      {
 -        ArrivalWindow window = new ArrivalWindow(4);
 -        //base readings
 -        window.add(111);
 -        window.add(222);
 -        window.add(333);
 -        window.add(444);
 -        window.add(555);
 +        final ArrivalWindow windowWithNano = new ArrivalWindow(4);
 +        final long toNano = 1000000L;
  
 -        //all good
 -        assertEquals(1.0, window.phi(666), 0.01);
 +        windowWithNano.add(111 * toNano);
 +        windowWithNano.add(222 * toNano);
 +        windowWithNano.add(333 * toNano);
 +        windowWithNano.add(444 * toNano);
 +        windowWithNano.add(555 * toNano);
  
- 
-         assertEquals(0.4342, windowWithNano.phi(666 * toNano), 0.01);
++        //all good
++        assertEquals(1.0, windowWithNano.phi(666 * toNano), 0.01);
          //oh noes, a much higher timestamp, something went wrong!
-         assertEquals(9.566, windowWithNano.phi(3000 * toNano), 0.01);
- 
 -        assertEquals(22.03, window.phi(3000), 0.01);
++        assertEquals(22.03, windowWithNano.phi(3000 * toNano), 0.01);
      }
- 
- 
  }


[7/9] git commit: convert sum to double before dividing

Posted by jb...@apache.org.
convert sum to double before dividing


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/ed8117ff
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/ed8117ff
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/ed8117ff

Branch: refs/heads/cassandra-2.0
Commit: ed8117ff9b69511c0c3a715030daa7da004afe86
Parents: 91d56bf
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:57:27 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:57:27 2013 -0600

----------------------------------------------------------------------
 src/java/org/apache/cassandra/utils/BoundedStatsDeque.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/ed8117ff/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java b/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
index 2491e8d..3983b74 100644
--- a/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
+++ b/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
@@ -67,6 +67,6 @@ public class BoundedStatsDeque implements Iterable<Long>
 
     public double mean()
     {
-        return size() > 0 ? sum() / size() : 0;
+        return size() > 0 ? ((double) sum()) / size() : 0;
     }
 }


[2/9] git commit: Improve initial FD phi estimate when starting up; patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385

Posted by jb...@apache.org.
Improve initial FD phi estimate when starting up
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-6385


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/e01224ed
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/e01224ed
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/e01224ed

Branch: refs/heads/cassandra-2.0
Commit: e01224ede63e941fffa7b9b3906c1d54fb699bea
Parents: b3ae77d
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 17:52:45 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 18:00:15 2013 -0600

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 47 +++++++++++---------
 .../apache/cassandra/gms/ArrivalWindowTest.java |  7 +--
 3 files changed, 29 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index dc56d90..b3fa565 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
 1.2.13
  * Optimize FD phi calculation (CASSANDRA-6386)
+ * Improve initial FD phi estimate when starting up (CASSANDRA-6385)
 
 
 1.2.12

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java
index e4ffb88..ee47997 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -49,6 +49,12 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
     public static final IFailureDetector instance = new FailureDetector();
     private static final Logger logger = LoggerFactory.getLogger(FailureDetector.class);
 
+    // this is useless except to provide backwards compatibility in phi_convict_threshold,
+    // because everyone seems pretty accustomed to the default of 8, and users who have
+    // already tuned their phi_convict_threshold for their own environments won't need to
+    // change.
+    private final double PHI_FACTOR = 1.0 / Math.log(10.0); // 0.434...
+
     private final Map<InetAddress, ArrivalWindow> arrivalSamples = new Hashtable<InetAddress, ArrivalWindow>();
     private final List<IFailureDetectionEventListener> fdEvntListeners = new CopyOnWriteArrayList<IFailureDetectionEventListener>();
 
@@ -183,12 +189,17 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
             logger.trace("reporting {}", ep);
         long now = System.currentTimeMillis();
         ArrivalWindow heartbeatWindow = arrivalSamples.get(ep);
-        if ( heartbeatWindow == null )
+        if (heartbeatWindow == null)
         {
+            // avoid adding an empty ArrivalWindow to the Map
             heartbeatWindow = new ArrivalWindow(SAMPLE_SIZE);
+            heartbeatWindow.add(now);
             arrivalSamples.put(ep, heartbeatWindow);
         }
-        heartbeatWindow.add(now);
+        else
+        {
+            heartbeatWindow.add(now);
+        }
     }
 
     public void interpret(InetAddress ep)
@@ -203,7 +214,7 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
         if (logger.isTraceEnabled())
             logger.trace("PHI for " + ep + " : " + phi);
 
-        if (phi > getPhiConvictThreshold())
+        if (PHI_FACTOR * phi > getPhiConvictThreshold())
         {
             logger.trace("notifying listeners that {} is down", ep);
             logger.trace("intervals: {} mean: {}", hbWnd, hbWnd.mean());
@@ -266,12 +277,6 @@ class ArrivalWindow
     private double tLast = 0L;
     private final BoundedStatsDeque arrivalIntervals;
 
-    // this is useless except to provide backwards compatibility in phi_convict_threshold,
-    // because everyone seems pretty accustomed to the default of 8, and users who have
-    // already tuned their phi_convict_threshold for their own environments won't need to
-    // change.
-    private final double PHI_FACTOR = 1.0 / Math.log(10.0);
-
     // in the event of a long partition, never record an interval longer than the rpc timeout,
     // since if a host is regularly experiencing connectivity problems lasting this long we'd
     // rather mark it down quickly instead of adapting
@@ -284,19 +289,21 @@ class ArrivalWindow
 
     synchronized void add(double value)
     {
-        double interArrivalTime;
-        if ( tLast > 0L )
+        if (tLast > 0L)
         {
-            interArrivalTime = (value - tLast);
+            double interArrivalTime = (value - tLast);
+            if (interArrivalTime <= MAX_INTERVAL_IN_MS)
+                arrivalIntervals.add(interArrivalTime);
+            else
+                logger.debug("Ignoring interval time of {}", interArrivalTime);
         }
         else
         {
-            interArrivalTime = Gossiper.intervalInMillis / 2;
+            // We use a very large initial interval since the "right" average depends on the cluster size
+            // and it's better to err high (false negatives, which will be corrected by waiting a bit longer)
+            // than low (false positives, which cause "flapping").
+            arrivalIntervals.add(Gossiper.intervalInMillis * 30);
         }
-        if (interArrivalTime <= MAX_INTERVAL_IN_MS)
-            arrivalIntervals.add(interArrivalTime);
-        else
-            logger.debug("Ignoring interval time of {}", interArrivalTime);
         tLast = value;
     }
 
@@ -308,11 +315,9 @@ class ArrivalWindow
     // see CASSANDRA-2597 for an explanation of the math at work here.
     double phi(long tnow)
     {
-        int size = arrivalIntervals.size();
+        assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive
         double t = tnow - tLast;
-        return (size > 0)
-               ? PHI_FACTOR * t / mean()
-               : 0.0;
+        return t / mean();
     }
 
     public String toString()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/e01224ed/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
index 277cae1..46fee34 100644
--- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
+++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
@@ -27,7 +27,6 @@ import org.junit.Test;
 
 public class ArrivalWindowTest
 {
-
     @Test
     public void test()
     {
@@ -40,11 +39,9 @@ public class ArrivalWindowTest
         window.add(555);
 
         //all good
-        assertEquals(0.4342, window.phi(666), 0.01);
+        assertEquals(1.0, window.phi(666), 0.01);
 
         //oh noes, a much higher timestamp, something went wrong!
-        assertEquals(9.566, window.phi(3000), 0.01);
+        assertEquals(22.03, window.phi(3000), 0.01);
     }
-
-
 }


[4/9] git commit: merge from 1.2

Posted by jb...@apache.org.
merge from 1.2


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/91d56bf5
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/91d56bf5
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/91d56bf5

Branch: refs/heads/cassandra-2.0
Commit: 91d56bf5cadd25674e4034462f5a0a2ee2ab4132
Parents: 26d1b68 e01224e
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:55:51 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:55:51 2013 -0600

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 41 +++++++++++++-------
 .../apache/cassandra/gms/ArrivalWindowTest.java |  9 ++---
 3 files changed, 30 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 00b82c0,b3fa565..6ae8e8c
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,33 -1,9 +1,34 @@@
 -1.2.13
 +2.0.3
 + * Fix FD leak on slice read path (CASSANDRA-6275)
 + * Cancel read meter task when closing SSTR (CASSANDRA-6358)
 + * free off-heap IndexSummary during bulk (CASSANDRA-6359)
 + * Recover from IOException in accept() thread (CASSANDRA-6349)
 + * Improve Gossip tolerance of abnormally slow tasks (CASSANDRA-6338)
 + * Fix trying to hint timed out counter writes (CASSANDRA-6322)
 + * Allow restoring specific columnfamilies from archived CL (CASSANDRA-4809)
 + * Avoid flushing compaction_history after each operation (CASSANDRA-6287)
 + * Fix repair assertion error when tombstones expire (CASSANDRA-6277)
 + * Skip loading corrupt key cache (CASSANDRA-6260)
 + * Fixes for compacting larger-than-memory rows (CASSANDRA-6274)
 + * Compact hottest sstables first and optionally omit coldest from
 +   compaction entirely (CASSANDRA-6109)
 + * Fix modifying column_metadata from thrift (CASSANDRA-6182)
 + * cqlsh: fix LIST USERS output (CASSANDRA-6242)
 + * Add IRequestSink interface (CASSANDRA-6248)
 + * Update memtable size while flushing (CASSANDRA-6249)
 + * Provide hooks around CQL2/CQL3 statement execution (CASSANDRA-6252)
 + * Require Permission.SELECT for CAS updates (CASSANDRA-6247)
 + * New CQL-aware SSTableWriter (CASSANDRA-5894)
 + * Reject CAS operation when the protocol v1 is used (CASSANDRA-6270)
 + * Correctly throw error when frame too large (CASSANDRA-5981)
 + * Fix serialization bug in PagedRange with 2ndary indexes (CASSANDRA-6299)
 + * Fix CQL3 table validation in Thrift (CASSANDRA-6140)
 + * Fix bug missing results with IN clauses (CASSANDRA-6327)
 + * Fix paging with reversed slices (CASSANDRA-6343)
 + * Set minTimestamp correctly to be able to drop expired sstables (CASSANDRA-6337)
 +Merged from 1.2:
   * Optimize FD phi calculation (CASSANDRA-6386)
+  * Improve initial FD phi estimate when starting up (CASSANDRA-6385)
 -
 -
 -1.2.12
   * Invalidate row cache when dropping CF (CASSANDRA-6351)
   * add non-jamm path for cached statements (CASSANDRA-6293)
   * (Hadoop) Require CFRR batchSize to be at least 2 (CASSANDRA-6114)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/gms/FailureDetector.java
index bb07154,ee47997..4e5ba90
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@@ -284,22 -287,23 +295,24 @@@ class ArrivalWindo
          arrivalIntervals = new BoundedStatsDeque(size);
      }
  
 -    synchronized void add(double value)
 +    synchronized void add(long value)
      {
-         long interArrivalTime;
-         if (tLast != 0L)
++        assert tLast >= 0;
+         if (tLast > 0L)
          {
-             interArrivalTime = (value - tLast);
 -            double interArrivalTime = (value - tLast);
 -            if (interArrivalTime <= MAX_INTERVAL_IN_MS)
++            long interArrivalTime = (value - tLast);
++            if (interArrivalTime <= MAX_INTERVAL_IN_NANO)
+                 arrivalIntervals.add(interArrivalTime);
+             else
+                 logger.debug("Ignoring interval time of {}", interArrivalTime);
          }
          else
          {
-             interArrivalTime = (Gossiper.intervalInMillis * MILLI_TO_NANO) / 2;
+             // We use a very large initial interval since the "right" average depends on the cluster size
+             // and it's better to err high (false negatives, which will be corrected by waiting a bit longer)
+             // than low (false positives, which cause "flapping").
 -            arrivalIntervals.add(Gossiper.intervalInMillis * 30);
++            arrivalIntervals.add(Gossiper.intervalInMillis * MILLI_TO_NANO * 30);
          }
- 
-         if (interArrivalTime <= MAX_INTERVAL_IN_NANO)
-             arrivalIntervals.add(interArrivalTime);
-         else
-             logger.debug("Ignoring interval time of {}", interArrivalTime);
          tLast = value;
      }
  
@@@ -311,11 -315,9 +324,9 @@@
      // see CASSANDRA-2597 for an explanation of the math at work here.
      double phi(long tnow)
      {
-         int size = arrivalIntervals.size();
+         assert arrivalIntervals.size() > 0 && tLast > 0; // should not be called before any samples arrive
 -        double t = tnow - tLast;
 +        long t = tnow - tLast;
-         return (size > 0)
-                ? PHI_FACTOR * (t / mean())
-                : 0.0;
+         return t / mean();
      }
  
      public String toString()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/91d56bf5/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
----------------------------------------------------------------------
diff --cc test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
index d05ba96,46fee34..e678d86
--- a/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
+++ b/test/unit/org/apache/cassandra/gms/ArrivalWindowTest.java
@@@ -28,23 -28,20 +28,20 @@@ import org.junit.Test
  public class ArrivalWindowTest
  {
      @Test
 -    public void test()
 +    public void testWithNanoTime()
      {
 -        ArrivalWindow window = new ArrivalWindow(4);
 -        //base readings
 -        window.add(111);
 -        window.add(222);
 -        window.add(333);
 -        window.add(444);
 -        window.add(555);
 +        final ArrivalWindow windowWithNano = new ArrivalWindow(4);
 +        final long toNano = 1000000L;
  
 -        //all good
 -        assertEquals(1.0, window.phi(666), 0.01);
 +        windowWithNano.add(111 * toNano);
 +        windowWithNano.add(222 * toNano);
 +        windowWithNano.add(333 * toNano);
 +        windowWithNano.add(444 * toNano);
 +        windowWithNano.add(555 * toNano);
  
- 
-         assertEquals(0.4342, windowWithNano.phi(666 * toNano), 0.01);
++        //all good
++        assertEquals(1.0, windowWithNano.phi(666 * toNano), 0.01);
          //oh noes, a much higher timestamp, something went wrong!
-         assertEquals(9.566, windowWithNano.phi(3000 * toNano), 0.01);
- 
 -        assertEquals(22.03, window.phi(3000), 0.01);
++        assertEquals(22.03, windowWithNano.phi(3000 * toNano), 0.01);
      }
- 
- 
  }


[9/9] git commit: Merge remote-tracking branch 'origin/trunk' into trunk

Posted by jb...@apache.org.
Merge remote-tracking branch 'origin/trunk' into trunk


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/256da638
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/256da638
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/256da638

Branch: refs/heads/trunk
Commit: 256da638594d84010a1cb5131db0c05cfce2d1eb
Parents: ac3afd3 03e3d15
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:57:54 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:57:54 2013 -0600

----------------------------------------------------------------------
 .../cassandra/db/ColumnFamilyStoreTest.java     | 56 +++++++-------------
 1 file changed, 20 insertions(+), 36 deletions(-)
----------------------------------------------------------------------



[6/9] git commit: convert sum to double before dividing

Posted by jb...@apache.org.
convert sum to double before dividing


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/ed8117ff
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/ed8117ff
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/ed8117ff

Branch: refs/heads/trunk
Commit: ed8117ff9b69511c0c3a715030daa7da004afe86
Parents: 91d56bf
Author: Jonathan Ellis <jb...@apache.org>
Authored: Wed Nov 20 20:57:27 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Wed Nov 20 20:57:27 2013 -0600

----------------------------------------------------------------------
 src/java/org/apache/cassandra/utils/BoundedStatsDeque.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/ed8117ff/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java b/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
index 2491e8d..3983b74 100644
--- a/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
+++ b/src/java/org/apache/cassandra/utils/BoundedStatsDeque.java
@@ -67,6 +67,6 @@ public class BoundedStatsDeque implements Iterable<Long>
 
     public double mean()
     {
-        return size() > 0 ? sum() / size() : 0;
+        return size() > 0 ? ((double) sum()) / size() : 0;
     }
 }