You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zipkin.apache.org by ad...@apache.org on 2019/06/02 02:14:12 UTC

[incubator-zipkin-brave] branch master updated: Fixes some edge cases around rate-limited sampler resetting (#908)

This is an automated email from the ASF dual-hosted git repository.

adriancole pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-zipkin-brave.git


The following commit(s) were added to refs/heads/master by this push:
     new 417774a  Fixes some edge cases around rate-limited sampler resetting (#908)
417774a is described below

commit 417774a2bbce59936e64f7aaa2da296ed77726ad
Author: Adrian Cole <ad...@users.noreply.github.com>
AuthorDate: Sun Jun 2 10:14:07 2019 +0800

    Fixes some edge cases around rate-limited sampler resetting (#908)
---
 .../java/brave/sampler/RateLimitingSampler.java    | 32 +++++++------
 .../brave/sampler/RateLimitingSamplerTest.java     | 53 +++++++++++++++++++++-
 2 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/brave/src/main/java/brave/sampler/RateLimitingSampler.java b/brave/src/main/java/brave/sampler/RateLimitingSampler.java
index 8f1a5fd..b16f7f3 100644
--- a/brave/src/main/java/brave/sampler/RateLimitingSampler.java
+++ b/brave/src/main/java/brave/sampler/RateLimitingSampler.java
@@ -53,7 +53,7 @@ public class RateLimitingSampler extends Sampler {
   }
 
   static final long NANOS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
-  static final int NANOS_PER_DECISECOND = (int) (NANOS_PER_SECOND / 10);
+  static final long NANOS_PER_DECISECOND = NANOS_PER_SECOND / 10;
 
   final MaxFunction maxFunction;
   final AtomicInteger usage = new AtomicInteger(0);
@@ -61,7 +61,7 @@ public class RateLimitingSampler extends Sampler {
 
   RateLimitingSampler(int tracesPerSecond) {
     this.maxFunction =
-        tracesPerSecond < 10 ? new LessThan10(tracesPerSecond) : new AtLeast10(tracesPerSecond);
+      tracesPerSecond < 10 ? new LessThan10(tracesPerSecond) : new AtLeast10(tracesPerSecond);
     long now = System.nanoTime();
     this.nextReset = new AtomicLong(now + NANOS_PER_SECOND);
   }
@@ -71,14 +71,17 @@ public class RateLimitingSampler extends Sampler {
     long updateAt = nextReset.get();
 
     long nanosUntilReset = -(now - updateAt); // because nanoTime can be negative
-    boolean shouldReset = nanosUntilReset <= 0;
-    if (shouldReset) {
-      if (nextReset.compareAndSet(updateAt, updateAt + NANOS_PER_SECOND)) {
-        usage.set(0);
-      }
+    if (nanosUntilReset <= 0) {
+      // Attempt to move into the next sampling interval.
+      // nanosUntilReset is now invalid regardless of race winner, so we can't sample based on it.
+      if (nextReset.compareAndSet(updateAt, now + NANOS_PER_SECOND)) usage.set(0);
+
+      // recurse as it is simpler than resetting all the locals.
+      // reset happens once per second, this code doesn't take a second, so no infinite recursion.
+      return isSampled(ignoredTraceId);
     }
 
-    int max = maxFunction.max(shouldReset ? 0 : nanosUntilReset);
+    int max = maxFunction.max(nanosUntilReset);
     int prev, next;
     do { // same form as java 8 AtomicLong.getAndUpdate
       prev = usage.get();
@@ -89,7 +92,6 @@ public class RateLimitingSampler extends Sampler {
   }
 
   static abstract class MaxFunction {
-    /** @param nanosUntilReset zero if was just reset */
     abstract int max(long nanosUntilReset);
   }
 
@@ -101,7 +103,7 @@ public class RateLimitingSampler extends Sampler {
       this.tracesPerSecond = tracesPerSecond;
     }
 
-    @Override int max(long nanosUntilReset) {
+    @Override int max(long nanosUntilResetIgnored) {
       return tracesPerSecond;
     }
   }
@@ -130,9 +132,13 @@ public class RateLimitingSampler extends Sampler {
     }
 
     @Override int max(long nanosUntilReset) {
-      int decisecondsUntilReset = ((int) nanosUntilReset / NANOS_PER_DECISECOND);
-      int index = decisecondsUntilReset == 0 ? 0 : 10 - decisecondsUntilReset;
-      return max[index];
+      // Check to see if we are in the first or last interval
+      if (nanosUntilReset > NANOS_PER_SECOND - NANOS_PER_DECISECOND) return max[0];
+      if (nanosUntilReset < NANOS_PER_DECISECOND) return max[9];
+
+      // Choose a slot based on the remaining deciseconds
+      int decisecondsUntilReset = (int) (nanosUntilReset / NANOS_PER_DECISECOND);
+      return max[10 - decisecondsUntilReset];
     }
   }
 }
diff --git a/brave/src/test/java/brave/sampler/RateLimitingSamplerTest.java b/brave/src/test/java/brave/sampler/RateLimitingSamplerTest.java
index 22e9686..0c4881e 100644
--- a/brave/src/test/java/brave/sampler/RateLimitingSamplerTest.java
+++ b/brave/src/test/java/brave/sampler/RateLimitingSamplerTest.java
@@ -17,6 +17,7 @@
 package brave.sampler;
 
 import java.util.Random;
+import java.util.concurrent.TimeUnit;
 import org.junit.Test;
 import org.junit.experimental.theories.DataPoints;
 import org.junit.experimental.theories.Theory;
@@ -78,6 +79,54 @@ public class RateLimitingSamplerTest {
     assertThat(sampler.isSampled(0L)).isTrue();
   }
 
+  @Test public void resetsAfterALongGap() {
+    mockStatic(System.class);
+
+    when(System.nanoTime()).thenReturn(0L);
+    Sampler sampler = RateLimitingSampler.create(10);
+
+    // Try a really long time later. Makes sure extra credit isn't given, and no recursion errors
+    when(System.nanoTime()).thenReturn(TimeUnit.DAYS.toNanos(365));
+    assertThat(sampler.isSampled(0L)).isTrue();
+    assertThat(sampler.isSampled(0L)).isFalse(); // we took the credit of the 1st decisecond
+  }
+
+  @Test public void worksWithEdgeCases() {
+    mockStatic(System.class);
+
+    when(System.nanoTime()).thenReturn(0L);
+    Sampler sampler = RateLimitingSampler.create(10);
+
+    // try exact same nanosecond, however unlikely
+    assertThat(sampler.isSampled(0L)).isTrue(); // 1
+
+    // Try a value smaller than a decisecond, to ensure edge cases are covered
+    when(System.nanoTime()).thenReturn(1L);
+    assertThat(sampler.isSampled(0L)).isFalse(); // credit used
+
+    // Try exactly a decisecond later, which should be a reset condition
+    when(System.nanoTime()).thenReturn(NANOS_PER_DECISECOND);
+    assertThat(sampler.isSampled(0L)).isTrue(); // 2
+    assertThat(sampler.isSampled(0L)).isFalse(); // credit used
+
+    // Try almost a second later
+    when(System.nanoTime()).thenReturn(NANOS_PER_SECOND - 1);
+    assertThat(sampler.isSampled(0L)).isTrue(); // 3
+    assertThat(sampler.isSampled(0L)).isTrue(); // 4
+    assertThat(sampler.isSampled(0L)).isTrue(); // 5
+    assertThat(sampler.isSampled(0L)).isTrue(); // 6
+    assertThat(sampler.isSampled(0L)).isTrue(); // 7
+    assertThat(sampler.isSampled(0L)).isTrue(); // 8
+    assertThat(sampler.isSampled(0L)).isTrue(); // 9
+    assertThat(sampler.isSampled(0L)).isTrue(); // 10
+    assertThat(sampler.isSampled(0L)).isFalse(); // credit used
+
+    // Try exactly a second later, which should be a reset condition
+    when(System.nanoTime()).thenReturn(NANOS_PER_SECOND);
+    assertThat(sampler.isSampled(0L)).isTrue();
+    assertThat(sampler.isSampled(0L)).isFalse(); // credit used
+  }
+
   @Test public void allowsOddRates() {
     mockStatic(System.class);
 
@@ -86,8 +135,8 @@ public class RateLimitingSamplerTest {
     when(System.nanoTime()).thenReturn(NANOS_PER_SECOND + NANOS_PER_DECISECOND * 9);
     for (int i = 0; i < 11; i++) {
       assertThat(sampler.isSampled(0L))
-          .withFailMessage("failed after " + (i + 1))
-          .isTrue();
+        .withFailMessage("failed after " + (i + 1))
+        .isTrue();
     }
     assertThat(sampler.isSampled(0L)).isFalse();
   }