You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sl...@apache.org on 2016/07/28 14:34:59 UTC
[2/6] cassandra git commit: Rerun ReplicationAwareTokenAllocatorTest
on failure to avoid flakiness.
Rerun ReplicationAwareTokenAllocatorTest on failure to avoid flakiness.
patch by Branimir Lambov; reviewed by Sylvain Lebresne for CASSANDRA-12277.
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/a5cbb026
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/a5cbb026
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/a5cbb026
Branch: refs/heads/cassandra-3.9
Commit: a5cbb026195ace450a514db22a64dc9c7c8fa66e
Parents: 3f9b9ff
Author: Branimir Lambov <br...@datastax.com>
Authored: Tue Jul 26 15:19:51 2016 +0300
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Thu Jul 28 16:25:07 2016 +0200
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../ReplicationAwareTokenAllocatorTest.java | 15 ++++++
test/unit/org/apache/cassandra/Util.java | 56 ++++++++++++++++++++
3 files changed, 72 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/a5cbb026/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index caecefb..8f6997b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
3.0.9
+ * Rerun ReplicationAwareTokenAllocatorTest on failure to avoid flakiness (CASSANDRA-12277)
* Exception when computing read-repair for range tombstones (CASSANDRA-12263)
* Lost counter writes in compact table and static columns (CASSANDRA-12219)
* AssertionError with MVs on updating a row that isn't indexed due to a null value (CASSANDRA-12247)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/a5cbb026/test/long/org/apache/cassandra/dht/tokenallocator/ReplicationAwareTokenAllocatorTest.java
----------------------------------------------------------------------
diff --git a/test/long/org/apache/cassandra/dht/tokenallocator/ReplicationAwareTokenAllocatorTest.java b/test/long/org/apache/cassandra/dht/tokenallocator/ReplicationAwareTokenAllocatorTest.java
index 0a27e1c..1b36c55 100644
--- a/test/long/org/apache/cassandra/dht/tokenallocator/ReplicationAwareTokenAllocatorTest.java
+++ b/test/long/org/apache/cassandra/dht/tokenallocator/ReplicationAwareTokenAllocatorTest.java
@@ -29,6 +29,7 @@ import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.junit.Test;
+import org.apache.cassandra.Util;
import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.dht.Token;
@@ -545,6 +546,20 @@ public class ReplicationAwareTokenAllocatorTest
@Test
public void testNewCluster()
{
+ Util.flakyTest(this::flakyTestNewCluster,
+ 5,
+ "It tends to fail sometimes due to the random selection of the tokens in the first few nodes.");
+ }
+
+ public void flakyTestNewCluster()
+ {
+ // This test is flaky because the selection of the tokens for the first RF nodes (which is random, with an
+ // uncontrolled seed) can sometimes cause a pathological situation where the algorithm will find a (close to)
+ // ideal distribution of tokens for some number of nodes, which in turn will inevitably cause it to go into a
+ // bad (unacceptable to the test criteria) distribution after adding one more node.
+
+ // This should happen very rarely, unless something is broken in the token allocation code.
+
for (int rf = 2; rf <= 5; ++rf)
{
for (int perUnitCount = 1; perUnitCount <= MAX_VNODE_COUNT; perUnitCount *= 4)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/a5cbb026/test/unit/org/apache/cassandra/Util.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java
index 7ce8f04..e7b1ffa 100644
--- a/test/unit/org/apache/cassandra/Util.java
+++ b/test/unit/org/apache/cassandra/Util.java
@@ -536,6 +536,62 @@ public class Util
thread.join(10000);
}
+ public static AssertionError runCatchingAssertionError(Runnable test)
+ {
+ try
+ {
+ test.run();
+ return null;
+ }
+ catch (AssertionError e)
+ {
+ return e;
+ }
+ }
+
+ /**
+ * Wrapper function used to run a test that can sometimes flake for uncontrollable reasons.
+ *
+ * If the given test fails on the first run, it is executed the given number of times again, expecting all secondary
+ * runs to succeed. If they do, the failure is understood as a flake and the test is treated as passing.
+ *
+ * Do not use this if the test is deterministic and its success is not influenced by external factors (such as time,
+ * selection of random seed, network failures, etc.). If the test can be made independent of such factors, it is
+ * probably preferable to do so rather than use this method.
+ *
+ * @param test The test to run.
+ * @param rerunsOnFailure How many times to re-run it if it fails. All reruns must pass.
+ * @param message Message to send to System.err on initial failure.
+ */
+ public static void flakyTest(Runnable test, int rerunsOnFailure, String message)
+ {
+ AssertionError e = runCatchingAssertionError(test);
+ if (e == null)
+ return; // success
+ System.err.format("Test failed. %s%n"
+ + "Re-running %d times to verify it isn't failing more often than it should.%n"
+ + "Failure was: %s%n", message, rerunsOnFailure, e);
+ e.printStackTrace();
+
+ int rerunsFailed = 0;
+ for (int i = 0; i < rerunsOnFailure; ++i)
+ {
+ AssertionError t = runCatchingAssertionError(test);
+ if (t != null)
+ {
+ ++rerunsFailed;
+ e.addSuppressed(t);
+ }
+ }
+ if (rerunsFailed > 0)
+ {
+ System.err.format("Test failed in %d of the %d reruns.%n", rerunsFailed, rerunsOnFailure);
+ throw e;
+ }
+
+ System.err.println("All reruns succeeded. Failure treated as flake.");
+ }
+
// for use with Optional in tests, can be used as an argument to orElseThrow
public static Supplier<AssertionError> throwAssert(final String message)
{