You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@helix.apache.org by ji...@apache.org on 2021/08/31 17:24:59 UTC

[helix] branch master updated: Improve TestControllerLeadershipChange test logic to tolerate longer delay when test runs. (#1853)

This is an automated email from the ASF dual-hosted git repository.

jiajunwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/master by this push:
     new a570d05  Improve TestControllerLeadershipChange test logic to tolerate longer delay when test runs. (#1853)
a570d05 is described below

commit a570d0566c42942b6154cb84a3d44f864fde37f0
Author: Jiajun Wang <jj...@linkedin.com>
AuthorDate: Tue Aug 31 10:24:05 2021 -0700

    Improve TestControllerLeadershipChange test logic to tolerate longer delay when test runs. (#1853)
    
    Change the assert condition to consider real test script execute delay to avoid test failure due to slow test runs.
---
 .../controller/TestControllerLeadershipChange.java      | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
index 1de1296..379feb9 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
@@ -189,6 +189,7 @@ public class TestControllerLeadershipChange extends ZkTestBase {
     String resourceName = "testResource";
     int numPartition = 1;
     int numReplica = 1;
+    int simulatedTransitionDelayMs = 100;
     String stateModel = "LeaderStandby";
     ObjectName resourceMBeanObjectName = getResourceMonitorObjectName(clusterName, resourceName);
     MBeanServer beanServer = ManagementFactory.getPlatformMBeanServer();
@@ -205,7 +206,7 @@ public class TestControllerLeadershipChange extends ZkTestBase {
     // Create participant
     _gSetupTool.addInstanceToCluster(clusterName, instanceName);
     MockParticipantManager participant =
-        new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
+        new MockParticipantManager(ZK_ADDR, clusterName, instanceName, simulatedTransitionDelayMs);
     participant.syncStart();
 
     // Create controller, since this is the only controller, it will be the leader
@@ -246,6 +247,9 @@ public class TestControllerLeadershipChange extends ZkTestBase {
     Assert.assertTrue(clusterVerifier.verify());
 
     Thread.sleep(1000);
+
+    // The moment before manager1 regain leadership. The topstateless duration will start counting.
+    long start = System.currentTimeMillis();
     setLeader(manager1);
 
     Assert.assertTrue(manager1.isLeader());
@@ -258,12 +262,19 @@ public class TestControllerLeadershipChange extends ZkTestBase {
     _gSetupTool.rebalanceResource(clusterName, resourceName, numReplica);
 
     Assert.assertTrue(clusterVerifier.verifyByPolling());
+    // The moment that partition top state has been recovered. The topstateless duration stopped counting.
+    long end = System.currentTimeMillis();
 
     // Resource lost top state, and manager1 lost leadership for 2000ms, because manager1 will
     // clean monitoring cache after re-gaining leadership, so max value of hand off duration should
     // not have such a large value
-    Assert.assertTrue((long) beanServer
-        .getAttribute(resourceMBeanObjectName, "PartitionTopStateHandoffDurationGauge.Max") < 500);
+    long duration = (long) beanServer
+        .getAttribute(resourceMBeanObjectName, "PartitionTopStateHandoffDurationGauge.Max");
+    long controllerOpDuration = end - start;
+    Assert.assertTrue(duration >= simulatedTransitionDelayMs && duration <= controllerOpDuration,
+        String.format(
+            "The recorded TopState-less duration is %d. But the controller operation duration is %d.",
+            duration, controllerOpDuration));
 
     participant.syncStop();
     manager1.disconnect();