You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by nd...@apache.org on 2020/10/08 21:52:56 UTC

[hbase] branch branch-1 updated: HBASE-25156 TestMasterFailover.testSimpleMasterFailover is flaky (#2507)

This is an automated email from the ASF dual-hosted git repository.

ndimiduk pushed a commit to branch branch-1
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-1 by this push:
     new 5749747  HBASE-25156 TestMasterFailover.testSimpleMasterFailover is flaky (#2507)
5749747 is described below

commit 5749747cb2e2eceac0967cbabefc2067d11f7be8
Author: Nick Dimiduk <nd...@apache.org>
AuthorDate: Thu Oct 8 14:45:48 2020 -0700

    HBASE-25156 TestMasterFailover.testSimpleMasterFailover is flaky (#2507)
    
    Change the test to wait for evidence that the active master has seen
    that the backup master killed by the test has gone away. This is done
    before proceeding to validate that the dead backup is correctly
    omitted from the ClusterStatus report.
    
    Also, minor fixup to several assertions, using `assertEquals` instead
    of `assertTrue(...equals(...))` and correcting expected vs. actual
    ordering of assertion arguments.
    
    Signed-off-by: Michael Stack <st...@apache.org>
---
 .../hadoop/hbase/master/TestMasterFailover.java    | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index a5b3d49..99bc21e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -30,6 +30,7 @@ import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
 
+import java.util.concurrent.TimeUnit;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -1112,7 +1113,7 @@ public class TestMasterFailover {
     // Check that ClusterStatus reports the correct active and backup masters
     assertNotNull(active);
     ClusterStatus status = active.getClusterStatus();
-    assertTrue(status.getMaster().equals(activeName));
+    assertEquals(status.getMaster(), activeName);
     assertEquals(2, status.getBackupMastersSize());
     assertEquals(2, status.getBackupMasters().size());
 
@@ -1126,7 +1127,7 @@ public class TestMasterFailover {
     // Verify still one active master and it's the same
     for (int i = 0; i < masterThreads.size(); i++) {
       if (masterThreads.get(i).getMaster().isActiveMaster()) {
-        assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
+        assertEquals(activeName, masterThreads.get(i).getMaster().getServerName());
         activeIndex = i;
         active = masterThreads.get(activeIndex).getMaster();
       }
@@ -1137,6 +1138,14 @@ public class TestMasterFailover {
     LOG.info("Active master " + active.getServerName() + " managing " + rsCount +  " regions servers");
     assertEquals(3, rsCount);
 
+    // wait for the active master to acknowledge loss of the backup from ZK
+    final HMaster activeFinal = active;
+    TEST_UTIL.waitFor(TimeUnit.SECONDS.toMillis(30), new Waiter.Predicate<Exception>() {
+      @Override public boolean evaluate() {
+        return activeFinal.getBackupMasters().size() == 1;
+      }
+    });
+
     // Check that ClusterStatus reports the correct active and backup masters
     assertNotNull(active);
     final HMaster finalActive = active;
@@ -1148,7 +1157,7 @@ public class TestMasterFailover {
       }
     });
     status = active.getClusterStatus();
-    assertTrue(status.getMaster().equals(activeName));
+    assertEquals(activeName, status.getMaster());
 
     // kill the active master
     LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
@@ -1166,13 +1175,14 @@ public class TestMasterFailover {
     active = masterThreads.get(0).getMaster();
     assertNotNull(active);
     status = active.getClusterStatus();
-    ServerName mastername = status.getMaster();
-    assertTrue(mastername.equals(active.getServerName()));
+    ServerName masterName = status.getMaster();
+    assertNotNull(masterName);
+    assertEquals(active.getServerName(), masterName);
     assertTrue(active.isActiveMaster());
     assertEquals(0, status.getBackupMastersSize());
     assertEquals(0, status.getBackupMasters().size());
     int rss = status.getServersSize();
-    LOG.info("Active master " + mastername.getServerName() + " managing " +
+    LOG.info("Active master " + masterName.getServerName() + " managing " +
       rss +  " region servers");
     assertEquals(3, rss);