You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by nd...@apache.org on 2020/10/08 21:24:20 UTC
[hbase] branch master updated: HBASE-25156 TestMasterFailover.testSimpleMasterFailover is flaky (#2507)
This is an automated email from the ASF dual-hosted git repository.
ndimiduk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new b82d8a5 HBASE-25156 TestMasterFailover.testSimpleMasterFailover is flaky (#2507)
b82d8a5 is described below
commit b82d8a55171b7815662defc5c0d01f5f07baf520
Author: Nick Dimiduk <nd...@apache.org>
AuthorDate: Thu Oct 8 14:23:38 2020 -0700
HBASE-25156 TestMasterFailover.testSimpleMasterFailover is flaky (#2507)
Change the test to wait for evidence that the active master has seen
that the backup master killed by the test has gone away. This is done
before proceeding to validate that the dead backup is correctly
omitted from the ClusterStatus report.
Also, minor fixup to several assertions, using `assertEquals` instead
of `assertTrue(...equals(...))` and correcting expected vs. actual
ordering of assertion arguments.
Signed-off-by: Michael Stack <st...@apache.org>
---
.../hadoop/hbase/master/TestMasterFailover.java | 23 +++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index a27936d..1e37fcb 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -20,8 +20,8 @@ package org.apache.hadoop.hbase.master;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
-
import java.util.List;
+import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -100,7 +100,7 @@ public class TestMasterFailover {
// Check that ClusterStatus reports the correct active and backup masters
assertNotNull(active);
ClusterMetrics status = active.getClusterMetrics();
- assertTrue(status.getMasterName().equals(activeName));
+ assertEquals(activeName, status.getMasterName());
assertEquals(2, status.getBackupMasterNames().size());
// attempt to stop one of the inactive masters
@@ -113,7 +113,7 @@ public class TestMasterFailover {
// Verify still one active master and it's the same
for (int i = 0; i < masterThreads.size(); i++) {
if (masterThreads.get(i).getMaster().isActiveMaster()) {
- assertTrue(activeName.equals(masterThreads.get(i).getMaster().getServerName()));
+ assertEquals(activeName, masterThreads.get(i).getMaster().getServerName());
activeIndex = i;
active = masterThreads.get(activeIndex).getMaster();
}
@@ -126,10 +126,15 @@ public class TestMasterFailover {
" regions servers");
assertEquals(3, rsCount);
+ // wait for the active master to acknowledge loss of the backup from ZK
+ final HMaster activeFinal = active;
+ TEST_UTIL.waitFor(
+ TimeUnit.SECONDS.toMillis(30), () -> activeFinal.getBackupMasters().size() == 1);
+
// Check that ClusterStatus reports the correct active and backup masters
assertNotNull(active);
status = active.getClusterMetrics();
- assertTrue(status.getMasterName().equals(activeName));
+ assertEquals(activeName, status.getMasterName());
assertEquals(1, status.getBackupMasterNames().size());
// kill the active master
@@ -148,13 +153,13 @@ public class TestMasterFailover {
active = masterThreads.get(0).getMaster();
assertNotNull(active);
status = active.getClusterMetrics();
- ServerName mastername = status.getMasterName();
- assertTrue(mastername.equals(active.getServerName()));
+ ServerName masterName = status.getMasterName();
+ assertNotNull(masterName);
+ assertEquals(active.getServerName(), masterName);
assertTrue(active.isActiveMaster());
assertEquals(0, status.getBackupMasterNames().size());
int rss = status.getLiveServerMetrics().size();
- LOG.info("Active master " + mastername.getServerName() + " managing " +
- rss + " region servers");
+ LOG.info("Active master {} managing {} region servers", masterName.getServerName(), rss);
assertEquals(3, rss);
} finally {
// Stop the cluster