You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ni...@apache.org on 2021/04/29 11:48:48 UTC

[hbase] branch HBASE-25714 updated: HBASE-25774 TestSyncReplicationStandbyKillRS#testStandbyKillRegionServer is flaky (#3189)

This is an automated email from the ASF dual-hosted git repository.

niuyulin pushed a commit to branch HBASE-25714
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/HBASE-25714 by this push:
     new 4d9a360  HBASE-25774 TestSyncReplicationStandbyKillRS#testStandbyKillRegionServer is flaky (#3189)
4d9a360 is described below

commit 4d9a360a785400fbebb7aedf3fef86caa8a2b387
Author: Duo Zhang <zh...@apache.org>
AuthorDate: Thu Apr 22 10:10:15 2021 +0800

    HBASE-25774 TestSyncReplicationStandbyKillRS#testStandbyKillRegionServer is flaky (#3189)
    
    Wait for the restarter thread to finish before checking the state
    Add more detailed logs
    
    Signed-off-by: meiyi <my...@gmail.com>
---
 .../TestSyncReplicationStandbyKillRS.java          | 26 +++++++++++++++-------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestSyncReplicationStandbyKillRS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestSyncReplicationStandbyKillRS.java
index e9e915a..fd28f59 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestSyncReplicationStandbyKillRS.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestSyncReplicationStandbyKillRS.java
@@ -68,12 +68,14 @@ public class TestSyncReplicationStandbyKillRS extends SyncReplicationTestBase {
     UTIL1.shutdownMiniCluster();
 
     JVMClusterUtil.MasterThread activeMaster = UTIL2.getMiniHBaseCluster().getMasterThread();
+    String threadName = "RegionServer-Restarter";
     Thread t = new Thread(() -> {
       try {
         List<JVMClusterUtil.RegionServerThread> regionServers =
             UTIL2.getMiniHBaseCluster().getLiveRegionServerThreads();
         for (JVMClusterUtil.RegionServerThread rst : regionServers) {
           ServerName serverName = rst.getRegionServer().getServerName();
+          LOG.debug("Going to stop [{}]", serverName);
           rst.getRegionServer().stop("Stop RS for test");
           waitForRSShutdownToStartAndFinish(activeMaster, serverName);
           JVMClusterUtil.RegionServerThread restarted =
@@ -83,9 +85,11 @@ public class TestSyncReplicationStandbyKillRS extends SyncReplicationTestBase {
       } catch (Exception e) {
         LOG.error("Failed to kill RS", e);
       }
-    });
+    }, threadName);
     t.start();
 
+    LOG.debug("Going to transit peer {} to {} state", PEER_ID,
+      SyncReplicationState.DOWNGRADE_ACTIVE);
     // Transit standby to DA to replay logs
     try {
       UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
@@ -94,11 +98,18 @@ public class TestSyncReplicationStandbyKillRS extends SyncReplicationTestBase {
       LOG.error("Failed to transit standby cluster to " + SyncReplicationState.DOWNGRADE_ACTIVE, e);
     }
 
-    while (UTIL2.getAdmin().getReplicationPeerSyncReplicationState(PEER_ID)
-        != SyncReplicationState.DOWNGRADE_ACTIVE) {
+    LOG.debug("Waiting for the restarter thread {} to quit", threadName);
+    t.join();
+
+    while (UTIL2.getAdmin()
+      .getReplicationPeerSyncReplicationState(PEER_ID) != SyncReplicationState.DOWNGRADE_ACTIVE) {
+      LOG.debug("Waiting for peer {} to be in {} state", PEER_ID,
+        SyncReplicationState.DOWNGRADE_ACTIVE);
       Thread.sleep(SLEEP_TIME);
     }
+    LOG.debug("Going to verify the result, {} records expected", COUNT);
     verify(UTIL2, 0, COUNT);
+    LOG.debug("Verification successfully done");
   }
 
   private void waitForRSShutdownToStartAndFinish(JVMClusterUtil.MasterThread activeMaster,
@@ -106,15 +117,14 @@ public class TestSyncReplicationStandbyKillRS extends SyncReplicationTestBase {
     ServerManager sm = activeMaster.getMaster().getServerManager();
     // First wait for it to be in dead list
     while (!sm.getDeadServers().isDeadServer(serverName)) {
-      LOG.debug("Waiting for [" + serverName + "] to be listed as dead in master");
+      LOG.debug("Waiting for {} to be listed as dead in master", serverName);
       Thread.sleep(SLEEP_TIME);
     }
-    LOG.debug("Server [" + serverName + "] marked as dead, waiting for it to " +
-        "finish dead processing");
+    LOG.debug("Server {} marked as dead, waiting for it to finish dead processing", serverName);
     while (sm.areDeadServersInProgress()) {
-      LOG.debug("Server [" + serverName + "] still being processed, waiting");
+      LOG.debug("Server {} still being processed, waiting", serverName);
       Thread.sleep(SLEEP_TIME);
     }
-    LOG.debug("Server [" + serverName + "] done with server shutdown processing");
+    LOG.debug("Server {} done with server shutdown processing", serverName);
   }
 }