You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by bu...@apache.org on 2018/02/27 17:46:34 UTC

[05/20] hbase git commit: HBASE-19974 Fix decommissioned servers cannot be removed by remove_servers_rsgroup methods

HBASE-19974 Fix decommissioned servers cannot be removed by remove_servers_rsgroup methods

Signed-off-by: tedyu <yu...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a29b3caf
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a29b3caf
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a29b3caf

Branch: refs/heads/HBASE-15151
Commit: a29b3caf4dbc7b8833474ef5da5438f7f6907e00
Parents: 2beda62
Author: haxiaolin <ha...@xiaomi.com>
Authored: Mon Feb 26 14:25:01 2018 +0800
Committer: tedyu <yu...@gmail.com>
Committed: Mon Feb 26 07:30:56 2018 -0800

----------------------------------------------------------------------
 .../hbase/rsgroup/RSGroupAdminServer.java       |  8 +++-
 .../hadoop/hbase/rsgroup/TestRSGroups.java      |  7 ++-
 .../hadoop/hbase/rsgroup/TestRSGroupsBase.java  | 46 ++++++++++++--------
 3 files changed, 40 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/a29b3caf/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
index aba57fe..094fc1d 100644
--- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
+++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
@@ -675,8 +675,12 @@ public class RSGroupAdminServer implements RSGroupAdmin {
   private void checkForDeadOrOnlineServers(Set<Address> servers) throws ConstraintException {
     // This uglyness is because we only have Address, not ServerName.
     Set<Address> onlineServers = new HashSet<>();
-    for(ServerName server: master.getServerManager().getOnlineServers().keySet()) {
-      onlineServers.add(server.getAddress());
+    List<ServerName> drainingServers = master.getServerManager().getDrainingServersList();
+    for (ServerName server : master.getServerManager().getOnlineServers().keySet()) {
+      // Only online but not decommissioned servers are really online
+      if (!drainingServers.contains(server)) {
+        onlineServers.add(server.getAddress());
+      }
     }
 
     Set<Address> deadServers = new HashSet<>();

http://git-wip-us.apache.org/repos/asf/hbase/blob/a29b3caf/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java
index 9116f3b..610278a 100644
--- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java
+++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java
@@ -36,7 +36,6 @@ import org.apache.hadoop.hbase.Waiter;
 import org.apache.hadoop.hbase.Waiter.Predicate;
 import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
-import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.ServerManager;
 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
 import org.apache.hadoop.hbase.net.Address;
@@ -66,7 +65,6 @@ public class TestRSGroups extends TestRSGroupsBase {
       HBaseClassTestRule.forClass(TestRSGroups.class);
 
   protected static final Logger LOG = LoggerFactory.getLogger(TestRSGroups.class);
-  private static HMaster master;
   private static boolean INIT = false;
   private static RSGroupAdminEndpoint rsGroupAdminEndpoint;
 
@@ -126,6 +124,11 @@ public class TestRSGroups extends TestRSGroupsBase {
     deleteNamespaceIfNecessary();
     deleteGroups();
 
+    for(ServerName sn : admin.listDecommissionedRegionServers()){
+      admin.recommissionRegionServer(sn, null);
+    }
+    assertTrue(admin.listDecommissionedRegionServers().isEmpty());
+
     int missing = NUM_SLAVES_BASE - getNumServers();
     LOG.info("Restoring servers: "+missing);
     for(int i=0; i<missing; i++) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/a29b3caf/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
index c3f7eef..76bcd20 100644
--- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
+++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
@@ -25,8 +25,10 @@ import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.security.SecureRandom;
+import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -81,9 +83,11 @@ public abstract class TestRSGroupsBase {
   protected static Admin admin;
   protected static HBaseCluster cluster;
   protected static RSGroupAdmin rsGroupAdmin;
+  protected static HMaster master;
 
   public final static long WAIT_TIMEOUT = 60000*5;
   public final static int NUM_SLAVES_BASE = 4; //number of slaves for the smallest cluster
+  public static int NUM_DEAD_SERVERS = 0;
 
   // Per test variables
   TableName tableName;
@@ -271,10 +275,10 @@ public abstract class TestRSGroupsBase {
   public int getNumServers() throws IOException {
     ClusterMetrics status =
         admin.getClusterMetrics(EnumSet.of(Option.MASTER, Option.LIVE_SERVERS));
-    ServerName master = status.getMasterName();
+    ServerName masterName = status.getMasterName();
     int count = 0;
     for (ServerName sn : status.getLiveServerMetrics().keySet()) {
-      if (!sn.equals(master)) {
+      if (!sn.equals(masterName)) {
         count++;
       }
     }
@@ -885,6 +889,7 @@ public abstract class TestRSGroupsBase {
   public void testClearDeadServers() throws Exception {
     LOG.info("testClearDeadServers");
     final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 3);
+    NUM_DEAD_SERVERS = cluster.getClusterMetrics().getDeadServerNames().size();
 
     ServerName targetServer = ServerName.parseServerName(
         newGroup.getServers().iterator().next().toString());
@@ -897,15 +902,15 @@ public abstract class TestRSGroupsBase {
       //due to the connection loss
       targetRS.stopServer(null,
           AdminProtos.StopServerRequest.newBuilder().setReason("Die").build());
+      NUM_DEAD_SERVERS ++;
     } catch(Exception e) {
     }
-    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
     //wait for stopped regionserver to dead server list
     TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
       @Override
       public boolean evaluate() throws Exception {
         return !master.getServerManager().areDeadServersInProgress()
-            && cluster.getClusterMetrics().getDeadServerNames().size() > 0;
+            && cluster.getClusterMetrics().getDeadServerNames().size() == NUM_DEAD_SERVERS;
       }
     });
     assertFalse(cluster.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer));
@@ -925,8 +930,10 @@ public abstract class TestRSGroupsBase {
   public void testRemoveServers() throws Exception {
     LOG.info("testRemoveServers");
     final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 3);
-    ServerName targetServer = ServerName.parseServerName(
-        newGroup.getServers().iterator().next().toString());
+    Iterator<Address> iterator = newGroup.getServers().iterator();
+    ServerName targetServer = ServerName.parseServerName(iterator.next().toString());
+
+    // remove online servers
     try {
       rsGroupAdmin.removeServers(Sets.newHashSet(targetServer.getAddress()));
       fail("Online servers shouldn't have been successfully removed.");
@@ -938,6 +945,8 @@ public abstract class TestRSGroupsBase {
     }
     assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
 
+    // remove dead servers
+    NUM_DEAD_SERVERS = cluster.getClusterMetrics().getDeadServerNames().size();
     AdminProtos.AdminService.BlockingInterface targetRS =
         ((ClusterConnection) admin.getConnection()).getAdmin(targetServer);
     try {
@@ -945,18 +954,19 @@ public abstract class TestRSGroupsBase {
           GetServerInfoRequest.newBuilder().build()).getServerInfo().getServerName());
       //stopping may cause an exception
       //due to the connection loss
+      LOG.info("stopping server " + targetServer.getHostAndPort());
       targetRS.stopServer(null,
           AdminProtos.StopServerRequest.newBuilder().setReason("Die").build());
+      NUM_DEAD_SERVERS ++;
     } catch(Exception e) {
     }
 
-    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
     //wait for stopped regionserver to dead server list
     TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
       @Override
       public boolean evaluate() throws Exception {
         return !master.getServerManager().areDeadServersInProgress()
-            && cluster.getClusterMetrics().getDeadServerNames().size() > 0;
+            && cluster.getClusterMetrics().getDeadServerNames().size() == NUM_DEAD_SERVERS;
       }
     });
 
@@ -971,17 +981,19 @@ public abstract class TestRSGroupsBase {
     }
     assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
 
-    ServerName sn = TEST_UTIL.getHBaseClusterInterface().getClusterMetrics().getMasterName();
-    TEST_UTIL.getHBaseClusterInterface().stopMaster(sn);
-    TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(sn, 60000);
-    TEST_UTIL.getHBaseClusterInterface().startMaster(sn.getHostname(), 0);
-    TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster(60000);
+    // remove decommissioned servers
+    List<ServerName> serversToDecommission = new ArrayList<>();
+    targetServer = ServerName.parseServerName(iterator.next().toString());
+    targetRS = ((ClusterConnection) admin.getConnection()).getAdmin(targetServer);
+    targetServer = ProtobufUtil.toServerName(targetRS.getServerInfo(null,
+          GetServerInfoRequest.newBuilder().build()).getServerInfo().getServerName());
+    assertTrue(master.getServerManager().getOnlineServers().containsKey(targetServer));
+    serversToDecommission.add(targetServer);
 
-    assertEquals(3, cluster.getClusterMetrics().getLiveServerMetrics().size());
-    assertFalse(cluster.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer));
-    assertFalse(cluster.getClusterMetrics().getDeadServerNames().contains(targetServer));
-    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
+    admin.decommissionRegionServers(serversToDecommission, true);
+    assertEquals(1, admin.listDecommissionedRegionServers().size());
 
+    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
     rsGroupAdmin.removeServers(Sets.newHashSet(targetServer.getAddress()));
     Set<Address> newGroupServers = rsGroupAdmin.getRSGroupInfo(newGroup.getName()).getServers();
     assertFalse(newGroupServers.contains(targetServer.getAddress()));