You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hbase.apache.org by GitBox <gi...@apache.org> on 2019/07/03 02:19:23 UTC

[GitHub] [hbase] sunhelly commented on a change in pull request #323: HBASE-22414 Interruption of moving regions in RSGroup will cause regi…

sunhelly commented on a change in pull request #323: HBASE-22414 Interruption of moving regions in RSGroup will cause regi…
URL: https://github.com/apache/hbase/pull/323#discussion_r299753379
 
 

 ##########
 File path: hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
 ##########
 @@ -198,108 +200,99 @@ private void checkServersAndTables(Set<Address> servers, Set<TableName> tables,
     }
   }
 
-  private enum MoveType {
-    TO, FROM
+  /**
+   * Move every region from servers which are currently located on these servers,
+   * but should not be located there.
+   *
+   * @param servers the servers that will move to new group
+   * @param targetGroupName the target group name
+   * @throws IOException if moving the server and tables fail
+   */
+  private void moveServerRegionsFromGroup(Set<Address> servers, String targetGroupName)
+    throws IOException {
+    moveRegionsBetweenGroups(servers, targetGroupName,
+      rs -> getRegions(rs),
+      info -> {
+        try {
+          RSGroupInfo group = getRSGroupInfo(targetGroupName);
+          return group.containsTable(info.getTable());
+        } catch (IOException e) {
+          e.printStackTrace();
+          return false;
+        }
+      },
+      rs -> rs.getHostname());
   }
 
   /**
-   * When move a table to a group, we can only move regions of it which are not on group servers
-   * TO the group. Because all regions of the table must be in target group. When move a server to
-   * a group, we can only move regions on it which do not belong to group tables FROM the group.
-   * And what's more, table regions or server regions that already in target group need not to be
-   * moved. So MoveType.TO means move regions of TABLES, and MoveType.FROM means move regions on
-   * SERVERS.
+   * Moves regions of tables which are not on target group servers.
    *
-   * @param set it's a table set or a server set
-   * @param targetGroupName target group name
-   * @param type type of move regions
-   * @param <T> the type of elements in Set
-   * @throws IOException if move haven't succeed even after max number of retries
+   * @param tables the tables that will move to new group
+   * @param targetGroupName the target group name
+   * @throws IOException if moving the region fails
    */
-  private <T> void moveRegionsToOrFromGroup(Set<T> set, String targetGroupName, MoveType type)
-      throws IOException {
-    Set<T> newSet = new HashSet<>(set);
+  private void moveTableRegionsToGroup(Set<TableName> tables, String targetGroupName)
+    throws IOException {
+    moveRegionsBetweenGroups(tables, targetGroupName,
+      table -> master.getAssignmentManager().getRegionStates().getRegionsOfTable(table),
+      info -> {
+        try {
+          RSGroupInfo group = getRSGroupInfo(targetGroupName);
+          ServerName sn =
+            master.getAssignmentManager().getRegionStates().getRegionServerOfRegion(info);
+          return group.containsServer(sn.getAddress());
+        } catch (IOException e) {
+          e.printStackTrace();
+          return false;
+        }
+      },
+      table -> table.getNameWithNamespaceInclAsString());
+  }
+
+  private <T> void moveRegionsBetweenGroups(Set<T> regionsOwners, String targetGroupName,
+    Function<T, List<RegionInfo>> getRegionsInfo,
+    Function<RegionInfo,Boolean> validation,
+    Function<T,String> getOwnerName) throws IOException {
     boolean hasRegionsToMove;
     int retry = 0;
-    IOException toThrow = null;
-    Set<String> failedRegions = new HashSet<>();
-    RSGroupInfo targetGrp = getRSGroupInfo(targetGroupName);
+    Set<T> allOwners = new HashSet<>(regionsOwners);
     do {
       hasRegionsToMove = false;
-      for (Iterator<T> iter = newSet.iterator(); iter.hasNext(); ) {
-        T element = iter.next();
-        List<RegionInfo> toMoveRegions = new ArrayList<>();
-        if (type == MoveType.TO) {
-          // means element type of set is TableName
-          assert element instanceof TableName;
-          if (master.getAssignmentManager().isTableDisabled((TableName) element)) {
-            LOG.debug("Skipping move regions because the table {} is disabled", element);
-          }else {
-            // Get regions of these tables and filter regions by group servers.
-            for (RegionInfo region : master.getAssignmentManager().getRegionStates()
-                .getRegionsOfTable((TableName) element)) {
-              ServerName sn =
-                  master.getAssignmentManager().getRegionStates().getRegionServerOfRegion(region);
-              if (!targetGrp.containsServer(sn.getAddress())) {
-                toMoveRegions.add(region);
-              }
-            }
-          }
-        }
-        if (type == MoveType.FROM) {
-          // means element type of set is Address
-          assert element instanceof Address;
-          // Get regions that are associated with these servers and filter regions by group tables.
-          for (RegionInfo region : getRegions((Address) element)) {
-            if (!targetGrp.containsTable(region.getTable())) {
-              toMoveRegions.add(region);
-            }
-          }
-        }
-
-        //move regions
-        for (RegionInfo region : toMoveRegions) {
-          LOG.info("Moving server region {}, which do not belong to RSGroup {}",
+      for (Iterator<T> iter = allOwners.iterator(); iter.hasNext();) {
+        T owner = iter.next();
+        // Get regions that are associated with this server and filter regions by group tables.
+        for (RegionInfo region : getRegionsInfo.apply(owner)) {
+          if (!validation.apply(region)) {
+            LOG.info("Moving region {}, which do not belong to RSGroup {}",
               region.getShortNameToLog(), targetGroupName);
-          try {
-            this.master.getAssignmentManager().move(region);
-            failedRegions.remove(region.getRegionNameAsString());
-          } catch (IOException ioe) {
-            LOG.debug("Move region {} from group failed, will retry, current retry time is {}",
+            try {
+              this.master.getAssignmentManager().move(region);
+            }catch (IOException ioe){
+              LOG.error("Move region {} from group failed, will retry, current retry time is {}",
                 region.getShortNameToLog(), retry, ioe);
-            toThrow = ioe;
-            failedRegions.add(region.getRegionNameAsString());
-          }
-          if (master.getAssignmentManager().getRegionStates().
+            }
+            if (master.getAssignmentManager().getRegionStates().
               getRegionState(region).isFailedOpen()) {
-            continue;
+              continue;
+            }
+            hasRegionsToMove = true;
           }
-          hasRegionsToMove = true;
         }
+
         if (!hasRegionsToMove) {
-          LOG.info("There are no more regions of {} to move for RSGroup {}", element,
-              targetGroupName);
+          LOG.info("No more regions to move from {} to RSGroup", getOwnerName.apply(owner));
           iter.remove();
         }
       }
+
       retry++;
       try {
         rsGroupInfoManager.wait(1000);
       } catch (InterruptedException e) {
         LOG.warn("Sleep interrupted", e);
         Thread.currentThread().interrupt();
       }
-    } while (hasRegionsToMove && retry <= moveMaxRetry);
-
-    //has up to max retry time or there are no more regions to move
-    if (hasRegionsToMove) {
-      // print failed moved regions, for later process conveniently
-      String msg = String.format("move regions for group %s failed, failed regions: %s",
-          targetGroupName, failedRegions);
-      LOG.error(msg);
-      throw new DoNotRetryIOException(msg +
-          ", just record the last failed region's cause, more details in server log", toThrow);
-    }
+    } while (hasRegionsToMove && retry <= 50);
 
 Review comment:
   50 can be replaced by moveMaxRetry.
   And if the number of retries has reached the limit but there are still some regions need to be moved, should we throw out an Exception to notify the failure?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services