You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hbase.apache.org by GitBox <gi...@apache.org> on 2020/01/02 18:01:48 UTC

[GitHub] [hbase] virajjasani commented on a change in pull request #952: HBASE-23596 HBCKServerCrashProcedure can double assign

virajjasani commented on a change in pull request #952: HBASE-23596 HBCKServerCrashProcedure can double assign
URL: https://github.com/apache/hbase/pull/952#discussion_r362568604
 
 

 ##########
 File path: hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/HBCKServerCrashProcedure.java
 ##########
 @@ -65,31 +75,91 @@ public HBCKServerCrashProcedure(final MasterProcedureEnv env, final ServerName s
   public HBCKServerCrashProcedure() {}
 
   /**
-   * Adds Regions found by super method any found scanning hbase:meta.
+   * If no Regions found in Master context, then we will search hbase:meta for references
+   * to the passed server. Operator may have passed ServerName because they have found
+   * references to 'Unknown Servers'. They are using HBCKSCP to clear them out.
    */
   @Override
   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH_EXCEPTION",
     justification="FindBugs seems confused on ps in below.")
   List<RegionInfo> getRegionsOnCrashedServer(MasterProcedureEnv env) {
-    // Super can return immutable emptyList.
+    // Super will return an immutable list (empty if nothing on this server).
     List<RegionInfo> ris = super.getRegionsOnCrashedServer(env);
-    List<Pair<RegionInfo, ServerName>> ps = null;
+    if (!ris.isEmpty()) {
+      return ris;
+    }
+    // Nothing in in-master context. Check for Unknown Server! in hbase:meta.
+    // If super list is empty, then allow that an operator scheduled an SCP because they are trying
+    // to purge 'Unknown Servers' -- servers that are neither online nor in dead servers
+    // list but that ARE in hbase:meta and so showing as unknown in places like 'HBCK Report'.
+    // This mis-accounting does not happen in normal circumstance but may arise in-extremis
+    // when cluster has been damaged in operation.
+    UnknownServerVisitor visitor =
+        new UnknownServerVisitor(env.getMasterServices().getConnection(), getServerName());
     try {
-      ps = MetaTableAccessor.getTableRegionsAndLocations(env.getMasterServices().getConnection(),
-              null, false);
+      MetaTableAccessor.scanMetaForTableRegions(env.getMasterServices().getConnection(),
+          visitor, null);
     } catch (IOException ioe) {
-      LOG.warn("Failed get of all regions; continuing", ioe);
-    }
-    if (ps == null || ps.isEmpty()) {
-      LOG.warn("No regions found in hbase:meta");
+      LOG.warn("Failed scan of hbase:meta for 'Unknown Servers'", ioe);
       return ris;
     }
-    List<RegionInfo> aggregate = ris == null || ris.isEmpty()?
-        new ArrayList<>(): new ArrayList<>(ris);
-    int before = aggregate.size();
-    ps.stream().filter(p -> p.getSecond() != null && p.getSecond().equals(getServerName())).
-        forEach(p -> aggregate.add(p.getFirst()));
-    LOG.info("Found {} mentions of {} in hbase:meta", aggregate.size() - before, getServerName());
-    return aggregate;
+    LOG.info("Found {} mentions of {} in hbase:meta of OPEN/OPENING Regions: {}",
+        visitor.getReassigns().size(), getServerName(),
+        visitor.getReassigns().stream().map(RegionInfo::getEncodedName).
+            collect(Collectors.joining(",")));
+    return visitor.getReassigns();
+  }
+
+  /**
+   * Visitor for hbase:meta that 'fixes' Unknown Server issues. Collects
+   * a List of Regions to reassign as 'result'.
+   */
+  static class UnknownServerVisitor implements MetaTableAccessor.Visitor {
+    private final List<RegionInfo> reassigns = new ArrayList<>();
+    private final ServerName unknownServerName;
+    private final Connection connection;
+
+    UnknownServerVisitor(Connection connection, ServerName unknownServerName) {
+      this.connection = connection;
+      this.unknownServerName = unknownServerName;
+    }
+
+    @Override
+    public boolean visit(Result result) throws IOException {
+      RegionLocations rls = MetaTableAccessor.getRegionLocations(result);
+      for (HRegionLocation hrl: rls.getRegionLocations()) {
+        if (hrl.getRegion() == null) {
+          continue;
+        }
+        if (hrl.getServerName() == null) {
+          continue;
+        }
+        if (!hrl.getServerName().equals(this.unknownServerName)) {
+          continue;
+        }
 
 Review comment:
   Above 3 conditions can be combined for `continue`?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services