You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2019/11/22 16:28:17 UTC

[hbase] branch branch-2 updated: HBASE-23307 Add running of ReplicationBarrierCleaner to hbck2 fixMeta invocation (#859)

This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new c8592f1  HBASE-23307 Add running of ReplicationBarrierCleaner to hbck2 fixMeta invocation (#859)
c8592f1 is described below

commit c8592f1fb79dac32a13a7c8075934e3081107269
Author: Michael Stack <sa...@users.noreply.github.com>
AuthorDate: Fri Nov 22 08:26:45 2019 -0800

    HBASE-23307 Add running of ReplicationBarrierCleaner to hbck2 fixMeta invocation (#859)
    
    Signed-off-by: Lijin Bin <bi...@apache.org>
---
 .../org/apache/hadoop/hbase/master/HMaster.java    |  8 ++++++++
 .../org/apache/hadoop/hbase/master/HbckChore.java  | 24 ++++++++++++++--------
 .../apache/hadoop/hbase/master/MasterServices.java |  4 ++++
 .../org/apache/hadoop/hbase/master/MetaFixer.java  |  3 +++
 .../master/cleaner/ReplicationBarrierCleaner.java  | 13 ++++++------
 .../hbase/master/MockNoopMasterServices.java       |  3 +++
 .../hbase/master/TestClusterRestartFailover.java   |  2 +-
 7 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 52ba326..6757ae3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -3805,6 +3805,14 @@ public class HMaster extends HRegionServer implements MasterServices {
     return this.hbckChore;
   }
 
+  @Override
+  public void runReplicationBarrierCleaner() {
+    ReplicationBarrierCleaner rbc = this.replicationBarrierCleaner;
+    if (rbc != null) {
+      rbc.chore();
+    }
+  }
+
   public SnapshotQuotaObserverChore getSnapshotQuotaObserverChore() {
     return this.snapshotQuotaChore;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
index cf43685..b25bb15 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -122,7 +122,6 @@ public class HbckChore extends ScheduledChore {
       LOG.warn("hbckChore is either disabled or is already running. Can't run the chore");
       return;
     }
-    running = true;
     regionInfoMap.clear();
     disabledTableRegions.clear();
     splitParentRegions.clear();
@@ -130,14 +129,19 @@ public class HbckChore extends ScheduledChore {
     orphanRegionsOnFS.clear();
     inconsistentRegions.clear();
     checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
-    loadRegionsFromInMemoryState();
-    loadRegionsFromRSReport();
+    running = true;
     try {
-      loadRegionsFromFS();
-    } catch (IOException e) {
-      LOG.warn("Failed to load the regions from filesystem", e);
+      loadRegionsFromInMemoryState();
+      loadRegionsFromRSReport();
+      try {
+        loadRegionsFromFS();
+      } catch (IOException e) {
+        LOG.warn("Failed to load the regions from filesystem", e);
+      }
+      saveCheckResultToSnapshot();
+    } catch (Throwable t) {
+      LOG.warn("Unexpected", t);
     }
-    saveCheckResultToSnapshot();
     running = false;
   }
 
@@ -262,6 +266,10 @@ public class HbckChore extends ScheduledChore {
       List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
       for (Path regionDir : regionDirs) {
         String encodedRegionName = regionDir.getName();
+        if (encodedRegionName == null) {
+          LOG.warn("Failed get of encoded name from {}", regionDir);
+          continue;
+        }
         HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
         if (hri == null) {
           orphanRegionsOnFS.put(encodedRegionName, regionDir);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
index 3524f63..d2b4cb2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
@@ -522,4 +522,8 @@ public interface MasterServices extends Server {
    */
   List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans);
 
+  /**
+   * Run the ReplicationBarrierCleaner.
+   */
+  void runReplicationBarrierCleaner();
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
index 281df1e..15b4e88 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
@@ -77,6 +77,9 @@ class MetaFixer {
     }
     fixHoles(report);
     fixOverlaps(report);
+    // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows, which
+    // can help clean up a damaged hbase:meta.
+    this.masterServices.runReplicationBarrierCleaner();
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java
index ff1da0b..653f735 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java
@@ -48,7 +48,6 @@ import org.slf4j.LoggerFactory;
  */
 @InterfaceAudience.Private
 public class ReplicationBarrierCleaner extends ScheduledChore {
-
   private static final Logger LOG = LoggerFactory.getLogger(ReplicationBarrierCleaner.class);
 
   private static final String REPLICATION_BARRIER_CLEANER_INTERVAL =
@@ -71,7 +70,9 @@ public class ReplicationBarrierCleaner extends ScheduledChore {
   }
 
   @Override
-  protected void chore() {
+  // Public so it can be run out of MasterRpcServices. Synchronized so that only one
+  // instance runs at a time.
+  public synchronized void chore() {
     long totalRows = 0;
     long cleanedRows = 0;
     long deletedRows = 0;
@@ -168,11 +169,9 @@ public class ReplicationBarrierCleaner extends ScheduledChore {
       LOG.warn("Failed to clean up replication barrier", e);
     }
     if (totalRows > 0) {
-      LOG.info(
-        "Cleanup replication barriers: totalRows {}, " +
-          "cleanedRows {}, deletedRows {}, deletedBarriers {}, deletedLastPushedSeqIds {}",
-        totalRows, cleanedRows, deletedRows, deletedBarriers, deletedLastPushedSeqIds);
+      LOG.info("TotalRows={}, cleanedRows={}, deletedRows={}, deletedBarriers={}, " +
+          "deletedLastPushedSeqIds={}", totalRows, cleanedRows, deletedRows,
+          deletedBarriers, deletedLastPushedSeqIds);
     }
   }
-
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
index 6b22201..6f9ebc9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
@@ -479,4 +479,7 @@ public class MockNoopMasterServices implements MasterServices {
   public List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans) {
     return null;
   }
+
+  @Override
+  public void runReplicationBarrierCleaner() {}
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
index 2e18c16..a6844fc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
@@ -108,7 +108,7 @@ public class TestClusterRestartFailover extends AbstractTestRestartCluster {
         .filter(p -> (p instanceof ServerCrashProcedure) &&
             ((ServerCrashProcedure) p).getServerName().equals(SERVER_FOR_TEST)).findAny();
     assertTrue("Should have one SCP for " + SERVER_FOR_TEST, procedure.isPresent());
-    assertFalse("Submit the SCP for the same serverName " + SERVER_FOR_TEST + " which should fail",
+    assertTrue("Submit the SCP for the same serverName " + SERVER_FOR_TEST + " which should fail",
       UTIL.getHBaseCluster().getMaster().getServerManager().expireServer(SERVER_FOR_TEST) ==
           Procedure.NO_PROC_ID);