You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2019/11/22 16:26:58 UTC

[hbase] branch branch-2.2 updated: HBASE-23307 Add running of ReplicationBarrierCleaner to hbck2 fixMeta invocation (#859)

This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch branch-2.2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.2 by this push:
     new 826010a  HBASE-23307 Add running of ReplicationBarrierCleaner to hbck2 fixMeta invocation (#859)
826010a is described below

commit 826010ad77db9cf37d02adf3e01f6c4e2ecd45b2
Author: Michael Stack <sa...@users.noreply.github.com>
AuthorDate: Fri Nov 22 08:26:45 2019 -0800

    HBASE-23307 Add running of ReplicationBarrierCleaner to hbck2 fixMeta invocation (#859)
    
    Signed-off-by: Lijin Bin <bi...@apache.org>
---
 .../org/apache/hadoop/hbase/master/HMaster.java    |  8 ++++++++
 .../org/apache/hadoop/hbase/master/HbckChore.java  | 24 ++++++++++++++--------
 .../apache/hadoop/hbase/master/MasterServices.java |  5 +++++
 .../org/apache/hadoop/hbase/master/MetaFixer.java  |  3 +++
 .../master/cleaner/ReplicationBarrierCleaner.java  | 13 ++++++------
 .../hbase/master/MockNoopMasterServices.java       |  3 +++
 .../hbase/master/TestClusterRestartFailover.java   |  2 +-
 7 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 05770dd..93835c1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -3682,6 +3682,14 @@ public class HMaster extends HRegionServer implements MasterServices {
     return this.hbckChore;
   }
 
+  @Override
+  public void runReplicationBarrierCleaner() {
+    ReplicationBarrierCleaner rbc = this.replicationBarrierCleaner;
+    if (rbc != null) {
+      rbc.chore();
+    }
+  }
+
   public SnapshotQuotaObserverChore getSnapshotQuotaObserverChore() {
     return this.snapshotQuotaChore;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
index cf43685..b25bb15 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -122,7 +122,6 @@ public class HbckChore extends ScheduledChore {
       LOG.warn("hbckChore is either disabled or is already running. Can't run the chore");
       return;
     }
-    running = true;
     regionInfoMap.clear();
     disabledTableRegions.clear();
     splitParentRegions.clear();
@@ -130,14 +129,19 @@ public class HbckChore extends ScheduledChore {
     orphanRegionsOnFS.clear();
     inconsistentRegions.clear();
     checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
-    loadRegionsFromInMemoryState();
-    loadRegionsFromRSReport();
+    running = true;
     try {
-      loadRegionsFromFS();
-    } catch (IOException e) {
-      LOG.warn("Failed to load the regions from filesystem", e);
+      loadRegionsFromInMemoryState();
+      loadRegionsFromRSReport();
+      try {
+        loadRegionsFromFS();
+      } catch (IOException e) {
+        LOG.warn("Failed to load the regions from filesystem", e);
+      }
+      saveCheckResultToSnapshot();
+    } catch (Throwable t) {
+      LOG.warn("Unexpected", t);
     }
-    saveCheckResultToSnapshot();
     running = false;
   }
 
@@ -262,6 +266,10 @@ public class HbckChore extends ScheduledChore {
       List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
       for (Path regionDir : regionDirs) {
         String encodedRegionName = regionDir.getName();
+        if (encodedRegionName == null) {
+          LOG.warn("Failed get of encoded name from {}", regionDir);
+          continue;
+        }
         HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
         if (hri == null) {
           orphanRegionsOnFS.put(encodedRegionName, regionDir);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
index a6d1a39..0fbb0e8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
@@ -509,4 +509,9 @@ public interface MasterServices extends Server {
    * @return succeeded plans
    */
   List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans);
+
+  /**
+   * Run the ReplicationBarrierCleaner chore.
+   */
+  void runReplicationBarrierCleaner();
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
index 281df1e..15b4e88 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
@@ -77,6 +77,9 @@ class MetaFixer {
     }
     fixHoles(report);
     fixOverlaps(report);
+    // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows, which
+    // can help clean up a damaged hbase:meta.
+    this.masterServices.runReplicationBarrierCleaner();
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java
index ff1da0b..653f735 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java
@@ -48,7 +48,6 @@ import org.slf4j.LoggerFactory;
  */
 @InterfaceAudience.Private
 public class ReplicationBarrierCleaner extends ScheduledChore {
-
   private static final Logger LOG = LoggerFactory.getLogger(ReplicationBarrierCleaner.class);
 
   private static final String REPLICATION_BARRIER_CLEANER_INTERVAL =
@@ -71,7 +70,9 @@ public class ReplicationBarrierCleaner extends ScheduledChore {
   }
 
   @Override
-  protected void chore() {
+  // Public so it can be run out of MasterRpcServices. Synchronized so there is only one
+  // running instance at a time.
+  public synchronized void chore() {
     long totalRows = 0;
     long cleanedRows = 0;
     long deletedRows = 0;
@@ -168,11 +169,9 @@ public class ReplicationBarrierCleaner extends ScheduledChore {
       LOG.warn("Failed to clean up replication barrier", e);
     }
     if (totalRows > 0) {
-      LOG.info(
-        "Cleanup replication barriers: totalRows {}, " +
-          "cleanedRows {}, deletedRows {}, deletedBarriers {}, deletedLastPushedSeqIds {}",
-        totalRows, cleanedRows, deletedRows, deletedBarriers, deletedLastPushedSeqIds);
+      LOG.info("TotalRows={}, cleanedRows={}, deletedRows={}, deletedBarriers={}, " +
+          "deletedLastPushedSeqIds={}", totalRows, cleanedRows, deletedRows,
+          deletedBarriers, deletedLastPushedSeqIds);
     }
   }
-
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
index 67f6b16..c0afbdd 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
@@ -467,4 +467,7 @@ public class MockNoopMasterServices implements MasterServices {
   public List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans) {
     return null;
   }
+
+  @Override
+  public void runReplicationBarrierCleaner() {}
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
index 7d15439..7c7b447 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
@@ -116,7 +116,7 @@ public class TestClusterRestartFailover extends AbstractTestRestartCluster {
         .filter(p -> (p instanceof ServerCrashProcedure) &&
             ((ServerCrashProcedure) p).getServerName().equals(SERVER_FOR_TEST)).findAny();
     assertTrue("Should have one SCP for " + SERVER_FOR_TEST, procedure.isPresent());
-    assertFalse("Submit the SCP for the same serverName " + SERVER_FOR_TEST + " which should fail",
+    assertTrue("Submit the SCP for the same serverName " + SERVER_FOR_TEST + " which should fail",
       UTIL.getHBaseCluster().getMaster().getServerManager().expireServer(SERVER_FOR_TEST) ==
           Procedure.NO_PROC_ID);