You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2019/11/22 16:28:17 UTC
[hbase] branch branch-2 updated: HBASE-23307 Add running of
ReplicationBarrierCleaner to hbck2 fixMeta invocation (#859)
This is an automated email from the ASF dual-hosted git repository.
stack pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new c8592f1 HBASE-23307 Add running of ReplicationBarrierCleaner to hbck2 fixMeta invocation (#859)
c8592f1 is described below
commit c8592f1fb79dac32a13a7c8075934e3081107269
Author: Michael Stack <sa...@users.noreply.github.com>
AuthorDate: Fri Nov 22 08:26:45 2019 -0800
HBASE-23307 Add running of ReplicationBarrierCleaner to hbck2 fixMeta invocation (#859)
Signed-off-by: Lijin Bin <bi...@apache.org>
---
.../org/apache/hadoop/hbase/master/HMaster.java | 8 ++++++++
.../org/apache/hadoop/hbase/master/HbckChore.java | 24 ++++++++++++++--------
.../apache/hadoop/hbase/master/MasterServices.java | 4 ++++
.../org/apache/hadoop/hbase/master/MetaFixer.java | 3 +++
.../master/cleaner/ReplicationBarrierCleaner.java | 13 ++++++------
.../hbase/master/MockNoopMasterServices.java | 3 +++
.../hbase/master/TestClusterRestartFailover.java | 2 +-
7 files changed, 41 insertions(+), 16 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 52ba326..6757ae3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -3805,6 +3805,14 @@ public class HMaster extends HRegionServer implements MasterServices {
return this.hbckChore;
}
+ @Override
+ public void runReplicationBarrierCleaner() {
+ ReplicationBarrierCleaner rbc = this.replicationBarrierCleaner;
+ if (rbc != null) {
+ rbc.chore();
+ }
+ }
+
public SnapshotQuotaObserverChore getSnapshotQuotaObserverChore() {
return this.snapshotQuotaChore;
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
index cf43685..b25bb15 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -122,7 +122,6 @@ public class HbckChore extends ScheduledChore {
LOG.warn("hbckChore is either disabled or is already running. Can't run the chore");
return;
}
- running = true;
regionInfoMap.clear();
disabledTableRegions.clear();
splitParentRegions.clear();
@@ -130,14 +129,19 @@ public class HbckChore extends ScheduledChore {
orphanRegionsOnFS.clear();
inconsistentRegions.clear();
checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
- loadRegionsFromInMemoryState();
- loadRegionsFromRSReport();
+ running = true;
try {
- loadRegionsFromFS();
- } catch (IOException e) {
- LOG.warn("Failed to load the regions from filesystem", e);
+ loadRegionsFromInMemoryState();
+ loadRegionsFromRSReport();
+ try {
+ loadRegionsFromFS();
+ } catch (IOException e) {
+ LOG.warn("Failed to load the regions from filesystem", e);
+ }
+ saveCheckResultToSnapshot();
+ } catch (Throwable t) {
+ LOG.warn("Unexpected", t);
}
- saveCheckResultToSnapshot();
running = false;
}
@@ -262,6 +266,10 @@ public class HbckChore extends ScheduledChore {
List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
for (Path regionDir : regionDirs) {
String encodedRegionName = regionDir.getName();
+ if (encodedRegionName == null) {
+ LOG.warn("Failed get of encoded name from {}", regionDir);
+ continue;
+ }
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
if (hri == null) {
orphanRegionsOnFS.put(encodedRegionName, regionDir);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
index 3524f63..d2b4cb2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
@@ -522,4 +522,8 @@ public interface MasterServices extends Server {
*/
List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans);
+ /**
+ * Run the ReplicationBarrierCleaner.
+ */
+ void runReplicationBarrierCleaner();
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
index 281df1e..15b4e88 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
@@ -77,6 +77,9 @@ class MetaFixer {
}
fixHoles(report);
fixOverlaps(report);
+ // Run the ReplicationBarrierCleaner here; it may clear out rep_barrier rows which
+ // can help cleaning up damaged hbase:meta.
+ this.masterServices.runReplicationBarrierCleaner();
}
/**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java
index ff1da0b..653f735 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/ReplicationBarrierCleaner.java
@@ -48,7 +48,6 @@ import org.slf4j.LoggerFactory;
*/
@InterfaceAudience.Private
public class ReplicationBarrierCleaner extends ScheduledChore {
-
private static final Logger LOG = LoggerFactory.getLogger(ReplicationBarrierCleaner.class);
private static final String REPLICATION_BARRIER_CLEANER_INTERVAL =
@@ -71,7 +70,9 @@ public class ReplicationBarrierCleaner extends ScheduledChore {
}
@Override
- protected void chore() {
+ // Public so can be run out of MasterRpcServices. Synchronized so only one
+ // running instance at a time.
+ public synchronized void chore() {
long totalRows = 0;
long cleanedRows = 0;
long deletedRows = 0;
@@ -168,11 +169,9 @@ public class ReplicationBarrierCleaner extends ScheduledChore {
LOG.warn("Failed to clean up replication barrier", e);
}
if (totalRows > 0) {
- LOG.info(
- "Cleanup replication barriers: totalRows {}, " +
- "cleanedRows {}, deletedRows {}, deletedBarriers {}, deletedLastPushedSeqIds {}",
- totalRows, cleanedRows, deletedRows, deletedBarriers, deletedLastPushedSeqIds);
+ LOG.info("TotalRows={}, cleanedRows={}, deletedRows={}, deletedBarriers={}, " +
+ "deletedLastPushedSeqIds={}", totalRows, cleanedRows, deletedRows,
+ deletedBarriers, deletedLastPushedSeqIds);
}
}
-
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
index 6b22201..6f9ebc9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
@@ -479,4 +479,7 @@ public class MockNoopMasterServices implements MasterServices {
public List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans) {
return null;
}
+
+ @Override
+ public void runReplicationBarrierCleaner() {}
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
index 2e18c16..a6844fc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
@@ -108,7 +108,7 @@ public class TestClusterRestartFailover extends AbstractTestRestartCluster {
.filter(p -> (p instanceof ServerCrashProcedure) &&
((ServerCrashProcedure) p).getServerName().equals(SERVER_FOR_TEST)).findAny();
assertTrue("Should have one SCP for " + SERVER_FOR_TEST, procedure.isPresent());
- assertFalse("Submit the SCP for the same serverName " + SERVER_FOR_TEST + " which should fail",
+ assertTrue("Submit the SCP for the same serverName " + SERVER_FOR_TEST + " which should fail",
UTIL.getHBaseCluster().getMaster().getServerManager().expireServer(SERVER_FOR_TEST) ==
Procedure.NO_PROC_ID);