You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2019/11/07 01:17:12 UTC
[hbase] branch branch-2 updated: HBASE-23212 Dynamically reload
configs for Region Recovery chore (#802)
This is an automated email from the ASF dual-hosted git repository.
apurtell pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 29c27e3 HBASE-23212 Dynamically reload configs for Region Recovery chore (#802)
29c27e3 is described below
commit 29c27e3d0478d75ccbe8dcbe71dd71555dadba9d
Author: Viraj Jasani <vi...@gmail.com>
AuthorDate: Tue Nov 5 11:13:44 2019 +0530
HBASE-23212 Dynamically reload configs for Region Recovery chore (#802)
Signed-off-by: Andrew Purtell <ap...@apache.org>
---
.../java/org/apache/hadoop/hbase/HConstants.java | 5 +
.../org/apache/hadoop/hbase/master/HMaster.java | 5 +
.../hadoop/hbase/master/RegionsRecoveryChore.java | 25 +++-
.../hbase/master/RegionsRecoveryConfigManager.java | 100 ++++++++++++++
.../master/TestRegionsRecoveryConfigManager.java | 147 +++++++++++++++++++++
src/main/asciidoc/_chapters/configuration.adoc | 2 +
6 files changed, 277 insertions(+), 7 deletions(-)
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 1f7adb3..53b3de7 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1508,6 +1508,11 @@ public final class HConstants {
// default -1 indicates there is no threshold on high storeRefCount
public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
+ public static final String REGIONS_RECOVERY_INTERVAL =
+ "hbase.master.regions.recovery.check.interval";
+
+ public static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min
+
/**
* Configurations for master executor services.
*/
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 1939703..2a31a38 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -416,6 +416,8 @@ public class HMaster extends HRegionServer implements MasterServices {
private MasterProcedureManagerHost mpmHost;
private RegionsRecoveryChore regionsRecoveryChore = null;
+
+ private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null;
// it is assigned after 'initialized' guard set to true, so should be volatile
private volatile MasterQuotaManager quotaManager;
private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
@@ -1131,6 +1133,7 @@ public class HMaster extends HRegionServer implements MasterServices {
configurationManager.registerObserver(this.cleanerPool);
configurationManager.registerObserver(this.hfileCleaner);
configurationManager.registerObserver(this.logCleaner);
+ configurationManager.registerObserver(this.regionsRecoveryConfigManager);
// Set master as 'initialized'.
setInitialized(true);
@@ -1457,6 +1460,8 @@ public class HMaster extends HRegionServer implements MasterServices {
HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
}
+ this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this);
+
replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
replicationPeerManager);
getChoreService().scheduleChore(replicationBarrierCleaner);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
index 7502eeb..c5ad867 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
@@ -52,11 +52,6 @@ public class RegionsRecoveryChore extends ScheduledChore {
private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";
- private static final String REGIONS_RECOVERY_INTERVAL =
- "hbase.master.regions.recovery.check.interval";
-
- private static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min ?
-
private static final String ERROR_REOPEN_REIONS_MSG =
"Error reopening regions with high storeRefCount. ";
@@ -76,8 +71,8 @@ public class RegionsRecoveryChore extends ScheduledChore {
RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
final HMaster hMaster) {
- super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(REGIONS_RECOVERY_INTERVAL,
- DEFAULT_REGIONS_RECOVERY_INTERVAL));
+ super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(
+ HConstants.REGIONS_RECOVERY_INTERVAL, HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL));
this.hMaster = hMaster;
this.storeFileRefCountThreshold = configuration.getInt(
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
@@ -171,4 +166,20 @@ public class RegionsRecoveryChore extends ScheduledChore {
}
+ // hashcode/equals implementation to ensure at-most one object of RegionsRecoveryChore
+ // is scheduled at a time - RegionsRecoveryConfigManager
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ return o != null && getClass() == o.getClass();
+ }
+
+ @Override
+ public int hashCode() {
+ return 31;
+ }
+
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryConfigManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryConfigManager.java
new file mode 100644
index 0000000..b1bfdc0
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryConfigManager.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.master;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ChoreService;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.conf.ConfigurationObserver;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Config manager for RegionsRecovery Chore - Dynamically reload config and update chore
+ * accordingly
+ */
+@InterfaceAudience.Private
+public class RegionsRecoveryConfigManager implements ConfigurationObserver {
+
+ private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryConfigManager.class);
+
+ private final HMaster hMaster;
+ private int prevMaxStoreFileRefCount;
+ private int prevRegionsRecoveryInterval;
+
+ RegionsRecoveryConfigManager(final HMaster hMaster) {
+ this.hMaster = hMaster;
+ Configuration conf = hMaster.getConfiguration();
+ this.prevMaxStoreFileRefCount = getMaxStoreFileRefCount(conf);
+ this.prevRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf);
+ }
+
+ @Override
+ public void onConfigurationChange(Configuration conf) {
+ final int newMaxStoreFileRefCount = getMaxStoreFileRefCount(conf);
+ final int newRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf);
+
+ if (prevMaxStoreFileRefCount == newMaxStoreFileRefCount
+ && prevRegionsRecoveryInterval == newRegionsRecoveryInterval) {
+ // no need to re-schedule the chore with updated config
+ // as there is no change in desired configs
+ return;
+ }
+
+ LOG.info("Config Reload for RegionsRecovery Chore. prevMaxStoreFileRefCount: {}," +
+ " newMaxStoreFileRefCount: {}, prevRegionsRecoveryInterval: {}, " +
+ "newRegionsRecoveryInterval: {}", prevMaxStoreFileRefCount, newMaxStoreFileRefCount,
+ prevRegionsRecoveryInterval, newRegionsRecoveryInterval);
+
+ RegionsRecoveryChore regionsRecoveryChore = new RegionsRecoveryChore(this.hMaster,
+ conf, this.hMaster);
+ ChoreService choreService = this.hMaster.getChoreService();
+
+ // Regions Reopen based on very high storeFileRefCount is considered enabled
+ // only if hbase.regions.recovery.store.file.ref.count has value > 0
+
+ synchronized (this) {
+ if (newMaxStoreFileRefCount > 0) {
+ // reschedule the chore
+ // provide mayInterruptIfRunning - false to take care of completion
+ // of in progress task if any
+ choreService.cancelChore(regionsRecoveryChore, false);
+ choreService.scheduleChore(regionsRecoveryChore);
+ } else {
+ choreService.cancelChore(regionsRecoveryChore, false);
+ }
+ this.prevMaxStoreFileRefCount = newMaxStoreFileRefCount;
+ this.prevRegionsRecoveryInterval = newRegionsRecoveryInterval;
+ }
+ }
+
+ private int getMaxStoreFileRefCount(Configuration configuration) {
+ return configuration.getInt(
+ HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
+ HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
+ }
+
+ private int getRegionsRecoveryChoreInterval(Configuration configuration) {
+ return configuration.getInt(
+ HConstants.REGIONS_RECOVERY_INTERVAL,
+ HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL);
+ }
+
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java
new file mode 100644
index 0000000..22554d3
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.master;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.StartMiniClusterOption;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.zookeeper.KeeperException;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test for Regions Recovery Config Manager
+ */
+@Category({MasterTests.class, MediumTests.class})
+public class TestRegionsRecoveryConfigManager {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRegionsRecoveryConfigManager.class);
+
+ private static final HBaseTestingUtility HBASE_TESTING_UTILITY = new HBaseTestingUtility();
+
+ private MiniHBaseCluster cluster;
+
+ private HMaster hMaster;
+
+ private RegionsRecoveryChore regionsRecoveryChore;
+
+ private RegionsRecoveryConfigManager regionsRecoveryConfigManager;
+
+ private Configuration conf;
+
+ @Before
+ public void setup() throws Exception {
+ conf = HBASE_TESTING_UTILITY.getConfiguration();
+ conf.unset("hbase.regions.recovery.store.file.ref.count");
+ conf.unset("hbase.master.regions.recovery.check.interval");
+ StartMiniClusterOption option = StartMiniClusterOption.builder()
+ .masterClass(TestHMaster.class)
+ .numRegionServers(1)
+ .numDataNodes(1).build();
+ HBASE_TESTING_UTILITY.startMiniCluster(option);
+ cluster = HBASE_TESTING_UTILITY.getMiniHBaseCluster();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ HBASE_TESTING_UTILITY.shutdownMiniCluster();
+ }
+
+ @Test
+ public void testChoreSchedule() throws Exception {
+
+ this.hMaster = cluster.getMaster();
+
+ Stoppable stoppable = new StoppableImplementation();
+ this.regionsRecoveryChore = new RegionsRecoveryChore(stoppable, conf, hMaster);
+
+ this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this.hMaster);
+ // not yet scheduled
+ Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+ this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+ // not yet scheduled
+ Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+ conf.setInt("hbase.master.regions.recovery.check.interval", 10);
+ this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+ // not yet scheduled - missing config: hbase.regions.recovery.store.file.ref.count
+ Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+ conf.setInt("hbase.regions.recovery.store.file.ref.count", 10);
+ this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+ // chore scheduled
+ Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+ conf.setInt("hbase.regions.recovery.store.file.ref.count", 20);
+ this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+ // chore re-scheduled
+ Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+ conf.setInt("hbase.regions.recovery.store.file.ref.count", 20);
+ this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+ // chore scheduling untouched
+ Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+ conf.unset("hbase.regions.recovery.store.file.ref.count");
+ this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+ // chore un-scheduled
+ Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+ }
+
+ // Make it public so that JVMClusterUtil can access it.
+ public static class TestHMaster extends HMaster {
+ public TestHMaster(Configuration conf) throws IOException, KeeperException {
+ super(conf);
+ }
+ }
+
+ /**
+ * Simple helper class that just keeps track of whether or not its stopped.
+ */
+ private static class StoppableImplementation implements Stoppable {
+
+ private boolean stop = false;
+
+ @Override
+ public void stop(String why) {
+ this.stop = true;
+ }
+
+ @Override
+ public boolean isStopped() {
+ return this.stop;
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/src/main/asciidoc/_chapters/configuration.adoc b/src/main/asciidoc/_chapters/configuration.adoc
index b40d244..f7d6b79 100644
--- a/src/main/asciidoc/_chapters/configuration.adoc
+++ b/src/main/asciidoc/_chapters/configuration.adoc
@@ -1150,6 +1150,8 @@ Here are those configurations:
| hbase.master.balancer.stochastic.moveCost
| hbase.master.balancer.stochastic.maxMovePercent
| hbase.master.balancer.stochastic.tableSkewCost
+| hbase.master.regions.recovery.check.interval
+| hbase.regions.recovery.store.file.ref.count
|===
ifdef::backend-docbook[]