You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2019/11/07 01:17:12 UTC

[hbase] branch branch-2 updated: HBASE-23212 Dynamically reload configs for Region Recovery chore (#802)

This is an automated email from the ASF dual-hosted git repository.

apurtell pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 29c27e3  HBASE-23212 Dynamically reload configs for Region Recovery chore (#802)
29c27e3 is described below

commit 29c27e3d0478d75ccbe8dcbe71dd71555dadba9d
Author: Viraj Jasani <vi...@gmail.com>
AuthorDate: Tue Nov 5 11:13:44 2019 +0530

    HBASE-23212 Dynamically reload configs for Region Recovery chore (#802)
    
    Signed-off-by: Andrew Purtell <ap...@apache.org>
---
 .../java/org/apache/hadoop/hbase/HConstants.java   |   5 +
 .../org/apache/hadoop/hbase/master/HMaster.java    |   5 +
 .../hadoop/hbase/master/RegionsRecoveryChore.java  |  25 +++-
 .../hbase/master/RegionsRecoveryConfigManager.java | 100 ++++++++++++++
 .../master/TestRegionsRecoveryConfigManager.java   | 147 +++++++++++++++++++++
 src/main/asciidoc/_chapters/configuration.adoc     |   2 +
 6 files changed, 277 insertions(+), 7 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 1f7adb3..53b3de7 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1508,6 +1508,11 @@ public final class HConstants {
   // default -1 indicates there is no threshold on high storeRefCount
   public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
 
+  public static final String REGIONS_RECOVERY_INTERVAL =
+    "hbase.master.regions.recovery.check.interval";
+
+  public static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min
+
   /**
    * Configurations for master executor services.
    */
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 1939703..2a31a38 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -416,6 +416,8 @@ public class HMaster extends HRegionServer implements MasterServices {
   private MasterProcedureManagerHost mpmHost;
 
   private RegionsRecoveryChore regionsRecoveryChore = null;
+
+  private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null;
   // it is assigned after 'initialized' guard set to true, so should be volatile
   private volatile MasterQuotaManager quotaManager;
   private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
@@ -1131,6 +1133,7 @@ public class HMaster extends HRegionServer implements MasterServices {
     configurationManager.registerObserver(this.cleanerPool);
     configurationManager.registerObserver(this.hfileCleaner);
     configurationManager.registerObserver(this.logCleaner);
+    configurationManager.registerObserver(this.regionsRecoveryConfigManager);
     // Set master as 'initialized'.
     setInitialized(true);
 
@@ -1457,6 +1460,8 @@ public class HMaster extends HRegionServer implements MasterServices {
         HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
     }
 
+    this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this);
+
     replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
       replicationPeerManager);
     getChoreService().scheduleChore(replicationBarrierCleaner);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
index 7502eeb..c5ad867 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
@@ -52,11 +52,6 @@ public class RegionsRecoveryChore extends ScheduledChore {
 
   private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";
 
-  private static final String REGIONS_RECOVERY_INTERVAL =
-    "hbase.master.regions.recovery.check.interval";
-
-  private static final int DEFAULT_REGIONS_RECOVERY_INTERVAL = 1200 * 1000; // Default 20 min ?
-
   private static final String ERROR_REOPEN_REIONS_MSG =
     "Error reopening regions with high storeRefCount. ";
 
@@ -76,8 +71,8 @@ public class RegionsRecoveryChore extends ScheduledChore {
   RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
       final HMaster hMaster) {
 
-    super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(REGIONS_RECOVERY_INTERVAL,
-      DEFAULT_REGIONS_RECOVERY_INTERVAL));
+    super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(
+      HConstants.REGIONS_RECOVERY_INTERVAL, HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL));
     this.hMaster = hMaster;
     this.storeFileRefCountThreshold = configuration.getInt(
       HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
@@ -171,4 +166,20 @@ public class RegionsRecoveryChore extends ScheduledChore {
 
   }
 
+  // hashcode/equals implementation to ensure at-most one object of RegionsRecoveryChore
+  // is scheduled at a time - RegionsRecoveryConfigManager
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    return o != null && getClass() == o.getClass();
+  }
+
+  @Override
+  public int hashCode() {
+    return 31;
+  }
+
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryConfigManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryConfigManager.java
new file mode 100644
index 0000000..b1bfdc0
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryConfigManager.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.master;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ChoreService;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.conf.ConfigurationObserver;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Config manager for RegionsRecovery Chore - Dynamically reload config and update chore
+ * accordingly
+ */
+@InterfaceAudience.Private
+public class RegionsRecoveryConfigManager implements ConfigurationObserver {
+
+  private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryConfigManager.class);
+
+  private final HMaster hMaster;
+  private int prevMaxStoreFileRefCount;
+  private int prevRegionsRecoveryInterval;
+
+  RegionsRecoveryConfigManager(final HMaster hMaster) {
+    this.hMaster = hMaster;
+    Configuration conf = hMaster.getConfiguration();
+    this.prevMaxStoreFileRefCount = getMaxStoreFileRefCount(conf);
+    this.prevRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf);
+  }
+
+  @Override
+  public void onConfigurationChange(Configuration conf) {
+    final int newMaxStoreFileRefCount = getMaxStoreFileRefCount(conf);
+    final int newRegionsRecoveryInterval = getRegionsRecoveryChoreInterval(conf);
+
+    if (prevMaxStoreFileRefCount == newMaxStoreFileRefCount
+        && prevRegionsRecoveryInterval == newRegionsRecoveryInterval) {
+      // no need to re-schedule the chore with updated config
+      // as there is no change in desired configs
+      return;
+    }
+
+    LOG.info("Config Reload for RegionsRecovery Chore. prevMaxStoreFileRefCount: {}," +
+        " newMaxStoreFileRefCount: {}, prevRegionsRecoveryInterval: {}, " +
+        "newRegionsRecoveryInterval: {}", prevMaxStoreFileRefCount, newMaxStoreFileRefCount,
+      prevRegionsRecoveryInterval, newRegionsRecoveryInterval);
+
+    RegionsRecoveryChore regionsRecoveryChore = new RegionsRecoveryChore(this.hMaster,
+      conf, this.hMaster);
+    ChoreService choreService = this.hMaster.getChoreService();
+
+    // Regions Reopen based on very high storeFileRefCount is considered enabled
+    // only if hbase.regions.recovery.store.file.ref.count has value > 0
+
+    synchronized (this) {
+      if (newMaxStoreFileRefCount > 0) {
+        // reschedule the chore
+        // provide mayInterruptIfRunning - false to take care of completion
+        // of in progress task if any
+        choreService.cancelChore(regionsRecoveryChore, false);
+        choreService.scheduleChore(regionsRecoveryChore);
+      } else {
+        choreService.cancelChore(regionsRecoveryChore, false);
+      }
+      this.prevMaxStoreFileRefCount = newMaxStoreFileRefCount;
+      this.prevRegionsRecoveryInterval = newRegionsRecoveryInterval;
+    }
+  }
+
+  private int getMaxStoreFileRefCount(Configuration configuration) {
+    return configuration.getInt(
+      HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
+      HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
+  }
+
+  private int getRegionsRecoveryChoreInterval(Configuration configuration) {
+    return configuration.getInt(
+      HConstants.REGIONS_RECOVERY_INTERVAL,
+      HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL);
+  }
+
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java
new file mode 100644
index 0000000..22554d3
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.master;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.StartMiniClusterOption;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.zookeeper.KeeperException;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test for Regions Recovery Config Manager
+ */
+@Category({MasterTests.class, MediumTests.class})
+public class TestRegionsRecoveryConfigManager {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestRegionsRecoveryConfigManager.class);
+
+  private static final HBaseTestingUtility HBASE_TESTING_UTILITY = new HBaseTestingUtility();
+
+  private MiniHBaseCluster cluster;
+
+  private HMaster hMaster;
+
+  private RegionsRecoveryChore regionsRecoveryChore;
+
+  private RegionsRecoveryConfigManager regionsRecoveryConfigManager;
+
+  private Configuration conf;
+
+  @Before
+  public void setup() throws Exception {
+    conf = HBASE_TESTING_UTILITY.getConfiguration();
+    conf.unset("hbase.regions.recovery.store.file.ref.count");
+    conf.unset("hbase.master.regions.recovery.check.interval");
+    StartMiniClusterOption option = StartMiniClusterOption.builder()
+      .masterClass(TestHMaster.class)
+      .numRegionServers(1)
+      .numDataNodes(1).build();
+    HBASE_TESTING_UTILITY.startMiniCluster(option);
+    cluster = HBASE_TESTING_UTILITY.getMiniHBaseCluster();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    HBASE_TESTING_UTILITY.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testChoreSchedule() throws Exception {
+
+    this.hMaster = cluster.getMaster();
+
+    Stoppable stoppable = new StoppableImplementation();
+    this.regionsRecoveryChore = new RegionsRecoveryChore(stoppable, conf, hMaster);
+
+    this.regionsRecoveryConfigManager = new RegionsRecoveryConfigManager(this.hMaster);
+    // not yet scheduled
+    Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+    this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+    // not yet scheduled
+    Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+    conf.setInt("hbase.master.regions.recovery.check.interval", 10);
+    this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+    // not yet scheduled - missing config: hbase.regions.recovery.store.file.ref.count
+    Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+    conf.setInt("hbase.regions.recovery.store.file.ref.count", 10);
+    this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+    // chore scheduled
+    Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+    conf.setInt("hbase.regions.recovery.store.file.ref.count", 20);
+    this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+    // chore re-scheduled
+    Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+    conf.setInt("hbase.regions.recovery.store.file.ref.count", 20);
+    this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+    // chore scheduling untouched
+    Assert.assertTrue(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+
+    conf.unset("hbase.regions.recovery.store.file.ref.count");
+    this.regionsRecoveryConfigManager.onConfigurationChange(conf);
+    // chore un-scheduled
+    Assert.assertFalse(hMaster.getChoreService().isChoreScheduled(regionsRecoveryChore));
+  }
+
+  // Make it public so that JVMClusterUtil can access it.
+  public static class TestHMaster extends HMaster {
+    public TestHMaster(Configuration conf) throws IOException, KeeperException {
+      super(conf);
+    }
+  }
+
+  /**
+   * Simple helper class that just keeps track of whether or not its stopped.
+   */
+  private static class StoppableImplementation implements Stoppable {
+
+    private boolean stop = false;
+
+    @Override
+    public void stop(String why) {
+      this.stop = true;
+    }
+
+    @Override
+    public boolean isStopped() {
+      return this.stop;
+    }
+
+  }
+
+}
\ No newline at end of file
diff --git a/src/main/asciidoc/_chapters/configuration.adoc b/src/main/asciidoc/_chapters/configuration.adoc
index b40d244..f7d6b79 100644
--- a/src/main/asciidoc/_chapters/configuration.adoc
+++ b/src/main/asciidoc/_chapters/configuration.adoc
@@ -1150,6 +1150,8 @@ Here are those configurations:
 | hbase.master.balancer.stochastic.moveCost
 | hbase.master.balancer.stochastic.maxMovePercent
 | hbase.master.balancer.stochastic.tableSkewCost
+| hbase.master.regions.recovery.check.interval
+| hbase.regions.recovery.store.file.ref.count
 |===
 
 ifdef::backend-docbook[]