You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2018/05/24 23:26:25 UTC
[6/6] hbase git commit: HBASE-20548 Master fails to startup on large clusters, refreshing block distribution
HBASE-20548 Master fails to startup on large clusters, refreshing block distribution
Signed-off-by: Andrew Purtell <ap...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/1fbce10f
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/1fbce10f
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/1fbce10f
Branch: refs/heads/master
Commit: 1fbce10ff45571633755eb27605fae880dc23a6f
Parents: 554d513
Author: Thiruvel Thirumoolan <th...@oath.com>
Authored: Thu May 24 01:01:54 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Thu May 24 15:47:24 2018 -0700
----------------------------------------------------------------------
.../hbase/rsgroup/RSGroupBasedLoadBalancer.java | 5 ++++
.../org/apache/hadoop/hbase/master/HMaster.java | 11 +++++++++
.../hadoop/hbase/master/LoadBalancer.java | 5 ++++
.../hbase/master/balancer/BaseLoadBalancer.java | 26 +++++++++++++-------
4 files changed, 38 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/1fbce10f/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java
index 69131f9..059f07e 100644
--- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java
+++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java
@@ -466,4 +466,9 @@ public class RSGroupBasedLoadBalancer implements RSGroupableBalancer {
public void setRsGroupInfoManager(RSGroupInfoManager rsGroupInfoManager) {
this.rsGroupInfoManager = rsGroupInfoManager;
}
+
+ @Override
+ public void postMasterStartupInitialize() {
+ this.internalBalancer.postMasterStartupInitialize();
+ }
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/1fbce10f/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 6c41b8e..68b3ce8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1003,6 +1003,17 @@ public class HMaster extends HRegionServer implements MasterServices {
}
zombieDetector.interrupt();
+
+ /*
+ * After master has started up, let's do balancer post startup initialization. Since this runs
+ * in the activeMasterManager thread, it should be fine.
+ */
+ long start = System.currentTimeMillis();
+ this.balancer.postMasterStartupInitialize();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Balancer post startup initialization complete, took " + (
+ (System.currentTimeMillis() - start) / 1000) + " seconds");
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/hbase/blob/1fbce10f/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java
index 917da08..aa88f49 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java
@@ -160,6 +160,11 @@ public interface LoadBalancer extends Configurable, Stoppable, ConfigurationObse
void onConfigurationChange(Configuration conf);
/**
+ * If balancer needs to do initialization after Master has started up, let's do that here.
+ */
+ void postMasterStartupInitialize();
+
+ /**
* @return true if Master carries regions
*/
static boolean isTablesOnMaster(Configuration conf) {
http://git-wip-us.apache.org/repos/asf/hbase/blob/1fbce10f/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index 5f597dd..f32930f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -1151,6 +1151,19 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
}
}
+ @Override
+ public void postMasterStartupInitialize() {
+ if (services != null && regionFinder != null) {
+ try {
+ Set<RegionInfo> regions =
+ services.getAssignmentManager().getRegionStates().getRegionAssignments().keySet();
+ regionFinder.refreshAndWait(regions);
+ } catch (Exception e) {
+ LOG.warn("Refreshing region HDFS Block dist failed with exception, ignoring", e);
+ }
+ }
+ }
+
public void setRackManager(RackManager rackManager) {
this.rackManager = rackManager;
}
@@ -1249,7 +1262,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
return assignments;
}
- Cluster cluster = createCluster(servers, regions, false);
+ Cluster cluster = createCluster(servers, regions);
List<RegionInfo> unassignedRegions = new ArrayList<>();
roundRobinAssignment(cluster, regions, unassignedRegions,
@@ -1288,11 +1301,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
return assignments;
}
- protected Cluster createCluster(List<ServerName> servers,
- Collection<RegionInfo> regions, boolean forceRefresh) {
- if (forceRefresh && useRegionFinder) {
- regionFinder.refreshAndWait(regions);
- }
+ protected Cluster createCluster(List<ServerName> servers, Collection<RegionInfo> regions) {
// Get the snapshot of the current assignments for the regions in question, and then create
// a cluster out of it. Note that we might have replicas already assigned to some servers
// earlier. So we want to get the snapshot to see those assignments, but this will only contain
@@ -1346,7 +1355,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
final List<ServerName> finalServers = idleServers.isEmpty() ?
servers : idleServers;
List<RegionInfo> regions = Lists.newArrayList(regionInfo);
- Cluster cluster = createCluster(finalServers, regions, false);
+ Cluster cluster = createCluster(finalServers, regions);
return randomAssignment(cluster, regionInfo, finalServers);
}
@@ -1419,8 +1428,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
int numRandomAssignments = 0;
int numRetainedAssigments = 0;
- Cluster cluster = createCluster(servers, regions.keySet(), true);
-
for (Map.Entry<RegionInfo, ServerName> entry : regions.entrySet()) {
RegionInfo region = entry.getKey();
ServerName oldServerName = entry.getValue();
@@ -1463,6 +1470,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
// If servers from prior assignment aren't present, then lets do randomAssignment on regions.
if (randomAssignRegions.size() > 0) {
+ Cluster cluster = createCluster(servers, regions.keySet());
for (Map.Entry<ServerName, List<RegionInfo>> entry : assignments.entrySet()) {
ServerName sn = entry.getKey();
for (RegionInfo region : entry.getValue()) {