You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ec...@apache.org on 2016/07/29 17:00:44 UTC
hbase git commit: HBASE-16209 provide an ExponentialBackOffPolicy
sleep between failed region open requests
Repository: hbase
Updated Branches:
refs/heads/master 2b1cbeb28 -> fe4438361
HBASE-16209 provide an ExponentialBackOffPolicy sleep between failed region open requests
Signed-off-by: Elliott Clark <ec...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/fe443836
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/fe443836
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/fe443836
Branch: refs/heads/master
Commit: fe44383619d0d11396382f2bfa493ad93c8b0e5f
Parents: 2b1cbeb
Author: Joseph Hwang <jz...@fb.com>
Authored: Tue Jul 26 11:13:32 2016 -0700
Committer: Elliott Clark <ec...@apache.org>
Committed: Fri Jul 29 09:47:00 2016 -0700
----------------------------------------------------------------------
.../master/AssignmentManagerStatusTmpl.jamon | 20 ++++++-
.../hadoop/hbase/master/AssignmentManager.java | 56 ++++++++++++++++++--
.../hadoop/hbase/master/RegionStates.java | 8 ++-
.../hbase/master/TestMasterStatusServlet.java | 8 ++-
4 files changed, 84 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/fe443836/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
index e2ae09d..06b320f 100644
--- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
+++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
@@ -25,6 +25,8 @@ org.apache.hadoop.hbase.HBaseConfiguration;
org.apache.hadoop.hbase.HConstants;
java.util.HashSet;
java.util.SortedSet;
+java.util.Map;
+java.util.concurrent.atomic.AtomicInteger;
</%import>
<%args>
AssignmentManager assignmentManager;
@@ -32,7 +34,9 @@ int limit = 100;
</%args>
<%java SortedSet<RegionState> rit = assignmentManager
- .getRegionStates().getRegionsInTransitionOrderedByTimestamp(); %>
+ .getRegionStates().getRegionsInTransitionOrderedByTimestamp();
+ Map<String, AtomicInteger> failedRegionTracker = assignmentManager.getFailedOpenTracker();
+ %>
<%if !rit.isEmpty() %>
<%java>
@@ -83,7 +87,7 @@ int numOfPages = (int) Math.ceil(numOfRITs * 1.0 / ritsPerPage);
<div class="tab-pane" id="tab_rits<% (recordItr / ritsPerPage) + 1 %>">
</%if>
<table class="table table-striped" style="margin-bottom:0px;"><tr><th>Region</th>
- <th>State</th><th>RIT time (ms)</th></tr>
+ <th>State</th><th>RIT time (ms)</th> <th>Retries </th></tr>
</%if>
<%if ritsOverThreshold.contains(rs.getRegion().getEncodedName()) %>
@@ -93,9 +97,21 @@ int numOfPages = (int) Math.ceil(numOfRITs * 1.0 / ritsPerPage);
<%else>
<tr>
</%if>
+ <%java>
+ // Text for the "Retries" column: the tracked failed-open count when there is one,
+ // "Failed" when the region is untracked but in FAILED_OPEN, and "0" otherwise.
+ String retryStatus = "0";
+ String name = rs.getRegion().getEncodedName();
+ RegionState state = assignmentManager.getState(name);
+ AtomicInteger numOpenRetries = failedRegionTracker.get(name);
+ if (numOpenRetries != null ) {
+ retryStatus = Integer.toString(numOpenRetries.get());
+ } else if (state != null && state.getState() == RegionState.State.FAILED_OPEN) {
+ // Null guard -- NOTE(review): the lookup by encoded name presumably returns null for a
+ // region that is no longer tracked; unguarded, that would abort rendering of the page.
+ retryStatus = "Failed";
+ }
+ </%java>
<td><% rs.getRegion().getEncodedName() %></td><td>
<% HRegionInfo.getDescriptiveNameFromRegionStateForDisplay(rs, conf) %></td>
<td><% (currentTime - rs.getStamp()) %> </td>
+ <td> <% retryStatus %> </td>
</tr>
<%java recordItr++; %>
<%if (recordItr % ritsPerPage) == 0 %>
http://git-wip-us.apache.org/repos/asf/hbase/blob/fe443836/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 2ffe466..ffdbac8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -36,6 +36,7 @@ import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -87,6 +88,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.KeyLocker;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
@@ -149,8 +151,8 @@ public class AssignmentManager {
private final ExecutorService executorService;
- // Thread pool executor service. TODO, consolidate with executorService?
private java.util.concurrent.ExecutorService threadPoolExecutorService;
+ private ScheduledThreadPoolExecutor scheduledThreadPoolExecutor;
private final RegionStates regionStates;
@@ -202,6 +204,8 @@ public class AssignmentManager {
private RegionStateListener regionStateListener;
+ private RetryCounter.BackoffPolicy backoffPolicy;
+ private RetryCounter.RetryConfig retryConfig;
/**
* Constructs a new assignment manager.
*
@@ -240,8 +244,13 @@ public class AssignmentManager {
"hbase.meta.assignment.retry.sleeptime", 1000l);
this.balancer = balancer;
int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
+
this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
- maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("AM."));
+ maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("AM."));
+
+ this.scheduledThreadPoolExecutor = new ScheduledThreadPoolExecutor(1,
+ Threads.newDaemonThreadFactory("AM.Scheduler"));
+
this.regionStates = new RegionStates(
server, tableStateManager, serverManager, regionStateStore);
@@ -254,6 +263,23 @@ public class AssignmentManager {
this.metricsAssignmentManager = new MetricsAssignmentManager();
this.tableLockManager = tableLockManager;
+
+ // Configurations for retrying opening a region on receiving a FAILED_OPEN
+ this.retryConfig = new RetryCounter.RetryConfig();
+ this.retryConfig.setSleepInterval(conf.getLong("hbase.assignment.retry.sleep.initial", 0l));
+ // Set the max time limit to the initial sleep interval so we use a constant time sleep strategy
+ // if the user does not set a max sleep limit
+ this.retryConfig.setMaxSleepTime(conf.getLong("hbase.assignment.retry.sleep.max",
+ retryConfig.getSleepInterval()));
+ this.backoffPolicy = getBackoffPolicy();
+ }
+
+ /**
+ * Creates the backoff policy used to space out retries after a failed region open.
+ * The result is stored once in {@code backoffPolicy} during construction and is later
+ * combined with {@code retryConfig} to compute the delay before an assign is re-queued.
+ * Package-private and non-final, so it can be overridden (NOTE(review): presumably by
+ * tests); an override runs during construction and must not rely on subclass state.
+ * @return a new {@code RetryCounter.ExponentialBackoffPolicyWithLimit}
+ */
+ RetryCounter.BackoffPolicy getBackoffPolicy() {
+ return new RetryCounter.ExponentialBackoffPolicyWithLimit();
}
MetricsAssignmentManager getAssignmentManagerMetrics() {
@@ -2028,6 +2054,11 @@ public class AssignmentManager {
threadPoolExecutorService.submit(new AssignCallable(this, regionInfo));
}
+ /**
+ * Queues an assign of the given region after a delay. Once the delay has elapsed the
+ * scheduler hands the assign to {@code threadPoolExecutorService}; the caller never blocks.
+ * @param regionInfo the region to assign
+ * @param sleepMillis delay, in milliseconds, before the assign is submitted
+ */
+ void invokeAssignLater(HRegionInfo regionInfo, long sleepMillis) {
+ Runnable delayedSubmit = new DelayedAssignCallable(new AssignCallable(this, regionInfo));
+ scheduledThreadPoolExecutor.schedule(delayedSubmit, sleepMillis, TimeUnit.MILLISECONDS);
+ }
+
void invokeUnAssign(HRegionInfo regionInfo) {
threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
}
@@ -2233,7 +2264,10 @@ public class AssignmentManager {
} catch (HBaseIOException e) {
LOG.warn("Failed to get region plan", e);
}
- invokeAssign(hri);
+ // Schedule the assign retry after a backoff delay; the current thread does not block
+ long sleepTime = backoffPolicy.getBackoffTime(retryConfig,
+ failedOpenTracker.get(encodedName).get());
+ invokeAssignLater(hri, sleepTime);
}
}
// Null means no error
@@ -2732,6 +2766,10 @@ public class AssignmentManager {
return replicasToClose;
}
+ /**
+ * Exposes the failed-open tracker so that callers can read per-region retry counts.
+ * @return the live map (not a copy), keyed by encoded region name, of failed-open counters
+ */
+ public Map<String, AtomicInteger> getFailedOpenTracker() {
+ return failedOpenTracker;
+ }
+
+ /**
+ * Looks up the current state of a region by its encoded name.
+ * @param encodedName encoded name of the region
+ * @return whatever {@code regionStates.getRegionState(encodedName)} returns
+ * (NOTE(review): presumably null for an unknown region -- confirm)
+ */
+ public RegionState getState(String encodedName) {
+ return regionStates.getRegionState(encodedName);
+ }
+
/**
* A region is offline. The new state should be the specified one,
* if not null. If the specified state is null, the new state is Offline.
@@ -2934,4 +2972,16 @@ public class AssignmentManager {
void setRegionStateListener(RegionStateListener listener) {
this.regionStateListener = listener;
}
+
+ /**
+ * Task run by the scheduler once a retry delay has elapsed. It does no assignment work
+ * itself; it only forwards the wrapped callable to {@code threadPoolExecutorService}.
+ * Despite its name this is a {@link Runnable}. It stays a non-static inner class because
+ * it reads the enclosing instance's executor.
+ */
+ private class DelayedAssignCallable implements Runnable {
+ // Parameterized (not raw) and final: set once, never reassigned.
+ private final Callable<?> callable;
+
+ /**
+ * @param callable the work to submit to the thread pool when this task runs
+ */
+ public DelayedAssignCallable(Callable<?> callable) {
+ this.callable = callable;
+ }
+
+ @Override
+ public void run() {
+ threadPoolExecutorService.submit(callable);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/fe443836/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index b95b894..ba08a05 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -67,10 +67,16 @@ public class RegionStates {
public final static RegionStateStampComparator REGION_STATE_COMPARATOR =
new RegionStateStampComparator();
+
+ // Orders RegionStates by time stamp first and breaks ties with the region name.
+ // Without the tie-break, distinct RegionStates that happen to share a time stamp compare
+ // as equal, which can lead to some of them being discarded.
private static class RegionStateStampComparator implements Comparator<RegionState> {
@Override
public int compare(RegionState l, RegionState r) {
- return Long.compare(l.getStamp(), r.getStamp());
+ int stampOrder = Long.compare(l.getStamp(), r.getStamp());
+ return stampOrder != 0 ? stampOrder :
+ Bytes.compareTo(l.getRegion().getRegionName(), r.getRegion().getRegionName());
}
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/fe443836/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterStatusServlet.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterStatusServlet.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterStatusServlet.java
index 02f01c4..f975d9d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterStatusServlet.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterStatusServlet.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hbase.master;
import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
import java.io.IOException;
import java.io.StringWriter;
@@ -26,8 +27,6 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
@@ -178,6 +177,11 @@ public class TestMasterStatusServlet {
Mockito.doReturn(rs).when(am).getRegionStates();
Mockito.doReturn(regionsInTransition).when(rs).getRegionsInTransition();
Mockito.doReturn(regionsInTransition).when(rs).getRegionsInTransitionOrderedByTimestamp();
+ Mockito.when(
+ am.getState(
+ any(String.class)
+ )
+ ).thenReturn(new RegionState(null, null));
// Render to a string
StringWriter sw = new StringWriter();