You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by sy...@apache.org on 2017/06/20 19:36:35 UTC
hbase git commit: HBASE-18036 Data locality is not maintained after
cluster restart or SSH (Stephen Yuan Jiang)
Repository: hbase
Updated Branches:
refs/heads/branch-1.2 4160f7273 -> 3f9ba2f24
HBASE-18036 Data locality is not maintained after cluster restart or SSH (Stephen Yuan Jiang)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/3f9ba2f2
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/3f9ba2f2
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/3f9ba2f2
Branch: refs/heads/branch-1.2
Commit: 3f9ba2f247ef0fb7cebf35a4501bd7cfa36197bc
Parents: 4160f72
Author: Stephen Yuan Jiang <sy...@gmail.com>
Authored: Tue Jun 20 12:34:23 2017 -0700
Committer: Stephen Yuan Jiang <sy...@gmail.com>
Committed: Tue Jun 20 12:34:56 2017 -0700
----------------------------------------------------------------------
.../hadoop/hbase/master/ServerManager.java | 8 ++++++
.../master/procedure/ServerCrashProcedure.java | 30 +++++++++++++++++---
2 files changed, 34 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f9ba2f2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index f6f2d03..8313604 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -1115,6 +1115,14 @@ public class ServerManager {
}
/**
+ * Check whether a server with the same hostname and port as the given server is online.
+ * @return true if a server with a matching hostname and port is found.
+ */
+ public boolean isServerWithSameHostnamePortOnline(final ServerName serverName) {
+ return findServerWithSameHostnamePortWithLock(serverName) != null;
+ }
+
+ /**
* Check if a server is known to be dead. A server can be online,
* or known to be dead, or unknown to this manager (i.e, not online,
* not known to be dead either. it is simply not tracked by the
http://git-wip-us.apache.org/repos/asf/hbase/blob/3f9ba2f2/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index bfe3cc6..2788354 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -23,8 +23,10 @@ import java.io.InterruptedIOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.Lock;
@@ -546,14 +548,34 @@ implements ServerProcedureInterface {
private boolean assign(final MasterProcedureEnv env, final List<HRegionInfo> hris)
throws InterruptedIOException {
- AssignmentManager am = env.getMasterServices().getAssignmentManager();
+ MasterServices masterServices = env.getMasterServices();
+ AssignmentManager am = masterServices.getAssignmentManager();
+ // Determine what type of assignment to do if the dead server already restarted.
+ boolean retainAssignment =
+ (masterServices.getConfiguration().getBoolean("hbase.master.retain.assignment", true) &&
+ masterServices.getServerManager().isServerWithSameHostnamePortOnline(serverName)) ?
+ true : false;
try {
- am.assign(hris);
+ if (retainAssignment) {
+ Map<HRegionInfo, ServerName> hriServerMap =
+ new HashMap<HRegionInfo, ServerName>(hris.size());
+ for (HRegionInfo hri: hris) {
+ hriServerMap.put(hri, serverName);
+ }
+ LOG.info("Best effort in SSH to retain assignment of " + hris.size()
+ + " regions from the dead server " + serverName);
+ am.assign(hriServerMap);
+ } else {
+ LOG.info("Using round robin in SSH to assign " + hris.size()
+ + " regions from the dead server " + serverName);
+ am.assign(hris);
+ }
} catch (InterruptedException ie) {
- LOG.error("Caught " + ie + " during round-robin assignment");
+ LOG.error("Caught " + ie + " during " + (retainAssignment ? "retaining" : "round-robin")
+ + " assignment");
throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
} catch (IOException ioe) {
- LOG.info("Caught " + ioe + " during region assignment, will retry");
+ LOG.warn("Caught " + ioe + " during region assignment, will retry");
return false;
}
return true;