You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by sy...@apache.org on 2016/02/22 23:20:42 UTC
[14/22] hbase git commit: HBASE-15251 During a cluster restart,
Hmaster thinks it is a failover by mistake (Clara Xiong)
HBASE-15251 During a cluster restart, Hmaster thinks it is a failover by mistake (Clara Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8eedc967
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8eedc967
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8eedc967
Branch: refs/heads/hbase-12439
Commit: 8eedc967515a4d9133068962fe029160d24e6f95
Parents: f352f3c
Author: tedyu <yu...@gmail.com>
Authored: Thu Feb 18 23:46:54 2016 -0800
Committer: tedyu <yu...@gmail.com>
Committed: Thu Feb 18 23:46:54 2016 -0800
----------------------------------------------------------------------
.../hadoop/hbase/master/AssignmentManager.java | 80 +++++++++++++++-----
1 file changed, 61 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/8eedc967/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 7639004..53a080e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -18,6 +18,8 @@
*/
package org.apache.hadoop.hbase.master;
+import com.google.common.annotations.VisibleForTesting;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@@ -44,6 +46,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CoordinatedStateException;
@@ -92,8 +95,6 @@ import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
-import com.google.common.annotations.VisibleForTesting;
-
/**
* Manages and performs region assignment.
* Related communications with regionserver are all done over RPC.
@@ -443,31 +444,43 @@ public class AssignmentManager {
if (LOG.isDebugEnabled()) {
LOG.debug("Found dead servers out on cluster " + serverManager.getDeadServers());
}
- } else {
+ // Check if there are any regions on these servers
+ failover = false;
+ for (ServerName serverName : serverManager.getDeadServers().copyServerNames()) {
+ if (regionStates.getRegionAssignments().values().contains(serverName)) {
+ LOG.debug("Found regions on dead server: " + serverName);
+ failover = true;
+ break;
+ }
+ }
+ }
+ Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
+ if (!failover) {
// If any one region except meta is assigned, it's a failover.
- Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
for (Map.Entry<HRegionInfo, ServerName> en:
regionStates.getRegionAssignments().entrySet()) {
HRegionInfo hri = en.getKey();
if (!hri.isMetaTable()
&& onlineServers.contains(en.getValue())) {
- LOG.debug("Found " + hri + " out on cluster");
+ LOG.debug("Found region " + hri + " out on cluster");
failover = true;
break;
}
}
- if (!failover) {
- // If any region except meta is in transition on a live server, it's a failover.
- Map<String, RegionState> regionsInTransition = regionStates.getRegionsInTransition();
- if (!regionsInTransition.isEmpty()) {
- for (RegionState regionState: regionsInTransition.values()) {
- ServerName serverName = regionState.getServerName();
- if (!regionState.getRegion().isMetaRegion()
- && serverName != null && onlineServers.contains(serverName)) {
- LOG.debug("Found " + regionState + " in RITs");
- failover = true;
- break;
- }
+ }
+ if (!failover) {
+ // If any region except meta is in transition on a live server, it's a failover.
+ Map<String, RegionState> regionsInTransition = regionStates.getRegionsInTransition();
+ if (!regionsInTransition.isEmpty()) {
+ for (RegionState regionState: regionsInTransition.values()) {
+ ServerName serverName = regionState.getServerName();
+ if (!regionState.getRegion().isMetaRegion()
+ && serverName != null && onlineServers.contains(serverName)) {
+ LOG.debug("Found " + regionState + " for region " +
+ regionState.getRegion().getRegionNameAsString() + " for server " +
+ serverName + "in RITs");
+ failover = true;
+ break;
}
}
}
@@ -488,7 +501,7 @@ public class AssignmentManager {
Path logDir = new Path(rootdir,
DefaultWALProvider.getWALDirectoryName(serverName.toString()));
Path splitDir = logDir.suffix(DefaultWALProvider.SPLITTING_EXT);
- if (fs.exists(logDir) || fs.exists(splitDir)) {
+ if (checkWals(fs, logDir) || checkWals(fs, splitDir)) {
LOG.debug("Found queued dead server " + serverName);
failover = true;
break;
@@ -538,8 +551,10 @@ public class AssignmentManager {
failoverCleanupDone();
if (!failover) {
// Fresh cluster startup.
- LOG.info("Clean cluster startup. Assigning user regions");
+ LOG.info("Clean cluster startup. Don't reassign user regions");
assignAllUserRegions(allRegions);
+ } else {
+ LOG.info("Failover! Reassign user regions");
}
// unassign replicas of the split parents and the merged regions
// the daughter replicas are opened in assignAllUserRegions if it was
@@ -551,6 +566,33 @@ public class AssignmentManager {
return failover;
}
+  /**
+   * Reports whether {@code dir} contains at least one non-empty file, looking
+   * into nested directories as well.
+   * <p>
+   * A path that does not exist, a path that is not a directory, and a directory
+   * tree holding only zero-length files all yield {@code false}.
+   *
+   * @param fs file system used to inspect {@code dir}
+   * @param dir directory to examine
+   * @return {@code true} if a file with length greater than zero is found in
+   *         {@code dir} or in any directory beneath it, {@code false} otherwise
+   * @throws IOException propagated from the underlying file system calls
+   */
+  private boolean checkWals(FileSystem fs, Path dir) throws IOException {
+    if (!fs.exists(dir)) {
+      LOG.debug(dir + " doesn't exist");
+      return false;
+    }
+    if (!fs.getFileStatus(dir).isDirectory()) {
+      LOG.warn(dir + " is not a directory");
+      return false;
+    }
+    // The listing is null-checked below; treat "no listing" the same as "empty".
+    FileStatus[] files = FSUtils.listStatus(fs, dir);
+    if (files == null || files.length == 0) {
+      LOG.debug(dir + " has no files");
+      return false;
+    }
+    for (FileStatus file : files) {
+      if (file.isFile() && file.getLen() > 0) {
+        LOG.debug(dir + " has a non-empty file: " + file.getPath());
+        return true;
+      }
+      // Descend into the child entry itself. Recursing on dir again would list
+      // the same entries forever and end in a StackOverflowError.
+      if (file.isDirectory() && checkWals(fs, file.getPath())) {
+        LOG.debug(dir + " is a directory and has a non-empty file: " + file.getPath());
+        return true;
+      }
+    }
+    LOG.debug("Found 0 non-empty wal files for :" + dir);
+    return false;
+  }
+
/**
* When a region is closed, it should be removed from the regionsToReopen
* @param hri HRegionInfo of the region which was closed