You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2015/05/29 05:46:11 UTC
[2/4] hbase git commit: HBASE-13616 Move ServerShutdownHandler to Pv2
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-protocol/src/main/protobuf/MasterProcedure.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol/src/main/protobuf/MasterProcedure.proto b/hbase-protocol/src/main/protobuf/MasterProcedure.proto
index e1c6880..5e94721 100644
--- a/hbase-protocol/src/main/protobuf/MasterProcedure.proto
+++ b/hbase-protocol/src/main/protobuf/MasterProcedure.proto
@@ -183,3 +183,25 @@ message DisableTableStateData {
required TableName table_name = 2;
required bool skip_table_state_check = 3;
}
+
+message ServerCrashStateData {
+ required ServerName server_name = 1;
+ optional bool distributed_log_replay = 2;
+ repeated RegionInfo regions_on_crashed_server = 3;
+ repeated RegionInfo regions_to_assign = 4;
+ optional bool carrying_meta = 5;
+ optional bool should_split_wal = 6 [default = true];
+}
+
+enum ServerCrashState {
+ SERVER_CRASH_START = 1;
+ SERVER_CRASH_PROCESS_META = 2;
+ SERVER_CRASH_GET_REGIONS = 3;
+ SERVER_CRASH_NO_SPLIT_LOGS = 4;
+ SERVER_CRASH_SPLIT_LOGS = 5;
+ SERVER_CRASH_PREPARE_LOG_REPLAY = 6;
+ SERVER_CRASH_CALC_REGIONS_TO_ASSIGN = 7;
+ SERVER_CRASH_ASSIGN = 8;
+ SERVER_CRASH_WAIT_ON_ASSIGN = 9;
+ SERVER_CRASH_FINISH = 100;
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java
index 0abbd2f..e2c5319 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java
@@ -69,7 +69,7 @@ import org.apache.zookeeper.data.Stat;
/**
* ZooKeeper based implementation of
- * {@link org.apache.hadoop.hbase.master.SplitLogManagerCoordination}
+ * {@link SplitLogManagerCoordination}
*/
@InterfaceAudience.Private
public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements
@@ -647,7 +647,7 @@ public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements
ZKUtil.createSetData(this.watcher, nodePath,
ZKUtil.regionSequenceIdsToByteArray(lastSequenceId, null));
if (LOG.isDebugEnabled()) {
- LOG.debug("Marked " + regionEncodeName + " as recovering from " + serverName +
+ LOG.debug("Marked " + regionEncodeName + " recovering from " + serverName +
": " + nodePath);
}
// break retry loop
@@ -684,7 +684,7 @@ public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements
/**
* ZooKeeper implementation of
- * {@link org.apache.hadoop.hbase.master.
+ * {@link org.apache.hadoop.hbase.coordination.
* SplitLogManagerCoordination#removeStaleRecoveringRegions(Set)}
*/
@Override
@@ -789,7 +789,7 @@ public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements
public void setRecoveryMode(boolean isForInitialization) throws IOException {
synchronized(this) {
if (this.isDrainingDone) {
- // when there is no outstanding splitlogtask after master start up, we already have up to
+ // when there is no outstanding splitlogtask after master start up, we already have up to
// date recovery mode
return;
}
@@ -920,9 +920,9 @@ public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements
/**
- * {@link org.apache.hadoop.hbase.master.SplitLogManager} can use objects implementing this
- * interface to finish off a partially done task by
- * {@link org.apache.hadoop.hbase.regionserver.SplitLogWorker}. This provides a
+ * {@link org.apache.hadoop.hbase.master.SplitLogManager} can use objects implementing this
+ * interface to finish off a partially done task by
+ * {@link org.apache.hadoop.hbase.regionserver.SplitLogWorker}. This provides a
* serialization point at the end of the task processing. Must be restartable and idempotent.
*/
public interface TaskFinisher {
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java
index 637920b..b682764 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java
@@ -104,7 +104,7 @@ public class ZkSplitLogWorkerCoordination extends ZooKeeperListener implements
@Override
public void nodeChildrenChanged(String path) {
if (path.equals(watcher.splitLogZNode)) {
- LOG.debug("tasks arrived or departed");
+ if (LOG.isTraceEnabled()) LOG.trace("tasks arrived or departed on " + path);
synchronized (taskReadyLock) {
taskReadySeq++;
taskReadyLock.notify();
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 0fe59a6..34db4e4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CoordinatedStateException;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
@@ -392,9 +393,10 @@ public class AssignmentManager {
* @throws IOException
* @throws KeeperException
* @throws InterruptedException
+ * @throws CoordinatedStateException
*/
- void joinCluster() throws IOException,
- KeeperException, InterruptedException {
+ void joinCluster()
+ throws IOException, KeeperException, InterruptedException, CoordinatedStateException {
long startTime = System.currentTimeMillis();
// Concurrency note: In the below the accesses on regionsInTransition are
// outside of a synchronization block where usually all accesses to RIT are
@@ -410,8 +412,7 @@ public class AssignmentManager {
Set<ServerName> deadServers = rebuildUserRegions();
// This method will assign all user regions if a clean server startup or
- // it will reconstruct master state and cleanup any leftovers from
- // previous master process.
+ // it will reconstruct master state and cleanup any leftovers from previous master process.
boolean failover = processDeadServersAndRegionsInTransition(deadServers);
recoverTableInDisablingState();
@@ -422,16 +423,17 @@ public class AssignmentManager {
/**
* Process all regions that are in transition in zookeeper and also
- * processes the list of dead servers by scanning the META.
+ * processes the list of dead servers.
* Used by master joining a cluster. If we figure this is a clean cluster
* startup, will assign all user regions.
- * @param deadServers
- * Map of dead servers and their regions. Can be null.
+ * @param deadServers Set of servers that are probably legitimately offline and that were carrying
+ * regions according to a scan of hbase:meta. Can be null.
* @throws IOException
* @throws InterruptedException
*/
boolean processDeadServersAndRegionsInTransition(final Set<ServerName> deadServers)
- throws IOException, InterruptedException {
+ throws KeeperException, IOException, InterruptedException, CoordinatedStateException {
+ // TODO Needed? List<String> nodes = ZKUtil.listChildrenNoWatch(watcher, watcher.assignmentZNode);
boolean failover = !serverManager.getDeadServers().isEmpty();
if (failover) {
// This may not be a failover actually, especially if meta is on this master.
@@ -1483,15 +1485,13 @@ public class AssignmentManager {
}
// Generate a round-robin bulk assignment plan
- Map<ServerName, List<HRegionInfo>> bulkPlan
- = balancer.roundRobinAssignment(regions, servers);
+ Map<ServerName, List<HRegionInfo>> bulkPlan = balancer.roundRobinAssignment(regions, servers);
if (bulkPlan == null) {
throw new IOException("Unable to determine a plan to assign region(s)");
}
processFavoredNodes(regions);
- assign(regions.size(), servers.size(),
- "round-robin=true", bulkPlan);
+ assign(regions.size(), servers.size(), "round-robin=true", bulkPlan);
}
private void assign(int regions, int totalServers,
@@ -1607,10 +1607,8 @@ public class AssignmentManager {
/**
* Rebuild the list of user regions and assignment information.
- * <p>
- * Returns a set of servers that are not found to be online that hosted
- * some regions.
- * @return set of servers not online that hosted some regions per meta
+ * Updates regionstates with findings as we go through list of regions.
+ * @return set of servers not online that hosted some regions according to a scan of hbase:meta
* @throws IOException
*/
Set<ServerName> rebuildUserRegions() throws
@@ -2058,15 +2056,15 @@ public class AssignmentManager {
}
/**
- * Process shutdown server removing any assignments.
+ * Clean out crashed server removing any assignments.
* @param sn Server that went down.
* @return list of regions in transition on this server
*/
- public List<HRegionInfo> processServerShutdown(final ServerName sn) {
+ public List<HRegionInfo> cleanOutCrashedServerReferences(final ServerName sn) {
// Clean out any existing assignment plans for this server
synchronized (this.regionPlans) {
- for (Iterator <Map.Entry<String, RegionPlan>> i =
- this.regionPlans.entrySet().iterator(); i.hasNext();) {
+ for (Iterator <Map.Entry<String, RegionPlan>> i = this.regionPlans.entrySet().iterator();
+ i.hasNext();) {
Map.Entry<String, RegionPlan> e = i.next();
ServerName otherSn = e.getValue().getDestination();
// The name will be null if the region is planned for a random assign.
@@ -2084,8 +2082,7 @@ public class AssignmentManager {
// We need a lock on the region as we could update it
Lock lock = locker.acquireLock(encodedName);
try {
- RegionState regionState =
- regionStates.getRegionTransitionState(encodedName);
+ RegionState regionState = regionStates.getRegionTransitionState(encodedName);
if (regionState == null
|| (regionState.getServerName() != null && !regionState.isOnServer(sn))
|| !RegionStates.isOneOfStates(regionState, State.PENDING_OPEN,
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index 83b12dd..8b16b00 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -38,6 +38,7 @@ import java.util.Set;
/**
* Class to hold dead servers list and utility querying dead server list.
+ * On znode expiration, servers are added here.
*/
@InterfaceAudience.Private
public class DeadServer {
@@ -115,7 +116,7 @@ public class DeadServer {
}
public synchronized void finish(ServerName sn) {
- LOG.debug("Finished processing " + sn);
+ if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + this.numProcessing);
this.numProcessing--;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index d5f8937..40d34e9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -265,7 +265,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
volatile boolean serviceStarted = false;
// flag set after we complete assignMeta.
- private volatile boolean serverShutdownHandlerEnabled = false;
+ private volatile boolean serverCrashProcessingEnabled = false;
LoadBalancer balancer;
private BalancerChore balancerChore;
@@ -669,11 +669,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
// get a list for previously failed RS which need log splitting work
// we recover hbase:meta region servers inside master initialization and
// handle other failed servers in SSH in order to start up master node ASAP
- Set<ServerName> previouslyFailedServers = this.fileSystemManager
- .getFailedServersFromLogFolders();
-
- // remove stale recovering regions from previous run
- this.fileSystemManager.removeStaleRecoveringRegionsFromZK(previouslyFailedServers);
+ Set<ServerName> previouslyFailedServers =
+ this.fileSystemManager.getFailedServersFromLogFolders();
// log splitting for hbase:meta server
ServerName oldMetaServerLocation = metaTableLocator.getMetaRegionLocation(this.getZooKeeper());
@@ -707,14 +704,14 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
// Check if master is shutting down because of some issue
// in initializing the regionserver or the balancer.
- if(isStopped()) return;
+ if (isStopped()) return;
// Make sure meta assigned before proceeding.
status.setStatus("Assigning Meta Region");
assignMeta(status, previouslyFailedMetaRSs, HRegionInfo.DEFAULT_REPLICA_ID);
// check if master is shutting down because above assignMeta could return even hbase:meta isn't
// assigned when master is shutting down
- if(isStopped()) return;
+ if (isStopped()) return;
// migrating existent table state from zk, so splitters
// and recovery process treat states properly.
@@ -736,11 +733,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
status.setStatus("Starting assignment manager");
this.assignmentManager.joinCluster();
- //set cluster status again after user regions are assigned
+ // set cluster status again after user regions are assigned
this.balancer.setClusterStatus(getClusterStatus());
- // Start balancer and meta catalog janitor after meta and regions have
- // been assigned.
+ // Start balancer and meta catalog janitor after meta and regions have been assigned.
status.setStatus("Starting balancer and catalog janitor");
this.clusterStatusChore = new ClusterStatusChore(this, balancer);
getChoreService().scheduleChore(clusterStatusChore);
@@ -763,6 +759,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
status.markComplete("Initialization successful");
LOG.info("Master has completed initialization");
configurationManager.registerObserver(this.balancer);
+ // Set master as 'initialized'.
initialized = true;
// assign the meta replicas
Set<ServerName> EMPTY_SET = new HashSet<ServerName>();
@@ -910,7 +907,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
// if the meta region server has died at this time, we need it to be re-assigned
// by SSH so that system tables can be assigned.
// No need to wait for meta is assigned = 0 when meta is just verified.
- if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableServerShutdownHandler(assigned != 0);
+ if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableCrashedServerProcessing(assigned != 0);
LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assigned + ", location="
+ metaTableLocator.getMetaRegionLocation(this.getZooKeeper(), replicaId));
status.setStatus("META assigned.");
@@ -946,15 +943,14 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
}
}
- private void enableServerShutdownHandler(
- final boolean waitForMeta) throws IOException, InterruptedException {
- // If ServerShutdownHandler is disabled, we enable it and expire those dead
- // but not expired servers. This is required so that if meta is assigning to
- // a server which dies after assignMeta starts assignment,
- // SSH can re-assign it. Otherwise, we will be
+ private void enableCrashedServerProcessing(final boolean waitForMeta)
+ throws IOException, InterruptedException {
+ // If crashed server processing is disabled, we enable it and expire those dead but not expired
+ // servers. This is required so that if meta is assigning to a server which dies after
+ // assignMeta starts assignment, ServerCrashProcedure can re-assign it. Otherwise, we will be
// stuck here waiting forever if waitForMeta is specified.
- if (!serverShutdownHandlerEnabled) {
- serverShutdownHandlerEnabled = true;
+ if (!serverCrashProcessingEnabled) {
+ serverCrashProcessingEnabled = true;
this.serverManager.processQueuedDeadServers();
}
@@ -2065,13 +2061,18 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
}
/**
- * ServerShutdownHandlerEnabled is set false before completing
- * assignMeta to prevent processing of ServerShutdownHandler.
+ * ServerCrashProcessingEnabled is set false before completing assignMeta to prevent processing
+ * of crashed servers.
* @return true if assignMeta has completed;
*/
@Override
- public boolean isServerShutdownHandlerEnabled() {
- return this.serverShutdownHandlerEnabled;
+ public boolean isServerCrashProcessingEnabled() {
+ return this.serverCrashProcessingEnabled;
+ }
+
+ @VisibleForTesting
+ public void setServerCrashProcessingEnabled(final boolean b) {
+ this.serverCrashProcessingEnabled = b;
}
/**
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
index 904da84..3718a5a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
@@ -59,6 +59,8 @@ import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.ipc.RemoteException;
+import com.google.common.annotations.VisibleForTesting;
+
/**
* This class abstracts a bunch of operations the HMaster needs to interact with
* the underlying file system, including splitting log files, checking file
@@ -132,6 +134,11 @@ public class MasterFileSystem {
this.distributedLogReplay = this.splitLogManager.isLogReplaying();
}
+ @VisibleForTesting
+ SplitLogManager getSplitLogManager() {
+ return this.splitLogManager;
+ }
+
/**
* Create initial layout in filesystem.
* <ol>
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
index d93ca94..7085187 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
@@ -182,7 +182,7 @@ public interface MasterServices extends Server {
/**
* @return true if master enables ServerShutdownHandler;
*/
- boolean isServerShutdownHandlerEnabled();
+ boolean isServerCrashProcessingEnabled();
/**
* Registers a new protocol buffer {@link Service} subclass as a master coprocessor endpoint.
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index d1fffbe..5b7a8ad 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -388,8 +388,7 @@ public class RegionStates {
return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM);
}
- public void regionOnline(
- final HRegionInfo hri, final ServerName serverName) {
+ public void regionOnline(final HRegionInfo hri, final ServerName serverName) {
regionOnline(hri, serverName, HConstants.NO_SEQNUM);
}
@@ -398,16 +397,14 @@ public class RegionStates {
* We can't confirm it is really online on specified region server
* because it hasn't been put in region server's online region list yet.
*/
- public void regionOnline(final HRegionInfo hri,
- final ServerName serverName, long openSeqNum) {
+ public void regionOnline(final HRegionInfo hri, final ServerName serverName, long openSeqNum) {
String encodedName = hri.getEncodedName();
if (!serverManager.isServerOnline(serverName)) {
// This is possible if the region server dies before master gets a
// chance to handle ZK event in time. At this time, if the dead server
// is already processed by SSH, we should ignore this event.
// If not processed yet, ignore and let SSH deal with it.
- LOG.warn("Ignored, " + encodedName
- + " was opened on a dead server: " + serverName);
+ LOG.warn("Ignored, " + encodedName + " was opened on a dead server: " + serverName);
return;
}
updateRegionState(hri, State.OPEN, serverName, openSeqNum);
@@ -489,7 +486,7 @@ public class RegionStates {
}
long now = System.currentTimeMillis();
if (LOG.isDebugEnabled()) {
- LOG.debug("Adding to processed servers " + serverName);
+ LOG.debug("Adding to log splitting servers " + serverName);
}
processedServers.put(serverName, Long.valueOf(now));
Configuration conf = server.getConfiguration();
@@ -503,7 +500,7 @@ public class RegionStates {
Map.Entry<ServerName, Long> e = it.next();
if (e.getValue().longValue() < cutoff) {
if (LOG.isDebugEnabled()) {
- LOG.debug("Removed from processed servers " + e.getKey());
+ LOG.debug("Removed from log splitting servers " + e.getKey());
}
it.remove();
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index fa99a92..5cd0301 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -52,8 +52,7 @@ import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
-import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler;
-import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
+import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.RequestConverter;
@@ -579,7 +578,7 @@ public class ServerManager {
}
return;
}
- if (!services.isServerShutdownHandlerEnabled()) {
+ if (!services.isServerCrashProcessingEnabled()) {
LOG.info("Master doesn't enable ServerShutdownHandler during initialization, "
+ "delay expiring server " + serverName);
this.queuedDeadServers.add(serverName);
@@ -591,18 +590,8 @@ public class ServerManager {
" but server shutdown already in progress");
return;
}
- synchronized (onlineServers) {
- if (!this.onlineServers.containsKey(serverName)) {
- LOG.warn("Expiration of " + serverName + " but server not online");
- }
- // Remove the server from the known servers lists and update load info BUT
- // add to deadservers first; do this so it'll show in dead servers list if
- // not in online servers list.
- this.deadservers.add(serverName);
- this.onlineServers.remove(serverName);
- onlineServers.notifyAll();
- }
- this.rsAdmins.remove(serverName);
+ moveFromOnelineToDeadServers(serverName);
+
// If cluster is going down, yes, servers are going to be expiring; don't
// process as a dead server
if (this.clusterShutdown) {
@@ -615,13 +604,8 @@ public class ServerManager {
}
boolean carryingMeta = services.getAssignmentManager().isCarryingMeta(serverName);
- if (carryingMeta) {
- this.services.getExecutorService().submit(new MetaServerShutdownHandler(this.master,
- this.services, this.deadservers, serverName));
- } else {
- this.services.getExecutorService().submit(new ServerShutdownHandler(this.master,
- this.services, this.deadservers, serverName, true));
- }
+ this.services.getMasterProcedureExecutor().
+ submitProcedure(new ServerCrashProcedure(serverName, true, carryingMeta));
LOG.debug("Added=" + serverName +
" to dead servers, submitted shutdown handler to be executed meta=" + carryingMeta);
@@ -633,8 +617,20 @@ public class ServerManager {
}
}
- public synchronized void processDeadServer(final ServerName serverName) {
- this.processDeadServer(serverName, false);
+ @VisibleForTesting
+ public void moveFromOnelineToDeadServers(final ServerName sn) {
+ synchronized (onlineServers) {
+ if (!this.onlineServers.containsKey(sn)) {
+ LOG.warn("Expiration of " + sn + " but server not online");
+ }
+ // Remove the server from the known servers lists and update load info BUT
+ // add to deadservers first; do this so it'll show in dead servers list if
+ // not in online servers list.
+ this.deadservers.add(sn);
+ this.onlineServers.remove(sn);
+ onlineServers.notifyAll();
+ }
+ this.rsAdmins.remove(sn);
}
public synchronized void processDeadServer(final ServerName serverName, boolean shouldSplitWal) {
@@ -652,9 +648,8 @@ public class ServerManager {
}
this.deadservers.add(serverName);
- this.services.getExecutorService().submit(
- new ServerShutdownHandler(this.master, this.services, this.deadservers, serverName,
- shouldSplitWal));
+ this.services.getMasterProcedureExecutor().
+ submitProcedure(new ServerCrashProcedure(serverName, shouldSplitWal, false));
}
/**
@@ -662,7 +657,7 @@ public class ServerManager {
* called after HMaster#assignMeta and AssignmentManager#joinCluster.
* */
synchronized void processQueuedDeadServers() {
- if (!services.isServerShutdownHandlerEnabled()) {
+ if (!services.isServerCrashProcessingEnabled()) {
LOG.info("Master hasn't enabled ServerShutdownHandler");
}
Iterator<ServerName> serverIterator = queuedDeadServers.iterator();
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
index c93ecf6..8caddc1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
@@ -406,16 +406,15 @@ public class SplitLogManager {
// the function is only used in WALEdit direct replay mode
return;
}
+ if (serverNames == null || serverNames.isEmpty()) return;
Set<String> recoveredServerNameSet = new HashSet<String>();
- if (serverNames != null) {
- for (ServerName tmpServerName : serverNames) {
- recoveredServerNameSet.add(tmpServerName.getServerName());
- }
+ for (ServerName tmpServerName : serverNames) {
+ recoveredServerNameSet.add(tmpServerName.getServerName());
}
-
+
+ this.recoveringRegionLock.lock();
try {
- this.recoveringRegionLock.lock();
((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitLogManagerCoordination().removeRecoveringRegions(recoveredServerNameSet,
isMetaRecovery);
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java
index cfaeb98..ef1e84f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java
@@ -25,17 +25,16 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.InterProcessLock;
import org.apache.hadoop.hbase.InterProcessLock.MetadataHandler;
import org.apache.hadoop.hbase.InterProcessReadWriteLock;
import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.LockTimeoutException;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
-import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java
deleted file mode 100644
index 008a04e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * Copyright The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hadoop.hbase.master.handler;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.executor.EventHandler;
-import org.apache.hadoop.hbase.executor.EventType;
-import org.apache.hadoop.hbase.master.DeadServer;
-import org.apache.hadoop.hbase.master.MasterServices;
-
-/**
- * Handle logReplay work from SSH. Having a separate handler is not to block SSH in re-assigning
- * regions from dead servers. Otherwise, available SSH handlers could be blocked by logReplay work
- * (from {@link org.apache.hadoop.hbase.master.MasterFileSystem#splitLog(ServerName)}).
- * During logReplay, if a receiving RS(say A) fails again, regions on A won't be able
- * to be assigned to another live RS which causes the log replay unable to complete
- * because WAL edits replay depends on receiving RS to be live
- */
-@InterfaceAudience.Private
-public class LogReplayHandler extends EventHandler {
- private static final Log LOG = LogFactory.getLog(LogReplayHandler.class);
- private final ServerName serverName;
- protected final Server master;
- protected final MasterServices services;
- protected final DeadServer deadServers;
-
- public LogReplayHandler(final Server server, final MasterServices services,
- final DeadServer deadServers, final ServerName serverName) {
- super(server, EventType.M_LOG_REPLAY);
- this.master = server;
- this.services = services;
- this.deadServers = deadServers;
- this.serverName = serverName;
- this.deadServers.add(serverName);
- }
-
- @Override
- public String toString() {
- String name = serverName.toString();
- return getClass().getSimpleName() + "-" + name + "-" + getSeqid();
- }
-
- @Override
- public void process() throws IOException {
- try {
- if (this.master != null && this.master.isStopped()) {
- // we're exiting ...
- return;
- }
- this.services.getMasterFileSystem().splitLog(serverName);
- } catch (Exception ex) {
- if (ex instanceof IOException) {
- // resubmit log replay work when failed
- this.services.getExecutorService().submit((LogReplayHandler) this);
- this.deadServers.add(serverName);
- throw new IOException("failed log replay for " + serverName + ", will retry", ex);
- } else {
- throw new IOException(ex);
- }
- } finally {
- this.deadServers.finish(serverName);
- }
- // logReplay is the last step of SSH so log a line to indicate that
- LOG.info("Finished processing shutdown of " + serverName);
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
deleted file mode 100644
index 629f941..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
+++ /dev/null
@@ -1,216 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.master.handler;
-
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.executor.EventType;
-import org.apache.hadoop.hbase.master.AssignmentManager;
-import org.apache.hadoop.hbase.master.DeadServer;
-import org.apache.hadoop.hbase.master.MasterServices;
-import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
-import org.apache.hadoop.hbase.util.Threads;
-import org.apache.zookeeper.KeeperException;
-
-import com.google.common.annotations.VisibleForTesting;
-
-import java.util.concurrent.atomic.AtomicInteger;
-
-/**
- * Shutdown handler for the server hosting <code>hbase:meta</code>
- */
-@InterfaceAudience.Private
-public class MetaServerShutdownHandler extends ServerShutdownHandler {
- private static final Log LOG = LogFactory.getLog(MetaServerShutdownHandler.class);
- private AtomicInteger eventExceptionCount = new AtomicInteger(0);
- @VisibleForTesting
- static final int SHOW_STRACKTRACE_FREQUENCY = 100;
-
- public MetaServerShutdownHandler(final Server server,
- final MasterServices services,
- final DeadServer deadServers, final ServerName serverName) {
- super(server, services, deadServers, serverName,
- EventType.M_META_SERVER_SHUTDOWN, true);
- }
-
- @Override
- public void process() throws IOException {
- boolean gotException = true;
- try {
- AssignmentManager am = this.services.getAssignmentManager();
- this.services.getMasterFileSystem().setLogRecoveryMode();
- boolean distributedLogReplay =
- (this.services.getMasterFileSystem().getLogRecoveryMode() == RecoveryMode.LOG_REPLAY);
- try {
- if (this.shouldSplitWal) {
- LOG.info("Splitting hbase:meta logs for " + serverName);
- if (distributedLogReplay) {
- Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
- regions.add(HRegionInfo.FIRST_META_REGIONINFO);
- this.services.getMasterFileSystem().prepareLogReplay(serverName, regions);
- } else {
- this.services.getMasterFileSystem().splitMetaLog(serverName);
- }
- am.getRegionStates().logSplit(HRegionInfo.FIRST_META_REGIONINFO);
- }
- } catch (IOException ioe) {
- this.services.getExecutorService().submit(this);
- this.deadServers.add(serverName);
- throw new IOException("failed log splitting for " + serverName + ", will retry", ioe);
- }
-
- // Assign meta if we were carrying it.
- // Check again: region may be assigned to other where because of RIT
- // timeout
- if (am.isCarryingMeta(serverName)) {
- LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
- verifyAndAssignMetaWithRetries();
- } else {
- LOG.info("META has been assigned to otherwhere, skip assigning.");
- }
-
- try {
- if (this.shouldSplitWal && distributedLogReplay) {
- if (!am.waitOnRegionToClearRegionsInTransition(HRegionInfo.FIRST_META_REGIONINFO,
- regionAssignmentWaitTimeout)) {
- // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
- // when replay happens before region assignment completes.
- LOG.warn("Region " + HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()
- + " didn't complete assignment in time");
- }
- this.services.getMasterFileSystem().splitMetaLog(serverName);
- }
- } catch (Exception ex) {
- if (ex instanceof IOException) {
- this.services.getExecutorService().submit(this);
- this.deadServers.add(serverName);
- throw new IOException("failed log splitting for " + serverName + ", will retry", ex);
- } else {
- throw new IOException(ex);
- }
- }
-
- gotException = false;
- } finally {
- if (gotException){
- // If we had an exception, this.deadServers.finish will be skipped in super.process()
- this.deadServers.finish(serverName);
- }
- }
-
- super.process();
- // Clear this counter on successful handling.
- this.eventExceptionCount.set(0);
- }
-
- @Override
- boolean isCarryingMeta() {
- return true;
- }
-
- /**
- * Before assign the hbase:meta region, ensure it haven't
- * been assigned by other place
- * <p>
- * Under some scenarios, the hbase:meta region can be opened twice, so it seemed online
- * in two regionserver at the same time.
- * If the hbase:meta region has been assigned, so the operation can be canceled.
- * @throws InterruptedException
- * @throws IOException
- * @throws KeeperException
- */
- private void verifyAndAssignMeta()
- throws InterruptedException, IOException, KeeperException {
- long timeout = this.server.getConfiguration().
- getLong("hbase.catalog.verification.timeout", 1000);
- if (!server.getMetaTableLocator().verifyMetaRegionLocation(server.getConnection(),
- this.server.getZooKeeper(), timeout)) {
- this.services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
- } else if (serverName.equals(server.getMetaTableLocator().getMetaRegionLocation(
- this.server.getZooKeeper()))) {
- throw new IOException("hbase:meta is onlined on the dead server "
- + serverName);
- } else {
- LOG.info("Skip assigning hbase:meta, because it is online on the "
- + server.getMetaTableLocator().getMetaRegionLocation(this.server.getZooKeeper()));
- }
- }
-
- /**
- * Failed many times, shutdown processing
- * @throws IOException
- */
- private void verifyAndAssignMetaWithRetries() throws IOException {
- int iTimes = this.server.getConfiguration().getInt(
- "hbase.catalog.verification.retries", 10);
-
- long waitTime = this.server.getConfiguration().getLong(
- "hbase.catalog.verification.timeout", 1000);
-
- int iFlag = 0;
- while (true) {
- try {
- verifyAndAssignMeta();
- break;
- } catch (KeeperException e) {
- this.server.abort("In server shutdown processing, assigning meta", e);
- throw new IOException("Aborting", e);
- } catch (Exception e) {
- if (iFlag >= iTimes) {
- this.server.abort("verifyAndAssignMeta failed after" + iTimes
- + " times retries, aborting", e);
- throw new IOException("Aborting", e);
- }
- try {
- Thread.sleep(waitTime);
- } catch (InterruptedException e1) {
- LOG.warn("Interrupted when is the thread sleep", e1);
- Thread.currentThread().interrupt();
- throw (InterruptedIOException)new InterruptedIOException().initCause(e1);
- }
- iFlag++;
- }
- }
- }
-
- @Override
- protected void handleException(Throwable t) {
- int count = eventExceptionCount.getAndIncrement();
- if (count < 0) count = eventExceptionCount.getAndSet(0);
- if (count > SHOW_STRACKTRACE_FREQUENCY) { // Too frequent, let's slow reporting
- Threads.sleep(1000);
- }
- if (count % SHOW_STRACKTRACE_FREQUENCY == 0) {
- LOG.error("Caught " + eventType + ", count=" + this.eventExceptionCount, t);
- } else {
- LOG.error("Caught " + eventType + ", count=" + this.eventExceptionCount +
- "; " + t.getMessage() + "; stack trace shows every " + SHOW_STRACKTRACE_FREQUENCY +
- "th time.");
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
deleted file mode 100644
index 26594f7..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
+++ /dev/null
@@ -1,371 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.master.handler;
-
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.locks.Lock;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.client.RegionReplicaUtil;
-import org.apache.hadoop.hbase.client.TableState;
-import org.apache.hadoop.hbase.executor.EventHandler;
-import org.apache.hadoop.hbase.executor.EventType;
-import org.apache.hadoop.hbase.master.AssignmentManager;
-import org.apache.hadoop.hbase.master.DeadServer;
-import org.apache.hadoop.hbase.master.MasterFileSystem;
-import org.apache.hadoop.hbase.master.MasterServices;
-import org.apache.hadoop.hbase.master.RegionState;
-import org.apache.hadoop.hbase.master.RegionStates;
-import org.apache.hadoop.hbase.master.ServerManager;
-import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
-import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
-
-/**
- * Process server shutdown.
- * Server-to-handle must be already in the deadservers lists. See
- * {@link ServerManager#expireServer(ServerName)}
- */
-@InterfaceAudience.Private
-public class ServerShutdownHandler extends EventHandler {
- private static final Log LOG = LogFactory.getLog(ServerShutdownHandler.class);
- protected final ServerName serverName;
- protected final MasterServices services;
- protected final DeadServer deadServers;
- protected final boolean shouldSplitWal; // whether to split WAL or not
- protected final int regionAssignmentWaitTimeout;
-
- public ServerShutdownHandler(final Server server, final MasterServices services,
- final DeadServer deadServers, final ServerName serverName,
- final boolean shouldSplitWal) {
- this(server, services, deadServers, serverName, EventType.M_SERVER_SHUTDOWN,
- shouldSplitWal);
- }
-
- ServerShutdownHandler(final Server server, final MasterServices services,
- final DeadServer deadServers, final ServerName serverName, EventType type,
- final boolean shouldSplitWal) {
- super(server, type);
- this.serverName = serverName;
- this.server = server;
- this.services = services;
- this.deadServers = deadServers;
- if (!this.deadServers.isDeadServer(this.serverName)) {
- LOG.warn(this.serverName + " is NOT in deadservers; it should be!");
- }
- this.shouldSplitWal = shouldSplitWal;
- this.regionAssignmentWaitTimeout = server.getConfiguration().getInt(
- HConstants.LOG_REPLAY_WAIT_REGION_TIMEOUT, 15000);
- }
-
- @Override
- public String getInformativeName() {
- if (serverName != null) {
- return this.getClass().getSimpleName() + " for " + serverName;
- } else {
- return super.getInformativeName();
- }
- }
-
- /**
- * @return True if the server we are processing was carrying <code>hbase:meta</code>
- */
- boolean isCarryingMeta() {
- return false;
- }
-
- @Override
- public String toString() {
- return getClass().getSimpleName() + "-" + serverName + "-" + getSeqid();
- }
-
- @Override
- public void process() throws IOException {
- boolean hasLogReplayWork = false;
- final ServerName serverName = this.serverName;
- try {
-
- // We don't want worker thread in the MetaServerShutdownHandler
- // executor pool to block by waiting availability of hbase:meta
- // Otherwise, it could run into the following issue:
- // 1. The current MetaServerShutdownHandler instance For RS1 waits for the hbase:meta
- // to come online.
- // 2. The newly assigned hbase:meta region server RS2 was shutdown right after
- // it opens the hbase:meta region. So the MetaServerShutdownHandler
- // instance For RS1 will still be blocked.
- // 3. The new instance of MetaServerShutdownHandler for RS2 is queued.
- // 4. The newly assigned hbase:meta region server RS3 was shutdown right after
- // it opens the hbase:meta region. So the MetaServerShutdownHandler
- // instance For RS1 and RS2 will still be blocked.
- // 5. The new instance of MetaServerShutdownHandler for RS3 is queued.
- // 6. Repeat until we run out of MetaServerShutdownHandler worker threads
- // The solution here is to resubmit a ServerShutdownHandler request to process
- // user regions on that server so that MetaServerShutdownHandler
- // executor pool is always available.
- //
- // If AssignmentManager hasn't finished rebuilding user regions,
- // we are not ready to assign dead regions either. So we re-queue up
- // the dead server for further processing too.
- AssignmentManager am = services.getAssignmentManager();
- ServerManager serverManager = services.getServerManager();
- if (isCarryingMeta() /* hbase:meta */ || !am.isFailoverCleanupDone()) {
- serverManager.processDeadServer(serverName, this.shouldSplitWal);
- return;
- }
-
- // Wait on meta to come online; we need it to progress.
- // TODO: Best way to hold strictly here? We should build this retry logic
- // into the MetaTableAccessor operations themselves.
- // TODO: Is the reading of hbase:meta necessary when the Master has state of
- // cluster in its head? It should be possible to do without reading hbase:meta
- // in all but one case. On split, the RS updates the hbase:meta
- // table and THEN informs the master of the split via zk nodes in
- // 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if
- // the regionserver dies, these nodes do not stick around and this server
- // shutdown processing does fixup (see the fixupDaughters method below).
- // If we wanted to skip the hbase:meta scan, we'd have to change at least the
- // final SPLIT message to be permanent in zk so in here we'd know a SPLIT
- // completed (zk is updated after edits to hbase:meta have gone in). See
- // {@link SplitTransaction}. We'd also have to be figure another way for
- // doing the below hbase:meta daughters fixup.
- Set<HRegionInfo> hris = null;
- try {
- server.getMetaTableLocator().waitMetaRegionLocation(server.getZooKeeper());
- if (BaseLoadBalancer.tablesOnMaster(server.getConfiguration())) {
- while (!this.server.isStopped() && serverManager.countOfRegionServers() < 2) {
- // Wait till at least another regionserver is up besides the active master
- // so that we don't assign all regions to the active master.
- // This is best of efforts, because newly joined regionserver
- // could crash right after that.
- Thread.sleep(100);
- }
- }
- hris = am.getRegionStates().getServerRegions(serverName);
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- throw (InterruptedIOException)new InterruptedIOException().initCause(e);
- }
- if (this.server.isStopped()) {
- throw new IOException("Server is stopped");
- }
-
- // delayed to set recovery mode based on configuration only after all outstanding splitlogtask
- // drained
- this.services.getMasterFileSystem().setLogRecoveryMode();
- boolean distributedLogReplay =
- (this.services.getMasterFileSystem().getLogRecoveryMode() == RecoveryMode.LOG_REPLAY);
-
- try {
- if (this.shouldSplitWal) {
- if (distributedLogReplay) {
- LOG.info("Mark regions in recovery for crashed server " + serverName +
- " before assignment; regions=" + hris);
- MasterFileSystem mfs = this.services.getMasterFileSystem();
- mfs.prepareLogReplay(serverName, hris);
- } else {
- LOG.info("Splitting logs for " + serverName +
- " before assignment; region count=" + (hris == null ? 0 : hris.size()));
- this.services.getMasterFileSystem().splitLog(serverName);
- }
- am.getRegionStates().logSplit(serverName);
- } else {
- LOG.info("Skipping log splitting for " + serverName);
- }
- } catch (IOException ioe) {
- resubmit(serverName, ioe);
- }
- List<HRegionInfo> toAssignRegions = new ArrayList<HRegionInfo>();
- int replicaCount = services.getConfiguration().getInt(HConstants.META_REPLICAS_NUM,
- HConstants.DEFAULT_META_REPLICA_NUM);
- for (int i = 1; i < replicaCount; i++) {
- HRegionInfo metaHri =
- RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i);
- if (am.isCarryingMetaReplica(serverName, metaHri)) {
- LOG.info("Reassigning meta replica" + metaHri + " that was on " + serverName);
- toAssignRegions.add(metaHri);
- }
- }
- // Clean out anything in regions in transition. Being conservative and
- // doing after log splitting. Could do some states before -- OPENING?
- // OFFLINE? -- and then others after like CLOSING that depend on log
- // splitting.
- List<HRegionInfo> regionsInTransition = am.processServerShutdown(serverName);
- LOG.info("Reassigning " + ((hris == null)? 0: hris.size()) +
- " region(s) that " + (serverName == null? "null": serverName) +
- " was carrying (and " + regionsInTransition.size() +
- " regions(s) that were opening on this server)");
-
- toAssignRegions.addAll(regionsInTransition);
-
- // Iterate regions that were on this server and assign them
- if (hris != null && !hris.isEmpty()) {
- RegionStates regionStates = am.getRegionStates();
- for (HRegionInfo hri: hris) {
- if (regionsInTransition.contains(hri)) {
- continue;
- }
- String encodedName = hri.getEncodedName();
- Lock lock = am.acquireRegionLock(encodedName);
- try {
- RegionState rit = regionStates.getRegionTransitionState(hri);
- if (processDeadRegion(hri, am)) {
- ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
- if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
- // If this region is in transition on the dead server, it must be
- // opening or pending_open, which should have been covered by AM#processServerShutdown
- LOG.info("Skip assigning region " + hri.getRegionNameAsString()
- + " because it has been opened in " + addressFromAM.getServerName());
- continue;
- }
- if (rit != null) {
- if (rit.getServerName() != null && !rit.isOnServer(serverName)) {
- // Skip regions that are in transition on other server
- LOG.info("Skip assigning region in transition on other server" + rit);
- continue;
- }
- LOG.info("Reassigning region with rs = " + rit);
- regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
- } else if (regionStates.isRegionInState(
- hri, RegionState.State.SPLITTING_NEW, RegionState.State.MERGING_NEW)) {
- regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
- }
- toAssignRegions.add(hri);
- } else if (rit != null) {
- if ((rit.isClosing() || rit.isFailedClose() || rit.isOffline())
- && am.getTableStateManager().isTableState(hri.getTable(),
- TableState.State.DISABLED, TableState.State.DISABLING) ||
- am.getReplicasToClose().contains(hri)) {
- // If the table was partially disabled and the RS went down, we should clear the RIT
- // and remove the node for the region.
- // The rit that we use may be stale in case the table was in DISABLING state
- // but though we did assign we will not be clearing the znode in CLOSING state.
- // Doing this will have no harm. See HBASE-5927
- regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
- am.offlineDisabledRegion(hri);
- } else {
- LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
- + rit + " not to be assigned by SSH of server " + serverName);
- }
- }
- } finally {
- lock.unlock();
- }
- }
- }
-
- try {
- am.assign(toAssignRegions);
- } catch (InterruptedException ie) {
- LOG.error("Caught " + ie + " during round-robin assignment");
- throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
- } catch (IOException ioe) {
- LOG.info("Caught " + ioe + " during region assignment, will retry");
- // Only do wal splitting if shouldSplitWal and in DLR mode
- serverManager.processDeadServer(serverName,
- this.shouldSplitWal && distributedLogReplay);
- return;
- }
-
- if (this.shouldSplitWal && distributedLogReplay) {
- // wait for region assignment completes
- for (HRegionInfo hri : toAssignRegions) {
- try {
- if (!am.waitOnRegionToClearRegionsInTransition(hri, regionAssignmentWaitTimeout)) {
- // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
- // when replay happens before region assignment completes.
- LOG.warn("Region " + hri.getEncodedName()
- + " didn't complete assignment in time");
- }
- } catch (InterruptedException ie) {
- throw new InterruptedIOException("Caught " + ie
- + " during waitOnRegionToClearRegionsInTransition");
- }
- }
- // submit logReplay work
- this.services.getExecutorService().submit(
- new LogReplayHandler(this.server, this.services, this.deadServers, this.serverName));
- hasLogReplayWork = true;
- }
- } finally {
- this.deadServers.finish(serverName);
- }
-
- if (!hasLogReplayWork) {
- LOG.info("Finished processing of shutdown of " + serverName);
- }
- }
-
- private void resubmit(final ServerName serverName, IOException ex) throws IOException {
- // typecast to SSH so that we make sure that it is the SSH instance that
- // gets submitted as opposed to MSSH or some other derived instance of SSH
- this.services.getExecutorService().submit((ServerShutdownHandler) this);
- this.deadServers.add(serverName);
- throw new IOException("failed log splitting for " + serverName + ", will retry", ex);
- }
-
- /**
- * Process a dead region from a dead RS. Checks if the region is disabled or
- * disabling or if the region has a partially completed split.
- * @param hri
- * @param assignmentManager
- * @return Returns true if specified region should be assigned, false if not.
- * @throws IOException
- */
- public static boolean processDeadRegion(HRegionInfo hri,
- AssignmentManager assignmentManager)
- throws IOException {
- boolean tablePresent = assignmentManager.getTableStateManager().isTablePresent(hri.getTable());
- if (!tablePresent) {
- LOG.info("The table " + hri.getTable()
- + " was deleted. Hence not proceeding.");
- return false;
- }
- // If table is not disabled but the region is offlined,
- boolean disabled = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
- TableState.State.DISABLED);
- if (disabled){
- LOG.info("The table " + hri.getTable()
- + " was disabled. Hence not proceeding.");
- return false;
- }
- if (hri.isOffline() && hri.isSplit()) {
- //HBASE-7721: Split parent and daughters are inserted into hbase:meta as an atomic operation.
- //If the meta scanner saw the parent split, then it should see the daughters as assigned
- //to the dead server. We don't have to do anything.
- return false;
- }
- boolean disabling = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
- TableState.State.DISABLING);
- if (disabling) {
- LOG.info("The table " + hri.getTable()
- + " is disabled. Hence not assigning region" + hri.getEncodedName());
- return false;
- }
- return true;
- }
-}
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java
index 377ccb5..941aec0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java
@@ -184,14 +184,14 @@ public class AddColumnFamilyProcedure
@Override
protected boolean acquireLock(final MasterProcedureEnv env) {
if (!env.isInitialized()) return false;
- return env.getProcedureQueue().tryAcquireTableWrite(
+ return env.getProcedureQueue().tryAcquireTableExclusiveLock(
tableName,
EventType.C_M_ADD_FAMILY.toString());
}
@Override
protected void releaseLock(final MasterProcedureEnv env) {
- env.getProcedureQueue().releaseTableWrite(tableName);
+ env.getProcedureQueue().releaseTableExclusiveLock(tableName);
}
@Override
@@ -404,4 +404,4 @@ public class AddColumnFamilyProcedure
}
return regionInfoList;
}
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
index edaf7fb..f7e2b84 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
@@ -265,12 +265,12 @@ public class CreateTableProcedure
@Override
protected boolean acquireLock(final MasterProcedureEnv env) {
- return env.getProcedureQueue().tryAcquireTableWrite(getTableName(), "create table");
+ return env.getProcedureQueue().tryAcquireTableExclusiveLock(getTableName(), "create table");
}
@Override
protected void releaseLock(final MasterProcedureEnv env) {
- env.getProcedureQueue().releaseTableWrite(getTableName());
+ env.getProcedureQueue().releaseTableExclusiveLock(getTableName());
}
private boolean prepareCreate(final MasterProcedureEnv env) throws IOException {
@@ -444,4 +444,4 @@ public class CreateTableProcedure
final TableName tableName) throws IOException {
env.getMasterServices().getTableDescriptors().get(tableName);
}
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java
index 6e96910..49941d3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java
@@ -200,14 +200,14 @@ public class DeleteColumnFamilyProcedure
@Override
protected boolean acquireLock(final MasterProcedureEnv env) {
if (!env.isInitialized()) return false;
- return env.getProcedureQueue().tryAcquireTableWrite(
+ return env.getProcedureQueue().tryAcquireTableExclusiveLock(
tableName,
EventType.C_M_DELETE_FAMILY.toString());
}
@Override
protected void releaseLock(final MasterProcedureEnv env) {
- env.getProcedureQueue().releaseTableWrite(tableName);
+ env.getProcedureQueue().releaseTableExclusiveLock(tableName);
}
@Override
@@ -436,4 +436,4 @@ public class DeleteColumnFamilyProcedure
}
return regionInfoList;
}
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
index dfe70c7..6772f0c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
@@ -200,12 +200,12 @@ public class DeleteTableProcedure
@Override
protected boolean acquireLock(final MasterProcedureEnv env) {
if (!env.isInitialized()) return false;
- return env.getProcedureQueue().tryAcquireTableWrite(getTableName(), "delete table");
+ return env.getProcedureQueue().tryAcquireTableExclusiveLock(getTableName(), "delete table");
}
@Override
protected void releaseLock(final MasterProcedureEnv env) {
- env.getProcedureQueue().releaseTableWrite(getTableName());
+ env.getProcedureQueue().releaseTableExclusiveLock(getTableName());
}
@Override
@@ -407,4 +407,4 @@ public class DeleteTableProcedure
throws IOException {
ProcedureSyncWait.getMasterQuotaManager(env).removeTableFromNamespaceQuota(tableName);
}
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
index 2507cec..006449d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
@@ -214,14 +214,14 @@ public class DisableTableProcedure
@Override
protected boolean acquireLock(final MasterProcedureEnv env) {
if (!env.isInitialized()) return false;
- return env.getProcedureQueue().tryAcquireTableWrite(
+ return env.getProcedureQueue().tryAcquireTableExclusiveLock(
tableName,
EventType.C_M_DISABLE_TABLE.toString());
}
@Override
protected void releaseLock(final MasterProcedureEnv env) {
- env.getProcedureQueue().releaseTableWrite(tableName);
+ env.getProcedureQueue().releaseTableExclusiveLock(tableName);
}
@Override
@@ -537,4 +537,4 @@ public class DisableTableProcedure
return regions != null && regions.isEmpty();
}
}
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
index aefb0b1..332bd13 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
@@ -93,9 +93,9 @@ public class EnableTableProcedure
/**
* Constructor
* @param env MasterProcedureEnv
- * @throws IOException
* @param tableName the table to operate on
* @param skipTableStateCheck whether to check table state
+ * @throws IOException
*/
public EnableTableProcedure(
final MasterProcedureEnv env,
@@ -234,14 +234,14 @@ public class EnableTableProcedure
@Override
protected boolean acquireLock(final MasterProcedureEnv env) {
if (!env.isInitialized()) return false;
- return env.getProcedureQueue().tryAcquireTableWrite(
+ return env.getProcedureQueue().tryAcquireTableExclusiveLock(
tableName,
EventType.C_M_ENABLE_TABLE.toString());
}
@Override
protected void releaseLock(final MasterProcedureEnv env) {
- env.getProcedureQueue().releaseTableWrite(tableName);
+ env.getProcedureQueue().releaseTableExclusiveLock(tableName);
}
@Override
http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java
index 0a33cd4..f2f4bf3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java
@@ -120,4 +120,4 @@ public class MasterProcedureEnv {
public boolean isInitialized() {
return master.isInitialized();
}
-}
+}
\ No newline at end of file