You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2015/05/29 05:46:11 UTC

[2/4] hbase git commit: HBASE-13616 Move ServerShutdownHandler to Pv2

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-protocol/src/main/protobuf/MasterProcedure.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol/src/main/protobuf/MasterProcedure.proto b/hbase-protocol/src/main/protobuf/MasterProcedure.proto
index e1c6880..5e94721 100644
--- a/hbase-protocol/src/main/protobuf/MasterProcedure.proto
+++ b/hbase-protocol/src/main/protobuf/MasterProcedure.proto
@@ -183,3 +183,25 @@ message DisableTableStateData {
   required TableName table_name = 2;
   required bool skip_table_state_check = 3;
 }
+
+message ServerCrashStateData {
+  required ServerName server_name = 1;
+  optional bool distributed_log_replay = 2;
+  repeated RegionInfo regions_on_crashed_server = 3;
+  repeated RegionInfo regions_to_assign = 4;
+  optional bool carrying_meta = 5;
+  optional bool should_split_wal = 6 [default = true];
+}
+
+enum ServerCrashState {
+  SERVER_CRASH_START = 1;
+  SERVER_CRASH_PROCESS_META = 2;
+  SERVER_CRASH_GET_REGIONS = 3;
+  SERVER_CRASH_NO_SPLIT_LOGS = 4;
+  SERVER_CRASH_SPLIT_LOGS = 5;
+  SERVER_CRASH_PREPARE_LOG_REPLAY = 6;
+  SERVER_CRASH_CALC_REGIONS_TO_ASSIGN = 7;
+  SERVER_CRASH_ASSIGN = 8;
+  SERVER_CRASH_WAIT_ON_ASSIGN = 9;
+  SERVER_CRASH_FINISH = 100;
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java
index 0abbd2f..e2c5319 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java
@@ -69,7 +69,7 @@ import org.apache.zookeeper.data.Stat;
 
 /**
  * ZooKeeper based implementation of
- *  {@link org.apache.hadoop.hbase.master.SplitLogManagerCoordination}
+ * {@link SplitLogManagerCoordination}
  */
 @InterfaceAudience.Private
 public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements
@@ -647,7 +647,7 @@ public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements
           ZKUtil.createSetData(this.watcher, nodePath,
             ZKUtil.regionSequenceIdsToByteArray(lastSequenceId, null));
           if (LOG.isDebugEnabled()) {
-            LOG.debug("Marked " + regionEncodeName + " as recovering from " + serverName +
+            LOG.debug("Marked " + regionEncodeName + " recovering from " + serverName +
               ": " + nodePath);
           }
           // break retry loop
@@ -684,7 +684,7 @@ public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements
 
   /**
    * ZooKeeper implementation of
-   * {@link org.apache.hadoop.hbase.master.
+   * {@link org.apache.hadoop.hbase.coordination.
    * SplitLogManagerCoordination#removeStaleRecoveringRegions(Set)}
    */
   @Override
@@ -789,7 +789,7 @@ public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements
   public void setRecoveryMode(boolean isForInitialization) throws IOException {
     synchronized(this) {
       if (this.isDrainingDone) {
-        // when there is no outstanding splitlogtask after master start up, we already have up to 
+        // when there is no outstanding splitlogtask after master start up, we already have up to
         // date recovery mode
         return;
       }
@@ -920,9 +920,9 @@ public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements
 
 
   /**
-   * {@link org.apache.hadoop.hbase.master.SplitLogManager} can use objects implementing this 
-   * interface to finish off a partially done task by 
-   * {@link org.apache.hadoop.hbase.regionserver.SplitLogWorker}. This provides a 
+   * {@link org.apache.hadoop.hbase.master.SplitLogManager} can use objects implementing this
+   * interface to finish off a partially done task by
+   * {@link org.apache.hadoop.hbase.regionserver.SplitLogWorker}. This provides a
    * serialization point at the end of the task processing. Must be restartable and idempotent.
    */
   public interface TaskFinisher {

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java
index 637920b..b682764 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java
@@ -104,7 +104,7 @@ public class ZkSplitLogWorkerCoordination extends ZooKeeperListener implements
   @Override
   public void nodeChildrenChanged(String path) {
     if (path.equals(watcher.splitLogZNode)) {
-      LOG.debug("tasks arrived or departed");
+      if (LOG.isTraceEnabled()) LOG.trace("tasks arrived or departed on " + path);
       synchronized (taskReadyLock) {
         taskReadySeq++;
         taskReadyLock.notify();

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 0fe59a6..34db4e4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CoordinatedStateException;
 import org.apache.hadoop.hbase.HBaseIOException;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
@@ -392,9 +393,10 @@ public class AssignmentManager {
    * @throws IOException
    * @throws KeeperException
    * @throws InterruptedException
+   * @throws CoordinatedStateException
    */
-  void joinCluster() throws IOException,
-          KeeperException, InterruptedException {
+  void joinCluster()
+  throws IOException, KeeperException, InterruptedException, CoordinatedStateException {
     long startTime = System.currentTimeMillis();
     // Concurrency note: In the below the accesses on regionsInTransition are
     // outside of a synchronization block where usually all accesses to RIT are
@@ -410,8 +412,7 @@ public class AssignmentManager {
     Set<ServerName> deadServers = rebuildUserRegions();
 
     // This method will assign all user regions if a clean server startup or
-    // it will reconstruct master state and cleanup any leftovers from
-    // previous master process.
+    // it will reconstruct master state and cleanup any leftovers from previous master process.
     boolean failover = processDeadServersAndRegionsInTransition(deadServers);
 
     recoverTableInDisablingState();
@@ -422,16 +423,17 @@ public class AssignmentManager {
 
   /**
    * Process all regions that are in transition in zookeeper and also
-   * processes the list of dead servers by scanning the META.
+   * processes the list of dead servers.
    * Used by master joining an cluster.  If we figure this is a clean cluster
    * startup, will assign all user regions.
-   * @param deadServers
-   *          Map of dead servers and their regions. Can be null.
+   * @param deadServers Set of servers that are offline, probably legitimately, and that were
+   * carrying regions according to a scan of hbase:meta. Can be null.
    * @throws IOException
    * @throws InterruptedException
    */
   boolean processDeadServersAndRegionsInTransition(final Set<ServerName> deadServers)
-          throws IOException, InterruptedException {
+  throws KeeperException, IOException, InterruptedException, CoordinatedStateException {
+    // TODO Needed? List<String> nodes = ZKUtil.listChildrenNoWatch(watcher, watcher.assignmentZNode);
     boolean failover = !serverManager.getDeadServers().isEmpty();
     if (failover) {
       // This may not be a failover actually, especially if meta is on this master.
@@ -1483,15 +1485,13 @@ public class AssignmentManager {
     }
 
     // Generate a round-robin bulk assignment plan
-    Map<ServerName, List<HRegionInfo>> bulkPlan
-      = balancer.roundRobinAssignment(regions, servers);
+    Map<ServerName, List<HRegionInfo>> bulkPlan = balancer.roundRobinAssignment(regions, servers);
     if (bulkPlan == null) {
       throw new IOException("Unable to determine a plan to assign region(s)");
     }
 
     processFavoredNodes(regions);
-    assign(regions.size(), servers.size(),
-      "round-robin=true", bulkPlan);
+    assign(regions.size(), servers.size(), "round-robin=true", bulkPlan);
   }
 
   private void assign(int regions, int totalServers,
@@ -1607,10 +1607,8 @@ public class AssignmentManager {
 
   /**
    * Rebuild the list of user regions and assignment information.
-   * <p>
-   * Returns a set of servers that are not found to be online that hosted
-   * some regions.
-   * @return set of servers not online that hosted some regions per meta
+   * Updates RegionStates with findings as we go through the list of regions.
+   * @return set of servers not online that hosted some regions according to a scan of hbase:meta
    * @throws IOException
    */
   Set<ServerName> rebuildUserRegions() throws
@@ -2058,15 +2056,15 @@ public class AssignmentManager {
   }
 
   /**
-   * Process shutdown server removing any assignments.
+   * Clean out crashed server removing any assignments.
    * @param sn Server that went down.
    * @return list of regions in transition on this server
    */
-  public List<HRegionInfo> processServerShutdown(final ServerName sn) {
+  public List<HRegionInfo> cleanOutCrashedServerReferences(final ServerName sn) {
     // Clean out any existing assignment plans for this server
     synchronized (this.regionPlans) {
-      for (Iterator <Map.Entry<String, RegionPlan>> i =
-          this.regionPlans.entrySet().iterator(); i.hasNext();) {
+      for (Iterator <Map.Entry<String, RegionPlan>> i = this.regionPlans.entrySet().iterator();
+          i.hasNext();) {
         Map.Entry<String, RegionPlan> e = i.next();
         ServerName otherSn = e.getValue().getDestination();
         // The name will be null if the region is planned for a random assign.
@@ -2084,8 +2082,7 @@ public class AssignmentManager {
       // We need a lock on the region as we could update it
       Lock lock = locker.acquireLock(encodedName);
       try {
-        RegionState regionState =
-          regionStates.getRegionTransitionState(encodedName);
+        RegionState regionState = regionStates.getRegionTransitionState(encodedName);
         if (regionState == null
             || (regionState.getServerName() != null && !regionState.isOnServer(sn))
             || !RegionStates.isOneOfStates(regionState, State.PENDING_OPEN,

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index 83b12dd..8b16b00 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -38,6 +38,7 @@ import java.util.Set;
 
 /**
  * Class to hold dead servers list and utility querying dead server list.
+ * On znode expiration, servers are added here.
  */
 @InterfaceAudience.Private
 public class DeadServer {
@@ -115,7 +116,7 @@ public class DeadServer {
   }
 
   public synchronized void finish(ServerName sn) {
-    LOG.debug("Finished processing " + sn);
+    if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + this.numProcessing);
     this.numProcessing--;
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index d5f8937..40d34e9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -265,7 +265,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
   volatile boolean serviceStarted = false;
 
   // flag set after we complete assignMeta.
-  private volatile boolean serverShutdownHandlerEnabled = false;
+  private volatile boolean serverCrashProcessingEnabled = false;
 
   LoadBalancer balancer;
   private BalancerChore balancerChore;
@@ -669,11 +669,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     // get a list for previously failed RS which need log splitting work
     // we recover hbase:meta region servers inside master initialization and
     // handle other failed servers in SSH in order to start up master node ASAP
-    Set<ServerName> previouslyFailedServers = this.fileSystemManager
-        .getFailedServersFromLogFolders();
-
-    // remove stale recovering regions from previous run
-    this.fileSystemManager.removeStaleRecoveringRegionsFromZK(previouslyFailedServers);
+    Set<ServerName> previouslyFailedServers =
+      this.fileSystemManager.getFailedServersFromLogFolders();
 
     // log splitting for hbase:meta server
     ServerName oldMetaServerLocation = metaTableLocator.getMetaRegionLocation(this.getZooKeeper());
@@ -707,14 +704,14 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
 
     // Check if master is shutting down because of some issue
     // in initializing the regionserver or the balancer.
-    if(isStopped()) return;
+    if (isStopped()) return;
 
     // Make sure meta assigned before proceeding.
     status.setStatus("Assigning Meta Region");
     assignMeta(status, previouslyFailedMetaRSs, HRegionInfo.DEFAULT_REPLICA_ID);
     // check if master is shutting down because above assignMeta could return even hbase:meta isn't
     // assigned when master is shutting down
-    if(isStopped()) return;
+    if (isStopped()) return;
 
     // migrating existent table state from zk, so splitters
     // and recovery process treat states properly.
@@ -736,11 +733,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     status.setStatus("Starting assignment manager");
     this.assignmentManager.joinCluster();
 
-    //set cluster status again after user regions are assigned
+    // set cluster status again after user regions are assigned
     this.balancer.setClusterStatus(getClusterStatus());
 
-    // Start balancer and meta catalog janitor after meta and regions have
-    // been assigned.
+    // Start balancer and meta catalog janitor after meta and regions have been assigned.
     status.setStatus("Starting balancer and catalog janitor");
     this.clusterStatusChore = new ClusterStatusChore(this, balancer);
     getChoreService().scheduleChore(clusterStatusChore);
@@ -763,6 +759,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     status.markComplete("Initialization successful");
     LOG.info("Master has completed initialization");
     configurationManager.registerObserver(this.balancer);
+    // Set master as 'initialized'.
     initialized = true;
     // assign the meta replicas
     Set<ServerName> EMPTY_SET = new HashSet<ServerName>();
@@ -910,7 +907,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     // if the meta region server is died at this time, we need it to be re-assigned
     // by SSH so that system tables can be assigned.
     // No need to wait for meta is assigned = 0 when meta is just verified.
-    if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableServerShutdownHandler(assigned != 0);
+    if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableCrashedServerProcessing(assigned != 0);
     LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assigned + ", location="
       + metaTableLocator.getMetaRegionLocation(this.getZooKeeper(), replicaId));
     status.setStatus("META assigned.");
@@ -946,15 +943,14 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     }
   }
 
-  private void enableServerShutdownHandler(
-      final boolean waitForMeta) throws IOException, InterruptedException {
-    // If ServerShutdownHandler is disabled, we enable it and expire those dead
-    // but not expired servers. This is required so that if meta is assigning to
-    // a server which dies after assignMeta starts assignment,
-    // SSH can re-assign it. Otherwise, we will be
+  private void enableCrashedServerProcessing(final boolean waitForMeta)
+  throws IOException, InterruptedException {
+    // If crashed server processing is disabled, enable it and expire servers that are dead but not
+    // yet expired. This is required so that if meta is being assigned to a server which dies after
+    // assignMeta starts assignment, ServerCrashProcedure can re-assign it. Otherwise, we will be
     // stuck here waiting forever if waitForMeta is specified.
-    if (!serverShutdownHandlerEnabled) {
-      serverShutdownHandlerEnabled = true;
+    if (!serverCrashProcessingEnabled) {
+      serverCrashProcessingEnabled = true;
       this.serverManager.processQueuedDeadServers();
     }
 
@@ -2065,13 +2061,18 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
   }
 
   /**
-   * ServerShutdownHandlerEnabled is set false before completing
-   * assignMeta to prevent processing of ServerShutdownHandler.
+   * ServerCrashProcessingEnabled is set false before completing assignMeta to prevent processing
+   * of crashed servers.
    * @return true if assignMeta has completed;
    */
   @Override
-  public boolean isServerShutdownHandlerEnabled() {
-    return this.serverShutdownHandlerEnabled;
+  public boolean isServerCrashProcessingEnabled() {
+    return this.serverCrashProcessingEnabled;
+  }
+
+  @VisibleForTesting
+  public void setServerCrashProcessingEnabled(final boolean b) {
+    this.serverCrashProcessingEnabled = b;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
index 904da84..3718a5a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
@@ -59,6 +59,8 @@ import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.ipc.RemoteException;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * This class abstracts a bunch of operations the HMaster needs to interact with
  * the underlying file system, including splitting log files, checking file
@@ -132,6 +134,11 @@ public class MasterFileSystem {
     this.distributedLogReplay = this.splitLogManager.isLogReplaying();
   }
 
+  @VisibleForTesting
+  SplitLogManager getSplitLogManager() {
+    return this.splitLogManager;
+  }
+
   /**
    * Create initial layout in filesystem.
    * <ol>

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
index d93ca94..7085187 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
@@ -182,7 +182,7 @@ public interface MasterServices extends Server {
   /**
    * @return true if master enables ServerShutdownHandler;
    */
-  boolean isServerShutdownHandlerEnabled();
+  boolean isServerCrashProcessingEnabled();
 
   /**
    * Registers a new protocol buffer {@link Service} subclass as a master coprocessor endpoint.

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index d1fffbe..5b7a8ad 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -388,8 +388,7 @@ public class RegionStates {
     return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM);
   }
 
-  public void regionOnline(
-      final HRegionInfo hri, final ServerName serverName) {
+  public void regionOnline(final HRegionInfo hri, final ServerName serverName) {
     regionOnline(hri, serverName, HConstants.NO_SEQNUM);
   }
 
@@ -398,16 +397,14 @@ public class RegionStates {
    * We can't confirm it is really online on specified region server
    * because it hasn't been put in region server's online region list yet.
    */
-  public void regionOnline(final HRegionInfo hri,
-      final ServerName serverName, long openSeqNum) {
+  public void regionOnline(final HRegionInfo hri, final ServerName serverName, long openSeqNum) {
     String encodedName = hri.getEncodedName();
     if (!serverManager.isServerOnline(serverName)) {
       // This is possible if the region server dies before master gets a
       // chance to handle ZK event in time. At this time, if the dead server
       // is already processed by SSH, we should ignore this event.
       // If not processed yet, ignore and let SSH deal with it.
-      LOG.warn("Ignored, " + encodedName
-        + " was opened on a dead server: " + serverName);
+      LOG.warn("Ignored, " + encodedName + " was opened on a dead server: " + serverName);
       return;
     }
     updateRegionState(hri, State.OPEN, serverName, openSeqNum);
@@ -489,7 +486,7 @@ public class RegionStates {
     }
     long now = System.currentTimeMillis();
     if (LOG.isDebugEnabled()) {
-      LOG.debug("Adding to processed servers " + serverName);
+      LOG.debug("Adding to log splitting servers " + serverName);
     }
     processedServers.put(serverName, Long.valueOf(now));
     Configuration conf = server.getConfiguration();
@@ -503,7 +500,7 @@ public class RegionStates {
         Map.Entry<ServerName, Long> e = it.next();
         if (e.getValue().longValue() < cutoff) {
           if (LOG.isDebugEnabled()) {
-            LOG.debug("Removed from processed servers " + e.getKey());
+            LOG.debug("Removed from log splitting servers " + e.getKey());
           }
           it.remove();
         }

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index fa99a92..5cd0301 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -52,8 +52,7 @@ import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
-import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler;
-import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
+import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.RequestConverter;
@@ -579,7 +578,7 @@ public class ServerManager {
       }
       return;
     }
-    if (!services.isServerShutdownHandlerEnabled()) {
+    if (!services.isServerCrashProcessingEnabled()) {
       LOG.info("Master doesn't enable ServerShutdownHandler during initialization, "
           + "delay expiring server " + serverName);
       this.queuedDeadServers.add(serverName);
@@ -591,18 +590,8 @@ public class ServerManager {
           " but server shutdown already in progress");
       return;
     }
-    synchronized (onlineServers) {
-      if (!this.onlineServers.containsKey(serverName)) {
-        LOG.warn("Expiration of " + serverName + " but server not online");
-      }
-      // Remove the server from the known servers lists and update load info BUT
-      // add to deadservers first; do this so it'll show in dead servers list if
-      // not in online servers list.
-      this.deadservers.add(serverName);
-      this.onlineServers.remove(serverName);
-      onlineServers.notifyAll();
-    }
-    this.rsAdmins.remove(serverName);
+    moveFromOnelineToDeadServers(serverName);
+
     // If cluster is going down, yes, servers are going to be expiring; don't
     // process as a dead server
     if (this.clusterShutdown) {
@@ -615,13 +604,8 @@ public class ServerManager {
     }
 
     boolean carryingMeta = services.getAssignmentManager().isCarryingMeta(serverName);
-    if (carryingMeta) {
-      this.services.getExecutorService().submit(new MetaServerShutdownHandler(this.master,
-        this.services, this.deadservers, serverName));
-    } else {
-      this.services.getExecutorService().submit(new ServerShutdownHandler(this.master,
-        this.services, this.deadservers, serverName, true));
-    }
+    this.services.getMasterProcedureExecutor().
+      submitProcedure(new ServerCrashProcedure(serverName, true, carryingMeta));
     LOG.debug("Added=" + serverName +
       " to dead servers, submitted shutdown handler to be executed meta=" + carryingMeta);
 
@@ -633,8 +617,20 @@ public class ServerManager {
     }
   }
 
-  public synchronized void processDeadServer(final ServerName serverName) {
-    this.processDeadServer(serverName, false);
+  @VisibleForTesting
+  public void moveFromOnelineToDeadServers(final ServerName sn) {
+    synchronized (onlineServers) {
+      if (!this.onlineServers.containsKey(sn)) {
+        LOG.warn("Expiration of " + sn + " but server not online");
+      }
+      // Remove the server from the known servers lists and update load info BUT
+      // add to deadservers first; do this so it'll show in dead servers list if
+      // not in online servers list.
+      this.deadservers.add(sn);
+      this.onlineServers.remove(sn);
+      onlineServers.notifyAll();
+    }
+    this.rsAdmins.remove(sn);
   }
 
   public synchronized void processDeadServer(final ServerName serverName, boolean shouldSplitWal) {
@@ -652,9 +648,8 @@ public class ServerManager {
     }
 
     this.deadservers.add(serverName);
-    this.services.getExecutorService().submit(
-      new ServerShutdownHandler(this.master, this.services, this.deadservers, serverName,
-          shouldSplitWal));
+    this.services.getMasterProcedureExecutor().
+    submitProcedure(new ServerCrashProcedure(serverName, shouldSplitWal, false));
   }
 
   /**
@@ -662,7 +657,7 @@ public class ServerManager {
    * called after HMaster#assignMeta and AssignmentManager#joinCluster.
    * */
   synchronized void processQueuedDeadServers() {
-    if (!services.isServerShutdownHandlerEnabled()) {
+    if (!services.isServerCrashProcessingEnabled()) {
       LOG.info("Master hasn't enabled ServerShutdownHandler");
     }
     Iterator<ServerName> serverIterator = queuedDeadServers.iterator();

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
index c93ecf6..8caddc1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
@@ -406,16 +406,15 @@ public class SplitLogManager {
       // the function is only used in WALEdit direct replay mode
       return;
     }
+    if (serverNames == null || serverNames.isEmpty()) return;
 
     Set<String> recoveredServerNameSet = new HashSet<String>();
-    if (serverNames != null) {
-      for (ServerName tmpServerName : serverNames) {
-        recoveredServerNameSet.add(tmpServerName.getServerName());
-      }
+    for (ServerName tmpServerName : serverNames) {
+      recoveredServerNameSet.add(tmpServerName.getServerName());
     }
-
+
+    this.recoveringRegionLock.lock();
     try {
-      this.recoveringRegionLock.lock();
       ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
           .getSplitLogManagerCoordination().removeRecoveringRegions(recoveredServerNameSet,
             isMetaRecovery);

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java
index cfaeb98..ef1e84f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java
@@ -25,17 +25,16 @@ import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.InterProcessLock;
 import org.apache.hadoop.hbase.InterProcessLock.MetadataHandler;
 import org.apache.hadoop.hbase.InterProcessReadWriteLock;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.exceptions.LockTimeoutException;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java
deleted file mode 100644
index 008a04e..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * Copyright The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hadoop.hbase.master.handler;
-
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.executor.EventHandler;
-import org.apache.hadoop.hbase.executor.EventType;
-import org.apache.hadoop.hbase.master.DeadServer;
-import org.apache.hadoop.hbase.master.MasterServices;
-
-/**
- * Handle logReplay work from SSH. Having a separate handler is not to block SSH in re-assigning
- * regions from dead servers. Otherwise, available SSH handlers could be blocked by logReplay work
- * (from {@link org.apache.hadoop.hbase.master.MasterFileSystem#splitLog(ServerName)}). 
- * During logReplay, if a receiving RS(say A) fails again, regions on A won't be able 
- * to be assigned to another live RS which causes the log replay unable to complete 
- * because WAL edits replay depends on receiving RS to be live
- */
-@InterfaceAudience.Private
-public class LogReplayHandler extends EventHandler {
-  private static final Log LOG = LogFactory.getLog(LogReplayHandler.class);
-  private final ServerName serverName;
-  protected final Server master;
-  protected final MasterServices services;
-  protected final DeadServer deadServers;
-
-  public LogReplayHandler(final Server server, final MasterServices services,
-      final DeadServer deadServers, final ServerName serverName) {
-    super(server, EventType.M_LOG_REPLAY);
-    this.master = server;
-    this.services = services;
-    this.deadServers = deadServers;
-    this.serverName = serverName;
-    this.deadServers.add(serverName);
-  }
-
-  @Override
-  public String toString() {
-    String name = serverName.toString();
-    return getClass().getSimpleName() + "-" + name + "-" + getSeqid();
-  }
-
-  @Override
-  public void process() throws IOException {
-    try {
-      if (this.master != null && this.master.isStopped()) {
-        // we're exiting ...
-        return;
-      }
-      this.services.getMasterFileSystem().splitLog(serverName);
-    } catch (Exception ex) {
-      if (ex instanceof IOException) {
-        // resubmit log replay work when failed
-        this.services.getExecutorService().submit((LogReplayHandler) this);
-        this.deadServers.add(serverName);
-        throw new IOException("failed log replay for " + serverName + ", will retry", ex);
-      } else {
-        throw new IOException(ex);
-      }
-    } finally {
-      this.deadServers.finish(serverName);
-    }
-    // logReplay is the last step of SSH so log a line to indicate that
-    LOG.info("Finished processing shutdown of " + serverName);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
deleted file mode 100644
index 629f941..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
+++ /dev/null
@@ -1,216 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.master.handler;
-
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.executor.EventType;
-import org.apache.hadoop.hbase.master.AssignmentManager;
-import org.apache.hadoop.hbase.master.DeadServer;
-import org.apache.hadoop.hbase.master.MasterServices;
-import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
-import org.apache.hadoop.hbase.util.Threads;
-import org.apache.zookeeper.KeeperException;
-
-import com.google.common.annotations.VisibleForTesting;
-
-import java.util.concurrent.atomic.AtomicInteger;
-
-/**
- * Shutdown handler for the server hosting <code>hbase:meta</code>
- */
-@InterfaceAudience.Private
-public class MetaServerShutdownHandler extends ServerShutdownHandler {
-  private static final Log LOG = LogFactory.getLog(MetaServerShutdownHandler.class);
-  private AtomicInteger eventExceptionCount = new AtomicInteger(0);
-  @VisibleForTesting
-  static final int SHOW_STRACKTRACE_FREQUENCY = 100;
-
-  public MetaServerShutdownHandler(final Server server,
-      final MasterServices services,
-      final DeadServer deadServers, final ServerName serverName) {
-    super(server, services, deadServers, serverName,
-      EventType.M_META_SERVER_SHUTDOWN, true);
-  }
-
-  @Override
-  public void process() throws IOException {
-    boolean gotException = true; 
-    try {
-      AssignmentManager am = this.services.getAssignmentManager();
-      this.services.getMasterFileSystem().setLogRecoveryMode();
-      boolean distributedLogReplay = 
-        (this.services.getMasterFileSystem().getLogRecoveryMode() == RecoveryMode.LOG_REPLAY);
-      try {
-        if (this.shouldSplitWal) {
-          LOG.info("Splitting hbase:meta logs for " + serverName);
-          if (distributedLogReplay) {
-            Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
-            regions.add(HRegionInfo.FIRST_META_REGIONINFO);
-            this.services.getMasterFileSystem().prepareLogReplay(serverName, regions);
-          } else {
-            this.services.getMasterFileSystem().splitMetaLog(serverName);
-          }
-          am.getRegionStates().logSplit(HRegionInfo.FIRST_META_REGIONINFO);
-        }
-      } catch (IOException ioe) {
-        this.services.getExecutorService().submit(this);
-        this.deadServers.add(serverName);
-        throw new IOException("failed log splitting for " + serverName + ", will retry", ioe);
-      }
-  
-      // Assign meta if we were carrying it.
-      // Check again: region may be assigned to other where because of RIT
-      // timeout
-      if (am.isCarryingMeta(serverName)) {
-        LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
-        verifyAndAssignMetaWithRetries();
-      } else {
-        LOG.info("META has been assigned to otherwhere, skip assigning.");
-      }
-
-      try {
-        if (this.shouldSplitWal && distributedLogReplay) {
-          if (!am.waitOnRegionToClearRegionsInTransition(HRegionInfo.FIRST_META_REGIONINFO,
-            regionAssignmentWaitTimeout)) {
-            // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
-            // when replay happens before region assignment completes.
-            LOG.warn("Region " + HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()
-                + " didn't complete assignment in time");
-          }
-          this.services.getMasterFileSystem().splitMetaLog(serverName);
-        }
-      } catch (Exception ex) {
-        if (ex instanceof IOException) {
-          this.services.getExecutorService().submit(this);
-          this.deadServers.add(serverName);
-          throw new IOException("failed log splitting for " + serverName + ", will retry", ex);
-        } else {
-          throw new IOException(ex);
-        }
-      }
-
-      gotException = false;
-    } finally {
-      if (gotException){
-        // If we had an exception, this.deadServers.finish will be skipped in super.process()
-        this.deadServers.finish(serverName);
-      }     
-    }
-
-    super.process();
-    // Clear this counter on successful handling.
-    this.eventExceptionCount.set(0);
-  }
-
-  @Override
-  boolean isCarryingMeta() {
-    return true;
-  }
-
-  /**
-   * Before assign the hbase:meta region, ensure it haven't
-   *  been assigned by other place
-   * <p>
-   * Under some scenarios, the hbase:meta region can be opened twice, so it seemed online
-   * in two regionserver at the same time.
-   * If the hbase:meta region has been assigned, so the operation can be canceled.
-   * @throws InterruptedException
-   * @throws IOException
-   * @throws KeeperException
-   */
-  private void verifyAndAssignMeta()
-      throws InterruptedException, IOException, KeeperException {
-    long timeout = this.server.getConfiguration().
-        getLong("hbase.catalog.verification.timeout", 1000);
-    if (!server.getMetaTableLocator().verifyMetaRegionLocation(server.getConnection(),
-      this.server.getZooKeeper(), timeout)) {
-      this.services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO);
-    } else if (serverName.equals(server.getMetaTableLocator().getMetaRegionLocation(
-      this.server.getZooKeeper()))) {
-      throw new IOException("hbase:meta is onlined on the dead server "
-          + serverName);
-    } else {
-      LOG.info("Skip assigning hbase:meta, because it is online on the "
-          + server.getMetaTableLocator().getMetaRegionLocation(this.server.getZooKeeper()));
-    }
-  }
-
-  /**
-   * Failed many times, shutdown processing
-   * @throws IOException
-   */
-  private void verifyAndAssignMetaWithRetries() throws IOException {
-    int iTimes = this.server.getConfiguration().getInt(
-        "hbase.catalog.verification.retries", 10);
-
-    long waitTime = this.server.getConfiguration().getLong(
-        "hbase.catalog.verification.timeout", 1000);
-
-    int iFlag = 0;
-    while (true) {
-      try {
-        verifyAndAssignMeta();
-        break;
-      } catch (KeeperException e) {
-        this.server.abort("In server shutdown processing, assigning meta", e);
-        throw new IOException("Aborting", e);
-      } catch (Exception e) {
-        if (iFlag >= iTimes) {
-          this.server.abort("verifyAndAssignMeta failed after" + iTimes
-              + " times retries, aborting", e);
-          throw new IOException("Aborting", e);
-        }
-        try {
-          Thread.sleep(waitTime);
-        } catch (InterruptedException e1) {
-          LOG.warn("Interrupted when is the thread sleep", e1);
-          Thread.currentThread().interrupt();
-          throw (InterruptedIOException)new InterruptedIOException().initCause(e1);
-        }
-        iFlag++;
-      }
-    }
-  }
-
-  @Override
-  protected void handleException(Throwable t) {
-    int count = eventExceptionCount.getAndIncrement();
-    if (count < 0) count = eventExceptionCount.getAndSet(0);
-    if (count > SHOW_STRACKTRACE_FREQUENCY) { // Too frequent, let's slow reporting
-      Threads.sleep(1000);
-    }
-    if (count % SHOW_STRACKTRACE_FREQUENCY == 0) {
-      LOG.error("Caught " + eventType + ", count=" + this.eventExceptionCount, t); 
-    } else {
-      LOG.error("Caught " + eventType + ", count=" + this.eventExceptionCount +
-        "; " + t.getMessage() + "; stack trace shows every " + SHOW_STRACKTRACE_FREQUENCY +
-        "th time.");
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
deleted file mode 100644
index 26594f7..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
+++ /dev/null
@@ -1,371 +0,0 @@
-/**
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.master.handler;
-
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.locks.Lock;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.client.RegionReplicaUtil;
-import org.apache.hadoop.hbase.client.TableState;
-import org.apache.hadoop.hbase.executor.EventHandler;
-import org.apache.hadoop.hbase.executor.EventType;
-import org.apache.hadoop.hbase.master.AssignmentManager;
-import org.apache.hadoop.hbase.master.DeadServer;
-import org.apache.hadoop.hbase.master.MasterFileSystem;
-import org.apache.hadoop.hbase.master.MasterServices;
-import org.apache.hadoop.hbase.master.RegionState;
-import org.apache.hadoop.hbase.master.RegionStates;
-import org.apache.hadoop.hbase.master.ServerManager;
-import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
-import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
-
-/**
- * Process server shutdown.
- * Server-to-handle must be already in the deadservers lists.  See
- * {@link ServerManager#expireServer(ServerName)}
- */
-@InterfaceAudience.Private
-public class ServerShutdownHandler extends EventHandler {
-  private static final Log LOG = LogFactory.getLog(ServerShutdownHandler.class);
-  protected final ServerName serverName;
-  protected final MasterServices services;
-  protected final DeadServer deadServers;
-  protected final boolean shouldSplitWal; // whether to split WAL or not
-  protected final int regionAssignmentWaitTimeout;
-
-  public ServerShutdownHandler(final Server server, final MasterServices services,
-      final DeadServer deadServers, final ServerName serverName,
-      final boolean shouldSplitWal) {
-    this(server, services, deadServers, serverName, EventType.M_SERVER_SHUTDOWN,
-        shouldSplitWal);
-  }
-
-  ServerShutdownHandler(final Server server, final MasterServices services,
-      final DeadServer deadServers, final ServerName serverName, EventType type,
-      final boolean shouldSplitWal) {
-    super(server, type);
-    this.serverName = serverName;
-    this.server = server;
-    this.services = services;
-    this.deadServers = deadServers;
-    if (!this.deadServers.isDeadServer(this.serverName)) {
-      LOG.warn(this.serverName + " is NOT in deadservers; it should be!");
-    }
-    this.shouldSplitWal = shouldSplitWal;
-    this.regionAssignmentWaitTimeout = server.getConfiguration().getInt(
-      HConstants.LOG_REPLAY_WAIT_REGION_TIMEOUT, 15000);
-  }
-
-  @Override
-  public String getInformativeName() {
-    if (serverName != null) {
-      return this.getClass().getSimpleName() + " for " + serverName;
-    } else {
-      return super.getInformativeName();
-    }
-  }
-
-  /**
-   * @return True if the server we are processing was carrying <code>hbase:meta</code>
-   */
-  boolean isCarryingMeta() {
-    return false;
-  }
-
-  @Override
-  public String toString() {
-    return getClass().getSimpleName() + "-" + serverName + "-" + getSeqid();
-  }
-
-  @Override
-  public void process() throws IOException {
-    boolean hasLogReplayWork = false;
-    final ServerName serverName = this.serverName;
-    try {
-
-      // We don't want worker thread in the MetaServerShutdownHandler
-      // executor pool to block by waiting availability of hbase:meta
-      // Otherwise, it could run into the following issue:
-      // 1. The current MetaServerShutdownHandler instance For RS1 waits for the hbase:meta
-      //    to come online.
-      // 2. The newly assigned hbase:meta region server RS2 was shutdown right after
-      //    it opens the hbase:meta region. So the MetaServerShutdownHandler
-      //    instance For RS1 will still be blocked.
-      // 3. The new instance of MetaServerShutdownHandler for RS2 is queued.
-      // 4. The newly assigned hbase:meta region server RS3 was shutdown right after
-      //    it opens the hbase:meta region. So the MetaServerShutdownHandler
-      //    instance For RS1 and RS2 will still be blocked.
-      // 5. The new instance of MetaServerShutdownHandler for RS3 is queued.
-      // 6. Repeat until we run out of MetaServerShutdownHandler worker threads
-      // The solution here is to resubmit a ServerShutdownHandler request to process
-      // user regions on that server so that MetaServerShutdownHandler
-      // executor pool is always available.
-      //
-      // If AssignmentManager hasn't finished rebuilding user regions,
-      // we are not ready to assign dead regions either. So we re-queue up
-      // the dead server for further processing too.
-      AssignmentManager am = services.getAssignmentManager();
-      ServerManager serverManager = services.getServerManager();
-      if (isCarryingMeta() /* hbase:meta */ || !am.isFailoverCleanupDone()) {
-        serverManager.processDeadServer(serverName, this.shouldSplitWal);
-        return;
-      }
-
-      // Wait on meta to come online; we need it to progress.
-      // TODO: Best way to hold strictly here?  We should build this retry logic
-      // into the MetaTableAccessor operations themselves.
-      // TODO: Is the reading of hbase:meta necessary when the Master has state of
-      // cluster in its head?  It should be possible to do without reading hbase:meta
-      // in all but one case. On split, the RS updates the hbase:meta
-      // table and THEN informs the master of the split via zk nodes in
-      // 'unassigned' dir.  Currently the RS puts ephemeral nodes into zk so if
-      // the regionserver dies, these nodes do not stick around and this server
-      // shutdown processing does fixup (see the fixupDaughters method below).
-      // If we wanted to skip the hbase:meta scan, we'd have to change at least the
-      // final SPLIT message to be permanent in zk so in here we'd know a SPLIT
-      // completed (zk is updated after edits to hbase:meta have gone in).  See
-      // {@link SplitTransaction}.  We'd also have to be figure another way for
-      // doing the below hbase:meta daughters fixup.
-      Set<HRegionInfo> hris = null;
-      try {
-        server.getMetaTableLocator().waitMetaRegionLocation(server.getZooKeeper());
-        if (BaseLoadBalancer.tablesOnMaster(server.getConfiguration())) {
-          while (!this.server.isStopped() && serverManager.countOfRegionServers() < 2) {
-            // Wait till at least another regionserver is up besides the active master
-            // so that we don't assign all regions to the active master.
-            // This is best of efforts, because newly joined regionserver
-            // could crash right after that.
-            Thread.sleep(100);
-          }
-        }
-        hris = am.getRegionStates().getServerRegions(serverName);
-      } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-        throw (InterruptedIOException)new InterruptedIOException().initCause(e);
-      }
-      if (this.server.isStopped()) {
-        throw new IOException("Server is stopped");
-      }
-
-      // delayed to set recovery mode based on configuration only after all outstanding splitlogtask
-      // drained
-      this.services.getMasterFileSystem().setLogRecoveryMode();
-      boolean distributedLogReplay = 
-        (this.services.getMasterFileSystem().getLogRecoveryMode() == RecoveryMode.LOG_REPLAY);
-
-      try {
-        if (this.shouldSplitWal) {
-          if (distributedLogReplay) {
-            LOG.info("Mark regions in recovery for crashed server " + serverName +
-              " before assignment; regions=" + hris);
-            MasterFileSystem mfs = this.services.getMasterFileSystem();
-            mfs.prepareLogReplay(serverName, hris);
-          } else {
-            LOG.info("Splitting logs for " + serverName +
-              " before assignment; region count=" + (hris == null ? 0 : hris.size()));
-            this.services.getMasterFileSystem().splitLog(serverName);
-          }
-          am.getRegionStates().logSplit(serverName);
-        } else {
-          LOG.info("Skipping log splitting for " + serverName);
-        }
-      } catch (IOException ioe) {
-        resubmit(serverName, ioe);
-      }
-      List<HRegionInfo> toAssignRegions = new ArrayList<HRegionInfo>();
-      int replicaCount = services.getConfiguration().getInt(HConstants.META_REPLICAS_NUM,
-          HConstants.DEFAULT_META_REPLICA_NUM);
-      for (int i = 1; i < replicaCount; i++) {
-        HRegionInfo metaHri =
-            RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i);
-        if (am.isCarryingMetaReplica(serverName, metaHri)) {
-          LOG.info("Reassigning meta replica" + metaHri + " that was on " + serverName);
-          toAssignRegions.add(metaHri);
-        }
-      }
-      // Clean out anything in regions in transition.  Being conservative and
-      // doing after log splitting.  Could do some states before -- OPENING?
-      // OFFLINE? -- and then others after like CLOSING that depend on log
-      // splitting.
-      List<HRegionInfo> regionsInTransition = am.processServerShutdown(serverName);
-      LOG.info("Reassigning " + ((hris == null)? 0: hris.size()) +
-        " region(s) that " + (serverName == null? "null": serverName)  +
-        " was carrying (and " + regionsInTransition.size() +
-        " regions(s) that were opening on this server)");
-      
-      toAssignRegions.addAll(regionsInTransition);
-
-      // Iterate regions that were on this server and assign them
-      if (hris != null && !hris.isEmpty()) {
-        RegionStates regionStates = am.getRegionStates();
-        for (HRegionInfo hri: hris) {
-          if (regionsInTransition.contains(hri)) {
-            continue;
-          }
-          String encodedName = hri.getEncodedName();
-          Lock lock = am.acquireRegionLock(encodedName);
-          try {
-            RegionState rit = regionStates.getRegionTransitionState(hri);
-            if (processDeadRegion(hri, am)) {
-              ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
-              if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
-                // If this region is in transition on the dead server, it must be
-                // opening or pending_open, which should have been covered by AM#processServerShutdown
-                LOG.info("Skip assigning region " + hri.getRegionNameAsString()
-                  + " because it has been opened in " + addressFromAM.getServerName());
-                continue;
-              }
-              if (rit != null) {
-                if (rit.getServerName() != null && !rit.isOnServer(serverName)) {
-                  // Skip regions that are in transition on other server
-                  LOG.info("Skip assigning region in transition on other server" + rit);
-                  continue;
-                }
-                LOG.info("Reassigning region with rs = " + rit);
-                regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
-              } else if (regionStates.isRegionInState(
-                  hri, RegionState.State.SPLITTING_NEW, RegionState.State.MERGING_NEW)) {
-                regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
-              }
-              toAssignRegions.add(hri);
-            } else if (rit != null) {
-              if ((rit.isClosing() || rit.isFailedClose() || rit.isOffline())
-                  && am.getTableStateManager().isTableState(hri.getTable(),
-                  TableState.State.DISABLED, TableState.State.DISABLING) ||
-                  am.getReplicasToClose().contains(hri)) {
-                // If the table was partially disabled and the RS went down, we should clear the RIT
-                // and remove the node for the region.
-                // The rit that we use may be stale in case the table was in DISABLING state
-                // but though we did assign we will not be clearing the znode in CLOSING state.
-                // Doing this will have no harm. See HBASE-5927
-                regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
-                am.offlineDisabledRegion(hri);
-              } else {
-                LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition "
-                  + rit + " not to be assigned by SSH of server " + serverName);
-              }
-            }
-          } finally {
-            lock.unlock();
-          }
-        }
-      }
-
-      try {
-        am.assign(toAssignRegions);
-      } catch (InterruptedException ie) {
-        LOG.error("Caught " + ie + " during round-robin assignment");
-        throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
-      } catch (IOException ioe) {
-        LOG.info("Caught " + ioe + " during region assignment, will retry");
-        // Only do wal splitting if shouldSplitWal and in DLR mode
-        serverManager.processDeadServer(serverName,
-          this.shouldSplitWal && distributedLogReplay);
-        return;
-      }
-
-      if (this.shouldSplitWal && distributedLogReplay) {
-        // wait for region assignment completes
-        for (HRegionInfo hri : toAssignRegions) {
-          try {
-            if (!am.waitOnRegionToClearRegionsInTransition(hri, regionAssignmentWaitTimeout)) {
-              // Wait here is to avoid log replay hits current dead server and incur a RPC timeout
-              // when replay happens before region assignment completes.
-              LOG.warn("Region " + hri.getEncodedName()
-                  + " didn't complete assignment in time");
-            }
-          } catch (InterruptedException ie) {
-            throw new InterruptedIOException("Caught " + ie
-                + " during waitOnRegionToClearRegionsInTransition");
-          }
-        }
-        // submit logReplay work
-        this.services.getExecutorService().submit(
-          new LogReplayHandler(this.server, this.services, this.deadServers, this.serverName));
-        hasLogReplayWork = true;
-      }
-    } finally {
-      this.deadServers.finish(serverName);
-    }
-
-    if (!hasLogReplayWork) {
-      LOG.info("Finished processing of shutdown of " + serverName);
-    }
-  }
-
-  private void resubmit(final ServerName serverName, IOException ex) throws IOException {
-    // typecast to SSH so that we make sure that it is the SSH instance that
-    // gets submitted as opposed to MSSH or some other derived instance of SSH
-    this.services.getExecutorService().submit((ServerShutdownHandler) this);
-    this.deadServers.add(serverName);
-    throw new IOException("failed log splitting for " + serverName + ", will retry", ex);
-  }
-
-  /**
-   * Process a dead region from a dead RS. Checks if the region is disabled or
-   * disabling or if the region has a partially completed split.
-   * @param hri
-   * @param assignmentManager
-   * @return Returns true if specified region should be assigned, false if not.
-   * @throws IOException
-   */
-  public static boolean processDeadRegion(HRegionInfo hri,
-      AssignmentManager assignmentManager)
-  throws IOException {
-    boolean tablePresent = assignmentManager.getTableStateManager().isTablePresent(hri.getTable());
-    if (!tablePresent) {
-      LOG.info("The table " + hri.getTable()
-          + " was deleted.  Hence not proceeding.");
-      return false;
-    }
-    // If table is not disabled but the region is offlined,
-    boolean disabled = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
-      TableState.State.DISABLED);
-    if (disabled){
-      LOG.info("The table " + hri.getTable()
-          + " was disabled.  Hence not proceeding.");
-      return false;
-    }
-    if (hri.isOffline() && hri.isSplit()) {
-      //HBASE-7721: Split parent and daughters are inserted into hbase:meta as an atomic operation.
-      //If the meta scanner saw the parent split, then it should see the daughters as assigned
-      //to the dead server. We don't have to do anything.
-      return false;
-    }
-    boolean disabling = assignmentManager.getTableStateManager().isTableState(hri.getTable(),
-      TableState.State.DISABLING);
-    if (disabling) {
-      LOG.info("The table " + hri.getTable()
-          + " is disabled.  Hence not assigning region" + hri.getEncodedName());
-      return false;
-    }
-    return true;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java
index 377ccb5..941aec0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java
@@ -184,14 +184,14 @@ public class AddColumnFamilyProcedure
   @Override
   protected boolean acquireLock(final MasterProcedureEnv env) {
     if (!env.isInitialized()) return false;
-    return env.getProcedureQueue().tryAcquireTableWrite(
+    return env.getProcedureQueue().tryAcquireTableExclusiveLock(
       tableName,
       EventType.C_M_ADD_FAMILY.toString());
   }
 
   @Override
   protected void releaseLock(final MasterProcedureEnv env) {
-    env.getProcedureQueue().releaseTableWrite(tableName);
+    env.getProcedureQueue().releaseTableExclusiveLock(tableName);
   }
 
   @Override
@@ -404,4 +404,4 @@ public class AddColumnFamilyProcedure
     }
     return regionInfoList;
   }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
index edaf7fb..f7e2b84 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java
@@ -265,12 +265,12 @@ public class CreateTableProcedure
 
   @Override
   protected boolean acquireLock(final MasterProcedureEnv env) {
-    return env.getProcedureQueue().tryAcquireTableWrite(getTableName(), "create table");
+    return env.getProcedureQueue().tryAcquireTableExclusiveLock(getTableName(), "create table");
   }
 
   @Override
   protected void releaseLock(final MasterProcedureEnv env) {
-    env.getProcedureQueue().releaseTableWrite(getTableName());
+    env.getProcedureQueue().releaseTableExclusiveLock(getTableName());
   }
 
   private boolean prepareCreate(final MasterProcedureEnv env) throws IOException {
@@ -444,4 +444,4 @@ public class CreateTableProcedure
       final TableName tableName) throws IOException {
     env.getMasterServices().getTableDescriptors().get(tableName);
   }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java
index 6e96910..49941d3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java
@@ -200,14 +200,14 @@ public class DeleteColumnFamilyProcedure
   @Override
   protected boolean acquireLock(final MasterProcedureEnv env) {
     if (!env.isInitialized()) return false;
-    return env.getProcedureQueue().tryAcquireTableWrite(
+    return env.getProcedureQueue().tryAcquireTableExclusiveLock(
       tableName,
       EventType.C_M_DELETE_FAMILY.toString());
   }
 
   @Override
   protected void releaseLock(final MasterProcedureEnv env) {
-    env.getProcedureQueue().releaseTableWrite(tableName);
+    env.getProcedureQueue().releaseTableExclusiveLock(tableName);
   }
 
   @Override
@@ -436,4 +436,4 @@ public class DeleteColumnFamilyProcedure
     }
     return regionInfoList;
   }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
index dfe70c7..6772f0c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
@@ -200,12 +200,12 @@ public class DeleteTableProcedure
   @Override
   protected boolean acquireLock(final MasterProcedureEnv env) {
     if (!env.isInitialized()) return false;
-    return env.getProcedureQueue().tryAcquireTableWrite(getTableName(), "delete table");
+    return env.getProcedureQueue().tryAcquireTableExclusiveLock(getTableName(), "delete table");
   }
 
   @Override
   protected void releaseLock(final MasterProcedureEnv env) {
-    env.getProcedureQueue().releaseTableWrite(getTableName());
+    env.getProcedureQueue().releaseTableExclusiveLock(getTableName());
   }
 
   @Override
@@ -407,4 +407,4 @@ public class DeleteTableProcedure
       throws IOException {
     ProcedureSyncWait.getMasterQuotaManager(env).removeTableFromNamespaceQuota(tableName);
   }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
index 2507cec..006449d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java
@@ -214,14 +214,14 @@ public class DisableTableProcedure
   @Override
   protected boolean acquireLock(final MasterProcedureEnv env) {
     if (!env.isInitialized()) return false;
-    return env.getProcedureQueue().tryAcquireTableWrite(
+    return env.getProcedureQueue().tryAcquireTableExclusiveLock(
       tableName,
       EventType.C_M_DISABLE_TABLE.toString());
   }
 
   @Override
   protected void releaseLock(final MasterProcedureEnv env) {
-    env.getProcedureQueue().releaseTableWrite(tableName);
+    env.getProcedureQueue().releaseTableExclusiveLock(tableName);
   }
 
   @Override
@@ -537,4 +537,4 @@ public class DisableTableProcedure
       return regions != null && regions.isEmpty();
     }
   }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
index aefb0b1..332bd13 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java
@@ -93,9 +93,9 @@ public class EnableTableProcedure
   /**
    * Constructor
    * @param env MasterProcedureEnv
-   * @throws IOException
    * @param tableName the table to operate on
    * @param skipTableStateCheck whether to check table state
+   * @throws IOException
    */
   public EnableTableProcedure(
       final MasterProcedureEnv env,
@@ -234,14 +234,14 @@ public class EnableTableProcedure
   @Override
   protected boolean acquireLock(final MasterProcedureEnv env) {
     if (!env.isInitialized()) return false;
-    return env.getProcedureQueue().tryAcquireTableWrite(
+    return env.getProcedureQueue().tryAcquireTableExclusiveLock(
       tableName,
       EventType.C_M_ENABLE_TABLE.toString());
   }
 
   @Override
   protected void releaseLock(final MasterProcedureEnv env) {
-    env.getProcedureQueue().releaseTableWrite(tableName);
+    env.getProcedureQueue().releaseTableExclusiveLock(tableName);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/32561422/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java
index 0a33cd4..f2f4bf3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java
@@ -120,4 +120,4 @@ public class MasterProcedureEnv {
   public boolean isInitialized() {
     return master.isInitialized();
   }
-}
+}
\ No newline at end of file