You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by je...@apache.org on 2013/08/05 19:28:13 UTC

svn commit: r1510615 - in /hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master: AssignmentManager.java handler/MetaServerShutdownHandler.java handler/ServerShutdownHandler.java

Author: jeffreyz
Date: Mon Aug  5 17:28:13 2013
New Revision: 1510615

URL: http://svn.apache.org/r1510615
Log:
HBASE-9099: logReplay could trigger double region assignment

Modified:
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1510615&r1=1510614&r2=1510615&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Mon Aug  5 17:28:13 2013
@@ -2776,11 +2776,10 @@ public class AssignmentManager extends Z
    * @param hri
    * @param timeOut Milliseconds to wait for current region to be out of transition state.
    * @return True when a region clears regions-in-transition before timeout otherwise false
-   * @throws IOException
    * @throws InterruptedException
    */
   public boolean waitOnRegionToClearRegionsInTransition(final HRegionInfo hri, long timeOut)
-      throws IOException, InterruptedException {
+      throws InterruptedException {
     if (!regionStates.isRegionInTransition(hri)) return true;
     RegionState rs = null;
     long end = (timeOut <= 0) ? Long.MAX_VALUE : EnvironmentEdgeManager.currentTimeMillis()

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java?rev=1510615&r1=1510614&r2=1510615&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java Mon Aug  5 17:28:13 2013
@@ -85,7 +85,9 @@ public class MetaServerShutdownHandler e
         if (this.shouldSplitHlog && this.distributedLogReplay) {
           if (!am.waitOnRegionToClearRegionsInTransition(HRegionInfo.FIRST_META_REGIONINFO,
             regionAssignmentWaitTimeout)) {
-            throw new IOException("Region " + HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()
+            // Wait here so that log replay does not hit the current dead server and incur an RPC
+            // timeout when replay happens before region assignment completes.
+            LOG.warn("Region " + HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()
                 + " didn't complete assignment in time");
           }
           this.services.getMasterFileSystem().splitMetaLog(serverName);

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1510615&r1=1510614&r2=1510615&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Mon Aug  5 17:28:13 2013
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hbase.master.handler;
 
 import java.io.IOException;
+import java.io.InterruptedIOException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
@@ -278,26 +279,25 @@ public class ServerShutdownHandler exten
         throw new IOException(ie);
       }
 
-      try {
-        if (this.shouldSplitHlog && this.distributedLogReplay) {
-          // wait for region assignment completes
-          for (HRegionInfo hri : toAssignRegions) {
+      if (this.shouldSplitHlog && this.distributedLogReplay) {
+        // wait for region assignment completes
+        for (HRegionInfo hri : toAssignRegions) {
+          try {
             if (!am.waitOnRegionToClearRegionsInTransition(hri, regionAssignmentWaitTimeout)) {
-              throw new IOException("Region " + hri.getEncodedName()
+              // Wait here so that log replay does not hit the current dead server and incur an RPC
+              // timeout when replay happens before region assignment completes.
+              LOG.warn("Region " + hri.getEncodedName()
                   + " didn't complete assignment in time");
             }
+          } catch (InterruptedException ie) {
+            throw new InterruptedIOException("Caught " + ie
+                + " during waitOnRegionToClearRegionsInTransition");
           }
-          // submit logReplay work
-          this.services.getExecutorService().submit(
-            new LogReplayHandler(this.server, this.services, this.deadServers, this.serverName));
-          hasLogReplayWork = true;
-        }
-      } catch (Exception ex) {
-        if (ex instanceof IOException) {
-          resubmit(serverName, (IOException)ex);
-        } else {
-          throw new IOException(ex);
         }
+        // submit logReplay work
+        this.services.getExecutorService().submit(
+          new LogReplayHandler(this.server, this.services, this.deadServers, this.serverName));
+        hasLogReplayWork = true;
       }
     } finally {
       this.deadServers.finish(serverName);