You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ra...@apache.org on 2012/05/25 18:54:09 UTC

svn commit: r1342724 - in /hbase/trunk/src: main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/master/handler/ main/java/org/apache/hadoop/hbase/protobuf/ test/java/org/apache/hadoop/hbase/master/

Author: ramkrishna
Date: Fri May 25 16:54:09 2012
New Revision: 1342724

URL: http://svn.apache.org/viewvc?rev=1342724&view=rev
Log:
HBASE-6070 AM.nodeDeleted and SSH races creating problems for regions under SPLIT (Ramkrishna)

Modified:
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1342724&r1=1342723&r2=1342724&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Fri May 25 16:54:09 2012
@@ -1145,7 +1145,7 @@ public class AssignmentManager extends Z
       RegionState rs = this.regionsInTransition.get(regionName);
       if (rs != null) {
         HRegionInfo regionInfo = rs.getRegion();
-        if (rs.isSplitting() || rs.isSplit()) {
+        if (rs.isSplit()) {
           LOG.debug("Ephemeral node deleted, regionserver crashed?, " +
             "clearing from RIT; rs=" + rs);
           regionOffline(rs.getRegion());

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1342724&r1=1342723&r2=1342724&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Fri May 25 16:54:09 2012
@@ -294,7 +294,7 @@ public class ServerShutdownHandler exten
               this.server.getCatalogTracker())) {
             ServerName addressFromAM = this.services.getAssignmentManager()
                 .getRegionServerOfRegion(e.getKey());
-            if (rit != null && !rit.isClosing() && !rit.isPendingClose()) {
+            if (rit != null && !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()) {
               // Skip regions that were in transition unless CLOSING or
               // PENDING_CLOSE
               LOG.info("Skip assigning region " + rit.toString());
@@ -307,6 +307,16 @@ public class ServerShutdownHandler exten
               } else {
                 toAssignRegions.add(e.getKey());
               }
+          } else if (rit != null && (rit.isSplitting() || rit.isSplit())) {
+            // This will happen when the RS went down and the call back for the SPLIITING or SPLIT
+            // has not yet happened for node Deleted event. In that case if the region was actually
+            // split
+            // but the RS had gone down before completing the split process then will not try to
+            // assign the parent region again. In that case we should make the region offline and
+            // also delete the region from RIT.
+            HRegionInfo region = rit.getRegion();
+            AssignmentManager am = this.services.getAssignmentManager();
+            am.regionOffline(region);
           }
           // If the table was partially disabled and the RS went down, we should clear the RIT
           // and remove the node for the region.

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java?rev=1342724&r1=1342723&r2=1342724&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java Fri May 25 16:54:09 2012
@@ -1039,6 +1039,7 @@ public final class ProtobufUtil {
       RequestConverter.buildGetRequest(regionName, get);
     try {
       GetResponse response = client.get(null, request);
+      if (response == null) return null;
       return toResult(response.getResult());
     } catch (ServiceException se) {
       throw getRemoteException(se);

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java?rev=1342724&r1=1342723&r2=1342724&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java Fri May 25 16:54:09 2012
@@ -67,6 +67,22 @@ public class Mocking {
       Bytes.toBytes(sn.getStartcode())));
     return new Result(kvs);
   }
+  
+    
+  /**
+   * @param sn
+   *          ServerName to use making startcode and server in meta
+   * @param hri
+   *          Region to serialize into HRegionInfo
+   * @return A mocked up Result that fakes a Get on a row in the <code>.META.</code> table.
+   * @throws IOException
+   */
+  static Result getMetaTableRowResultAsSplitRegion(final HRegionInfo hri, final ServerName sn)
+      throws IOException {
+    hri.setOffline(true);
+    hri.setSplit(true);
+    return getMetaTableRowResult(hri, sn);
+  }
 
   /**
    * Fakes the regionserver-side zk transitions of a region open.

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1342724&r1=1342723&r2=1342724&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Fri May 25 16:54:09 2012
@@ -405,7 +405,7 @@ public class TestAssignmentManager {
     AssignmentManager am = new AssignmentManager(this.server,
         this.serverManager, ct, balancer, executor, null);
     try {
-      processServerShutdownHandler(ct, am);
+      processServerShutdownHandler(ct, am, false);
     } finally {
       executor.shutdown();
       am.shutdown();
@@ -429,6 +429,70 @@ public class TestAssignmentManager {
     testCaseWithPartiallyDisabledState(Table.State.DISABLING);
     testCaseWithPartiallyDisabledState(Table.State.DISABLED);
   }
+  
+    
+  /**
+   * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS
+   * has actually completed the splitting in META but went down. See HBASE-6070 and also HBASE-5806
+   * 
+   * @throws KeeperException
+   * @throws IOException
+   */
+  @Test
+  public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception {
+    // true indicates the region is split but still in RIT
+    testCaseWithSplitRegionPartial(true);
+    // false indicate the region is not split
+    testCaseWithSplitRegionPartial(false);
+  }
+  
+  private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException,
+      IOException, NodeExistsException, InterruptedException, ServiceException {
+    // Create and startup an executor. This is used by AssignmentManager
+    // handling zk callbacks.
+    ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress");
+
+    // We need a mocked catalog tracker.
+    CatalogTracker ct = Mockito.mock(CatalogTracker.class);
+    LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration());
+    // Create an AM.
+    AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(this.server,
+        this.serverManager);
+    // adding region to regions and servers maps.
+    am.regionOnline(REGIONINFO, SERVERNAME_A);
+    // adding region in pending close.
+    am.regionsInTransition.put(REGIONINFO.getEncodedName(), new RegionState(REGIONINFO,
+        State.SPLITTING, System.currentTimeMillis(), SERVERNAME_A));
+    am.getZKTable().setEnabledTable(REGIONINFO.getTableNameAsString());
+    RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
+        REGIONINFO.getRegionName(), SERVERNAME_A);
+    String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName());
+    // create znode in M_ZK_REGION_CLOSING state.
+    ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
+
+    try {
+      processServerShutdownHandler(ct, am, regionSplitDone);
+      // check znode deleted or not.
+      // In both cases the znode should be deleted.
+
+      if (regionSplitDone) {
+        assertTrue("Region state of region in SPLITTING should be removed from rit.",
+            am.regionsInTransition.isEmpty());
+      } else {
+        while (!am.assignInvoked) {
+          Thread.sleep(1);
+        }
+        assertTrue("Assign should be invoked.", am.assignInvoked);
+      }
+    } finally {
+      REGIONINFO.setOffline(false);
+      REGIONINFO.setSplit(false);
+      executor.shutdown();
+      am.shutdown();
+      // Clean up all znodes
+      ZKAssign.deleteAllNodes(this.watcher);
+    }
+  }
 
   private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException,
       IOException, NodeExistsException, ServiceException {
@@ -460,7 +524,7 @@ public class TestAssignmentManager {
     // create znode in M_ZK_REGION_CLOSING state.
     ZKUtil.createAndWatch(this.watcher, node, data.toByteArray());
     try {
-      processServerShutdownHandler(ct, am);
+      processServerShutdownHandler(ct, am, false);
       // check znode deleted or not.
       // In both cases the znode should be deleted.
       assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1);
@@ -480,7 +544,7 @@ public class TestAssignmentManager {
     }
   }
      
-  private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am)
+  private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion)
       throws IOException, ServiceException {
     // Make sure our new AM gets callbacks; once registered, can't unregister.
     // Thats ok because we make a new zk watcher for each test.
@@ -490,7 +554,14 @@ public class TestAssignmentManager {
     // Make an RS Interface implementation.  Make it so a scanner can go against it.
     ClientProtocol implementation = Mockito.mock(ClientProtocol.class);
     // Get a meta row result that has region up on SERVERNAME_A
-    Result r = Mocking.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
+    
+    Result r = null;
+    if (splitRegion) {
+      r = Mocking.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
+    } else {
+      r = Mocking.getMetaTableRowResult(REGIONINFO, SERVERNAME_A);
+    }
+    
     ScanResponse.Builder builder = ScanResponse.newBuilder();
     builder.setMoreResults(true);
     builder.addResult(ProtobufUtil.toResult(r));
@@ -830,6 +901,7 @@ public class TestAssignmentManager {
     // Ditto for ct
     private final CatalogTracker ct;
     boolean processRITInvoked = false;
+    boolean assignInvoked = false;
     AtomicBoolean gate = new AtomicBoolean(true);
 
     public AssignmentManagerWithExtrasForTesting(final Server master,
@@ -865,6 +937,17 @@ public class TestAssignmentManager {
       super.assign(region, setOfflineInZK, forceNewPlan, hijack);
       this.gate.set(true);
     }
+    
+    @Override
+    public ServerName getRegionServerOfRegion(HRegionInfo hri) {
+      return SERVERNAME_A;
+    }
+    
+    @Override
+    public void assign(java.util.List<HRegionInfo> regions, java.util.List<ServerName> servers) 
+    {
+      assignInvoked = true;
+    };
     /** reset the watcher */
     void setWatcher(ZooKeeperWatcher watcher) {
       this.watcher = watcher;