You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2013/05/21 19:19:02 UTC

svn commit: r1484875 - in /hbase/trunk: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ hbase-server/src/main/java/org/apache/hadoop/hbase/master/ hbase-server/src/test/java/org/apache/hadoop/hbase/master/

Author: jxiang
Date: Tue May 21 17:19:02 2013
New Revision: 1484875

URL: http://svn.apache.org/r1484875
Log:
HBASE-8545 Meta stuck in transition when it is assigned to a just restarted dead region server

Modified:
    hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java

Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java?rev=1484875&r1=1484874&r2=1484875&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java Tue May 21 17:19:02 2013
@@ -798,7 +798,6 @@ public class ZKAssign {
       // Node no longer exists.  Return -1. It means unsuccessful transition.
       return -1;
     }
-    RegionTransition rt = getRegionTransition(existingBytes);
 
     // Verify it is the expected version
     if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
@@ -808,7 +807,9 @@ public class ZKAssign {
         "the node existed but was version " + stat.getVersion() +
         " not the expected version " + expectedVersion));
         return -1;
-    } else if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
+    }
+
+    if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
         && endState.equals(EventType.RS_ZK_REGION_OPENING)
         && expectedVersion == -1 && stat.getVersion() != 0) {
       // the below check ensures that double assignment doesnot happen.
@@ -822,6 +823,18 @@ public class ZKAssign {
       return -1;
     }
 
+    RegionTransition rt = getRegionTransition(existingBytes);
+
+    // Verify that the server attempting the transition matches the one recorded in the node
+    if (!rt.getServerName().equals(serverName)) {
+      LOG.warn(zkw.prefix("Attempt to transition the " +
+        "unassigned node for " + encoded +
+        " from " + beginState + " to " + endState + " failed, " +
+        "the server that tried to transition was " + serverName +
+        " not the expected " + rt.getServerName()));
+      return -1;
+    }
+
     // Verify it is in expected state
     EventType et = rt.getEventType();
     if (!et.equals(beginState)) {

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1484875&r1=1484874&r2=1484875&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Tue May 21 17:19:02 2013
@@ -534,7 +534,7 @@ public class AssignmentManager extends Z
     EventType et = rt.getEventType();
     // Get ServerName.  Could not be null.
     final ServerName sn = rt.getServerName();
-    String encodedRegionName = regionInfo.getEncodedName();
+    final String encodedRegionName = regionInfo.getEncodedName();
     LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + et);
 
 
@@ -592,6 +592,8 @@ public class AssignmentManager extends Z
                 public void process() throws IOException {
                   ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
                   try {
+                    RegionPlan plan = new RegionPlan(regionInfo, null, sn);
+                    addPlan(encodedRegionName, plan);
                     assign(rs, false, false);
                   } finally {
                     lock.unlock();

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1484875&r1=1484874&r2=1484875&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Tue May 21 17:19:02 2013
@@ -399,7 +399,7 @@ public class TestAssignmentManager {
       assertNotSame(-1, versionid);
       // This uglyness below is what the openregionhandler on RS side does.
       versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
-        SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
+        SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
         EventType.RS_ZK_REGION_OPENING, versionid);
       assertNotSame(-1, versionid);
       // Move znode from OPENING to OPENED as RS does on successful open.

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java?rev=1484875&r1=1484874&r2=1484875&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java Tue May 21 17:19:02 2013
@@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -34,6 +35,7 @@ import org.apache.hadoop.hbase.HConstant
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.catalog.MetaEditor;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
@@ -43,6 +45,7 @@ import org.apache.hadoop.hbase.coprocess
 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
 import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
 import org.apache.hadoop.hbase.coprocessor.RegionObserver;
+import org.apache.hadoop.hbase.executor.EventType;
 import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -111,6 +114,64 @@ public class TestAssignmentManagerOnClus
   }
 
   /**
+   * This tests region assignment on a simulated restarted server
+   */
+  @Test
+  public void testAssignRegionOnRestartedServer() throws Exception {
+    String table = "testAssignRegionOnRestartedServer";
+    ServerName deadServer = null;
+    HMaster master = null;
+    try {
+      HTableDescriptor desc = new HTableDescriptor(table);
+      desc.addFamily(new HColumnDescriptor(FAMILY));
+      admin.createTable(desc);
+
+      HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
+      HRegionInfo hri = new HRegionInfo(
+        desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
+      MetaEditor.addRegionToMeta(meta, hri);
+
+      master = TEST_UTIL.getHBaseCluster().getMaster();
+      Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
+      assertFalse("There should be some servers online", onlineServers.isEmpty());
+
+      // Use the first server as the destination server
+      ServerName destServer = onlineServers.iterator().next();
+
+      // Create a fake dead server
+      deadServer = new ServerName(destServer.getHostname(),
+        destServer.getPort(), destServer.getStartcode() - 100L);
+      master.serverManager.recordNewServer(deadServer, ServerLoad.EMPTY_SERVERLOAD);
+
+      AssignmentManager am = master.getAssignmentManager();
+      RegionPlan plan = new RegionPlan(hri, null, deadServer);
+      am.addPlan(hri.getEncodedName(), plan);
+      master.assignRegion(hri);
+
+      int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
+        destServer, EventType.M_ZK_REGION_OFFLINE,
+        EventType.RS_ZK_REGION_OPENING, 0);
+      assertEquals("TansitionNode should fail", -1, version);
+
+      // Give region 2 seconds to assign, which may not be enough.
+      // However, if HBASE-8545 is broken, this test will be flaky.
+      // Otherwise, this test should never be flaky.
+      Thread.sleep(2000);
+
+      assertTrue("Region should still be in transition",
+        am.getRegionStates().isRegionInTransition(hri));
+      assertEquals("Assign node should still be in version 0", 0,
+        ZKAssign.getVersion(master.getZooKeeper(), hri));
+    } finally {
+      if (deadServer != null) {
+        master.serverManager.expireServer(deadServer);
+      }
+
+      TEST_UTIL.deleteTable(Bytes.toBytes(table));
+    }
+  }
+
+  /**
    * This tests offlining a region
    */
   @Test