You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2013/05/15 00:55:11 UTC

svn commit: r1482635 - in /hbase/trunk: hbase-client/src/main/java/org/apache/hadoop/hbase/ hbase-server/src/main/java/org/apache/hadoop/hbase/master/ hbase-server/src/test/java/org/apache/hadoop/hbase/master/

Author: jxiang
Date: Tue May 14 22:55:11 2013
New Revision: 1482635

URL: http://svn.apache.org/r1482635
Log:
HBASE-8537 Dead region server pulled in from ZK

Modified:
    hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java

Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java?rev=1482635&r1=1482634&r2=1482635&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java Tue May 14 22:55:11 2013
@@ -28,7 +28,6 @@ import org.apache.hadoop.hbase.util.Addr
 import org.apache.hadoop.hbase.util.Bytes;
 
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.List;
 import java.util.regex.Pattern;
 
@@ -248,18 +247,6 @@ public class ServerName implements Compa
     return this.compareTo((ServerName)o) == 0;
   }
 
-
-  /**
-   * @return ServerName with matching hostname and port.
-   */
-  public static ServerName findServerWithSameHostnamePort(final Collection<ServerName> names,
-      final ServerName serverName) {
-    for (ServerName sn: names) {
-      if (isSameHostnameAndPort(serverName, sn)) return sn;
-    }
-    return null;
-  }
-
   /**
    * @param left
    * @param right

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1482635&r1=1482634&r2=1482635&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue May 14 22:55:11 2013
@@ -252,7 +252,7 @@ MasterServices, Server {
   // Manager and zk listener for master election
   private ActiveMasterManager activeMasterManager;
   // Region server tracker
-  private RegionServerTracker regionServerTracker;
+  RegionServerTracker regionServerTracker;
   // Draining region server tracker
   private DrainingServerTracker drainingServerTracker;
   // Tracker for load balancer state
@@ -275,7 +275,7 @@ MasterServices, Server {
   private MasterFileSystem fileSystemManager;
 
   // server manager to deal with region server info
-  private ServerManager serverManager;
+  ServerManager serverManager;
 
   // manager of assignment nodes in zookeeper
   AssignmentManager assignmentManager;
@@ -599,7 +599,7 @@ MasterServices, Server {
    * @throws IOException
    * @throws InterruptedException
    */
-  private void initializeZKBasedSystemTrackers() throws IOException,
+  void initializeZKBasedSystemTrackers() throws IOException,
       InterruptedException, KeeperException {
     this.catalogTracker = createCatalogTracker(this.zooKeeper, this.conf, this);
     this.catalogTracker.start();
@@ -757,11 +757,11 @@ MasterServices, Server {
     this.serverManager.waitForRegionServers(status);
     // Check zk for region servers that are up but didn't register
     for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
-      if (!this.serverManager.isServerOnline(sn)) {
-        // Not registered; add it.
-        LOG.info("Registering server found up in zk but who has not yet " +
-          "reported in: " + sn);
-        this.serverManager.recordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD);
+      if (!this.serverManager.isServerOnline(sn)
+          && serverManager.checkAlreadySameHostPortAndRecordNewServer(
+              sn, ServerLoad.EMPTY_SERVERLOAD)) {
+        LOG.info("Registered server found up in zk but who has not yet "
+          + "reported in: " + sn);
       }
     }
 
@@ -2666,7 +2666,7 @@ MasterServices, Server {
     try {
       SnapshotDescription snapshot = request.getSnapshot();
       IsRestoreSnapshotDoneResponse.Builder builder = IsRestoreSnapshotDoneResponse.newBuilder();
-      boolean done = snapshotManager.isRestoreDone(request.getSnapshot());
+      boolean done = snapshotManager.isRestoreDone(snapshot);
       builder.setDone(done);
       return builder.build();
     } catch (IOException e) {

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1482635&r1=1482634&r2=1482635&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Tue May 14 22:55:11 2013
@@ -60,7 +60,6 @@ import org.apache.hadoop.hbase.protobuf.
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionResponse;
 import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.Triple;
 
 import com.google.protobuf.ServiceException;
@@ -214,8 +213,11 @@ public class ServerManager {
     ServerName sn = new ServerName(ia.getHostName(), port, serverStartcode);
     checkClockSkew(sn, serverCurrentTime);
     checkIsDead(sn, "STARTUP");
-    checkAlreadySameHostPort(sn);
-    recordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD);
+    if (!checkAlreadySameHostPortAndRecordNewServer(
+        sn, ServerLoad.EMPTY_SERVERLOAD)) {
+      LOG.warn("THIS SHOULD NOT HAPPEN, RegionServerStartup"
+        + " could not record the server: " + sn);
+    }
     return sn;
   }
 
@@ -246,18 +248,20 @@ public class ServerManager {
     }
   }
 
-  void regionServerReport(ServerName sn, ServerLoad sl)
-  throws YouAreDeadException, PleaseHoldException {
+  void regionServerReport(ServerName sn,
+      ServerLoad sl) throws YouAreDeadException {
     checkIsDead(sn, "REPORT");
     if (!this.onlineServers.containsKey(sn)) {
       // Already have this host+port combo and its just different start code?
-      checkAlreadySameHostPort(sn);
       // Just let the server in. Presume master joining a running cluster.
       // recordNewServer is what happens at the end of reportServerStartup.
       // The only thing we are skipping is passing back to the regionserver
       // the ServerName to use. Here we presume a master has already done
       // that so we'll press on with whatever it gave us for ServerName.
-      recordNewServer(sn, sl);
+      if (!checkAlreadySameHostPortAndRecordNewServer(sn, sl)) {
+        LOG.info("RegionServerReport ignored, could not record the sever: " + sn);
+        return; // Not recorded, so no need to move on
+      }
     } else {
       this.onlineServers.put(sn, sl);
     }
@@ -265,29 +269,29 @@ public class ServerManager {
   }
 
   /**
-   * Test to see if we have a server of same host and port already.
-   * @param serverName
-   * @throws PleaseHoldException
-   */
-  void checkAlreadySameHostPort(final ServerName serverName)
-  throws PleaseHoldException {
-    ServerName existingServer =
-      ServerName.findServerWithSameHostnamePort(getOnlineServersList(), serverName);
+   * Check is a server of same host and port already exists,
+   * if not, or the existed one got a smaller start code, record it.
+   *
+   * @param sn the server to check and record
+   * @param sl the server load on the server
+   * @return true if the server is recorded, otherwise, false
+   */
+  boolean checkAlreadySameHostPortAndRecordNewServer(
+      final ServerName serverName, final ServerLoad sl) {
+    ServerName existingServer = findServerWithSameHostnamePort(serverName);
     if (existingServer != null) {
-      String message = "Server serverName=" + serverName +
-        " rejected; we already have " + existingServer.toString() +
-        " registered with same hostname and port";
-      LOG.info(message);
-      if (existingServer.getStartcode() < serverName.getStartcode()) {
-        LOG.info("Triggering server recovery; existingServer " +
-          existingServer + " looks stale, new server:" + serverName);
-        expireServer(existingServer);
-      }
-      if (services.isServerShutdownHandlerEnabled()) {
-        // master has completed the initialization
-        throw new PleaseHoldException(message);
-      }
+      if (existingServer.getStartcode() > serverName.getStartcode()) {
+        LOG.info("Server serverName=" + serverName +
+          " rejected; we already have " + existingServer.toString() +
+          " registered with same hostname and port");
+        return false;
+      }
+      LOG.info("Triggering server recovery; existingServer " +
+        existingServer + " looks stale, new server:" + serverName);
+      expireServer(existingServer);
     }
+    recordNewServer(serverName, sl);
+    return true;
   }
 
   /**
@@ -345,6 +349,17 @@ public class ServerManager {
   }
 
   /**
+   * @return ServerName with matching hostname and port.
+   */
+  private ServerName findServerWithSameHostnamePort(
+      final ServerName serverName) {
+    for (ServerName sn: getOnlineServersList()) {
+      if (ServerName.isSameHostnameAndPort(serverName, sn)) return sn;
+    }
+    return null;
+  }
+
+  /**
    * Adds the onlineServers list.
    * @param serverName The remote servers name.
    * @param sl

Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java?rev=1482635&r1=1482634&r2=1482635&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java Tue May 14 22:55:11 2013
@@ -19,11 +19,14 @@ package org.apache.hadoop.hbase.master;
 
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Abortable;
@@ -49,6 +52,7 @@ import org.apache.hadoop.hbase.zookeeper
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.hadoop.hbase.MediumTests;
 import org.apache.zookeeper.KeeperException;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameStringPair;
 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest;
@@ -341,4 +345,62 @@ public class TestMasterNoCluster {
     }
   }
 
+  @Test
+  public void testNotPullingDeadRegionServerFromZK()
+      throws IOException, KeeperException, InterruptedException {
+    final Configuration conf = TESTUTIL.getConfiguration();
+    final ServerName newServer = new ServerName("test.sample", 1, 101);
+    final ServerName deadServer = new ServerName("test.sample", 1, 100);
+    final MockRegionServer rs0 = new MockRegionServer(conf, newServer);
+
+    HMaster master = new HMaster(conf) {
+      @Override
+      boolean assignMeta(MonitoredTask status) {
+        return true;
+      }
+
+      @Override
+      void initializeZKBasedSystemTrackers() throws IOException,
+      InterruptedException, KeeperException {
+        super.initializeZKBasedSystemTrackers();
+        // Record a newer server in server manager at first
+        serverManager.recordNewServer(newServer, ServerLoad.EMPTY_SERVERLOAD);
+
+        List<ServerName> onlineServers = new ArrayList<ServerName>();
+        onlineServers.add(deadServer);
+        onlineServers.add(newServer);
+        // Mock the region server tracker to pull the dead server from zk
+        regionServerTracker = Mockito.spy(regionServerTracker);
+        Mockito.doReturn(onlineServers).when(
+          regionServerTracker).getOnlineServers();
+      }
+
+      @Override
+      CatalogTracker createCatalogTracker(ZooKeeperWatcher zk,
+          Configuration conf, Abortable abortable)
+      throws IOException {
+        // Insert a mock for the connection used by the CatalogTracker.  Any
+        // regionserver should do.  Use TESTUTIL.getConfiguration rather than
+        // the conf from the master; the conf will already have an HConnection
+        // associate so the below mocking of a connection will fail.
+        HConnection connection =
+          HConnectionTestingUtility.getMockedConnectionAndDecorate(TESTUTIL.getConfiguration(),
+            rs0, rs0, rs0.getServerName(), HRegionInfo.ROOT_REGIONINFO);
+        return new CatalogTracker(zk, conf, connection, abortable);
+      }
+    };
+    master.start();
+
+    try {
+      // Wait till master is initialized.
+      while (!master.initialized) Threads.sleep(10);
+      LOG.info("Master is initialized");
+
+      assertFalse("The dead server should not be pulled in",
+        master.serverManager.isServerOnline(deadServer));
+    } finally {
+      master.stopMaster();
+      master.join();
+    }
+  }
 }