You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2013/05/15 00:55:11 UTC
svn commit: r1482635 - in /hbase/trunk:
hbase-client/src/main/java/org/apache/hadoop/hbase/
hbase-server/src/main/java/org/apache/hadoop/hbase/master/
hbase-server/src/test/java/org/apache/hadoop/hbase/master/
Author: jxiang
Date: Tue May 14 22:55:11 2013
New Revision: 1482635
URL: http://svn.apache.org/r1482635
Log:
HBASE-8537 Dead region server pulled in from ZK
Modified:
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java?rev=1482635&r1=1482634&r2=1482635&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java Tue May 14 22:55:11 2013
@@ -28,7 +28,6 @@ import org.apache.hadoop.hbase.util.Addr
import org.apache.hadoop.hbase.util.Bytes;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;
@@ -248,18 +247,6 @@ public class ServerName implements Compa
return this.compareTo((ServerName)o) == 0;
}
-
- /**
- * @return ServerName with matching hostname and port.
- */
- public static ServerName findServerWithSameHostnamePort(final Collection<ServerName> names,
- final ServerName serverName) {
- for (ServerName sn: names) {
- if (isSameHostnameAndPort(serverName, sn)) return sn;
- }
- return null;
- }
-
/**
* @param left
* @param right
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1482635&r1=1482634&r2=1482635&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue May 14 22:55:11 2013
@@ -252,7 +252,7 @@ MasterServices, Server {
// Manager and zk listener for master election
private ActiveMasterManager activeMasterManager;
// Region server tracker
- private RegionServerTracker regionServerTracker;
+ RegionServerTracker regionServerTracker;
// Draining region server tracker
private DrainingServerTracker drainingServerTracker;
// Tracker for load balancer state
@@ -275,7 +275,7 @@ MasterServices, Server {
private MasterFileSystem fileSystemManager;
// server manager to deal with region server info
- private ServerManager serverManager;
+ ServerManager serverManager;
// manager of assignment nodes in zookeeper
AssignmentManager assignmentManager;
@@ -599,7 +599,7 @@ MasterServices, Server {
* @throws IOException
* @throws InterruptedException
*/
- private void initializeZKBasedSystemTrackers() throws IOException,
+ void initializeZKBasedSystemTrackers() throws IOException,
InterruptedException, KeeperException {
this.catalogTracker = createCatalogTracker(this.zooKeeper, this.conf, this);
this.catalogTracker.start();
@@ -757,11 +757,11 @@ MasterServices, Server {
this.serverManager.waitForRegionServers(status);
// Check zk for region servers that are up but didn't register
for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
- if (!this.serverManager.isServerOnline(sn)) {
- // Not registered; add it.
- LOG.info("Registering server found up in zk but who has not yet " +
- "reported in: " + sn);
- this.serverManager.recordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD);
+ if (!this.serverManager.isServerOnline(sn)
+ && serverManager.checkAlreadySameHostPortAndRecordNewServer(
+ sn, ServerLoad.EMPTY_SERVERLOAD)) {
+ LOG.info("Registered server found up in zk but who has not yet "
+ + "reported in: " + sn);
}
}
@@ -2666,7 +2666,7 @@ MasterServices, Server {
try {
SnapshotDescription snapshot = request.getSnapshot();
IsRestoreSnapshotDoneResponse.Builder builder = IsRestoreSnapshotDoneResponse.newBuilder();
- boolean done = snapshotManager.isRestoreDone(request.getSnapshot());
+ boolean done = snapshotManager.isRestoreDone(snapshot);
builder.setDone(done);
return builder.build();
} catch (IOException e) {
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1482635&r1=1482634&r2=1482635&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Tue May 14 22:55:11 2013
@@ -60,7 +60,6 @@ import org.apache.hadoop.hbase.protobuf.
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.OpenRegionResponse;
import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Triple;
import com.google.protobuf.ServiceException;
@@ -214,8 +213,11 @@ public class ServerManager {
ServerName sn = new ServerName(ia.getHostName(), port, serverStartcode);
checkClockSkew(sn, serverCurrentTime);
checkIsDead(sn, "STARTUP");
- checkAlreadySameHostPort(sn);
- recordNewServer(sn, ServerLoad.EMPTY_SERVERLOAD);
+ if (!checkAlreadySameHostPortAndRecordNewServer(
+ sn, ServerLoad.EMPTY_SERVERLOAD)) {
+ LOG.warn("THIS SHOULD NOT HAPPEN, RegionServerStartup"
+ + " could not record the server: " + sn);
+ }
return sn;
}
@@ -246,18 +248,20 @@ public class ServerManager {
}
}
- void regionServerReport(ServerName sn, ServerLoad sl)
- throws YouAreDeadException, PleaseHoldException {
+ void regionServerReport(ServerName sn,
+ ServerLoad sl) throws YouAreDeadException {
checkIsDead(sn, "REPORT");
if (!this.onlineServers.containsKey(sn)) {
// Already have this host+port combo and its just different start code?
- checkAlreadySameHostPort(sn);
// Just let the server in. Presume master joining a running cluster.
// recordNewServer is what happens at the end of reportServerStartup.
// The only thing we are skipping is passing back to the regionserver
// the ServerName to use. Here we presume a master has already done
// that so we'll press on with whatever it gave us for ServerName.
- recordNewServer(sn, sl);
+ if (!checkAlreadySameHostPortAndRecordNewServer(sn, sl)) {
+ LOG.info("RegionServerReport ignored, could not record the sever: " + sn);
+ return; // Not recorded, so no need to move on
+ }
} else {
this.onlineServers.put(sn, sl);
}
@@ -265,29 +269,29 @@ public class ServerManager {
}
/**
- * Test to see if we have a server of same host and port already.
- * @param serverName
- * @throws PleaseHoldException
- */
- void checkAlreadySameHostPort(final ServerName serverName)
- throws PleaseHoldException {
- ServerName existingServer =
- ServerName.findServerWithSameHostnamePort(getOnlineServersList(), serverName);
+ * Check is a server of same host and port already exists,
+ * if not, or the existed one got a smaller start code, record it.
+ *
+ * @param sn the server to check and record
+ * @param sl the server load on the server
+ * @return true if the server is recorded, otherwise, false
+ */
+ boolean checkAlreadySameHostPortAndRecordNewServer(
+ final ServerName serverName, final ServerLoad sl) {
+ ServerName existingServer = findServerWithSameHostnamePort(serverName);
if (existingServer != null) {
- String message = "Server serverName=" + serverName +
- " rejected; we already have " + existingServer.toString() +
- " registered with same hostname and port";
- LOG.info(message);
- if (existingServer.getStartcode() < serverName.getStartcode()) {
- LOG.info("Triggering server recovery; existingServer " +
- existingServer + " looks stale, new server:" + serverName);
- expireServer(existingServer);
- }
- if (services.isServerShutdownHandlerEnabled()) {
- // master has completed the initialization
- throw new PleaseHoldException(message);
- }
+ if (existingServer.getStartcode() > serverName.getStartcode()) {
+ LOG.info("Server serverName=" + serverName +
+ " rejected; we already have " + existingServer.toString() +
+ " registered with same hostname and port");
+ return false;
+ }
+ LOG.info("Triggering server recovery; existingServer " +
+ existingServer + " looks stale, new server:" + serverName);
+ expireServer(existingServer);
}
+ recordNewServer(serverName, sl);
+ return true;
}
/**
@@ -345,6 +349,17 @@ public class ServerManager {
}
/**
+ * @return ServerName with matching hostname and port.
+ */
+ private ServerName findServerWithSameHostnamePort(
+ final ServerName serverName) {
+ for (ServerName sn: getOnlineServersList()) {
+ if (ServerName.isSameHostnameAndPort(serverName, sn)) return sn;
+ }
+ return null;
+ }
+
+ /**
* Adds the onlineServers list.
* @param serverName The remote servers name.
* @param sl
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java?rev=1482635&r1=1482634&r2=1482635&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java Tue May 14 22:55:11 2013
@@ -19,11 +19,14 @@ package org.apache.hadoop.hbase.master;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
@@ -49,6 +52,7 @@ import org.apache.hadoop.hbase.zookeeper
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.zookeeper.KeeperException;
+import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameStringPair;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest;
@@ -341,4 +345,62 @@ public class TestMasterNoCluster {
}
}
+ @Test
+ public void testNotPullingDeadRegionServerFromZK()
+ throws IOException, KeeperException, InterruptedException {
+ final Configuration conf = TESTUTIL.getConfiguration();
+ final ServerName newServer = new ServerName("test.sample", 1, 101);
+ final ServerName deadServer = new ServerName("test.sample", 1, 100);
+ final MockRegionServer rs0 = new MockRegionServer(conf, newServer);
+
+ HMaster master = new HMaster(conf) {
+ @Override
+ boolean assignMeta(MonitoredTask status) {
+ return true;
+ }
+
+ @Override
+ void initializeZKBasedSystemTrackers() throws IOException,
+ InterruptedException, KeeperException {
+ super.initializeZKBasedSystemTrackers();
+ // Record a newer server in server manager at first
+ serverManager.recordNewServer(newServer, ServerLoad.EMPTY_SERVERLOAD);
+
+ List<ServerName> onlineServers = new ArrayList<ServerName>();
+ onlineServers.add(deadServer);
+ onlineServers.add(newServer);
+ // Mock the region server tracker to pull the dead server from zk
+ regionServerTracker = Mockito.spy(regionServerTracker);
+ Mockito.doReturn(onlineServers).when(
+ regionServerTracker).getOnlineServers();
+ }
+
+ @Override
+ CatalogTracker createCatalogTracker(ZooKeeperWatcher zk,
+ Configuration conf, Abortable abortable)
+ throws IOException {
+ // Insert a mock for the connection used by the CatalogTracker. Any
+ // regionserver should do. Use TESTUTIL.getConfiguration rather than
+ // the conf from the master; the conf will already have an HConnection
+ // associate so the below mocking of a connection will fail.
+ HConnection connection =
+ HConnectionTestingUtility.getMockedConnectionAndDecorate(TESTUTIL.getConfiguration(),
+ rs0, rs0, rs0.getServerName(), HRegionInfo.ROOT_REGIONINFO);
+ return new CatalogTracker(zk, conf, connection, abortable);
+ }
+ };
+ master.start();
+
+ try {
+ // Wait till master is initialized.
+ while (!master.initialized) Threads.sleep(10);
+ LOG.info("Master is initialized");
+
+ assertFalse("The dead server should not be pulled in",
+ master.serverManager.isServerOnline(deadServer));
+ } finally {
+ master.stopMaster();
+ master.join();
+ }
+ }
}