You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by nk...@apache.org on 2013/03/19 10:24:02 UTC
svn commit: r1458184 - in /hbase/trunk: hbase-client/
hbase-client/src/main/java/org/apache/hadoop/hbase/
hbase-client/src/main/java/org/apache/hadoop/hbase/client/
hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/
hbase-client/src/main/java/org/...
Author: nkeywal
Date: Tue Mar 19 09:24:01 2013
New Revision: 1458184
URL: http://svn.apache.org/r1458184
Log:
HBASE-7590 Add a costless notifications mechanism from master to regionservers & clients
Modified:
hbase/trunk/hbase-client/pom.xml
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/Chore.java
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnection.java
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClientRPC.java
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/ProtobufRpcClientEngine.java
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientEngine.java
hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/RecoverableZooKeeper.java
hbase/trunk/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/thrift/IncrementCoalescer.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMultiVersions.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditorNoCluster.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterWithScanLimits.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterWrapper.java
hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
Modified: hbase/trunk/hbase-client/pom.xml
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/pom.xml?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/pom.xml (original)
+++ hbase/trunk/hbase-client/pom.xml Tue Mar 19 09:24:01 2013
@@ -80,6 +80,10 @@
<groupId>org.cloudera.htrace</groupId>
<artifactId>htrace</artifactId>
</dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </dependency>
</dependencies>
<profiles>
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/Chore.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/Chore.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/Chore.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/Chore.java Tue Mar 19 09:24:01 2013
@@ -21,6 +21,7 @@ package org.apache.hadoop.hbase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.util.Sleeper;
@@ -48,11 +49,23 @@ public abstract class Chore extends HasT
*/
public Chore(String name, final int p, final Stoppable stopper) {
super(name);
+ if (stopper == null){
+ throw new NullPointerException("stopper cannot be null");
+ }
this.sleeper = new Sleeper(p, stopper);
this.stopper = stopper;
}
/**
+ * This constructor is for tests only. It allows creating an object and calling chore() on
+ * it. There is neither a sleeper nor a stopper.
+ */
+ protected Chore(){
+ sleeper = null;
+ stopper = null;
+ }
+
+ /**
* @see java.lang.Thread#run()
*/
@Override
@@ -60,7 +73,7 @@ public abstract class Chore extends HasT
try {
boolean initialChoreComplete = false;
while (!this.stopper.isStopped()) {
- long startTime = System.currentTimeMillis();
+ long startTime = EnvironmentEdgeManager.currentTimeMillis();
try {
if (!initialChoreComplete) {
initialChoreComplete = initialChore();
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java Tue Mar 19 09:24:01 2013
@@ -34,13 +34,14 @@ import org.apache.hadoop.hbase.protobuf.
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.VersionedWritable;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
-import java.util.LinkedList;
import java.util.Map;
+
/**
* Status information on the HBase cluster.
* <p>
@@ -82,7 +83,7 @@ public class ClusterStatus extends Versi
private Map<String, RegionState> intransition;
private String clusterId;
private String[] masterCoprocessors;
- private boolean balancerOn;
+ private Boolean balancerOn;
/**
* Constructor, for Writable
@@ -100,7 +101,7 @@ public class ClusterStatus extends Versi
final Collection<ServerName> backupMasters,
final Map<String, RegionState> rit,
final String[] masterCoprocessors,
- final boolean balancerOn) {
+ final Boolean balancerOn) {
this.hbaseVersion = hbaseVersion;
this.liveServers = servers;
@@ -261,53 +262,81 @@ public class ClusterStatus extends Versi
return clusterId;
}
- public String[] getMasterCoprocessors() {
- return masterCoprocessors;
+ public String[] getMasterCoprocessors() {
+ return masterCoprocessors;
}
public boolean isBalancerOn() {
+ return balancerOn != null && balancerOn;
+ }
+
+ public Boolean getBalancerOn(){
return balancerOn;
}
/**
- * Convert a ClutserStatus to a protobuf ClusterStatus
+ * Convert a ClusterStatus to a protobuf ClusterStatus
*
* @return the protobuf ClusterStatus
*/
public ClusterStatusProtos.ClusterStatus convert() {
- ClusterStatusProtos.ClusterStatus.Builder builder = ClusterStatusProtos.ClusterStatus.newBuilder();
+ ClusterStatusProtos.ClusterStatus.Builder builder =
+ ClusterStatusProtos.ClusterStatus.newBuilder();
builder.setHbaseVersion(HBaseVersionFileContent.newBuilder().setVersion(getHBaseVersion()));
- for (Map.Entry<ServerName, ServerLoad> entry : liveServers.entrySet()) {
- LiveServerInfo.Builder lsi =
- LiveServerInfo.newBuilder().setServer(ProtobufUtil.toServerName(entry.getKey()));
- lsi.setServerLoad(entry.getValue().obtainServerLoadPB());
- builder.addLiveServers(lsi.build());
- }
- for (ServerName deadServer : getDeadServerNames()) {
- builder.addDeadServers(ProtobufUtil.toServerName(deadServer));
- }
- for (Map.Entry<String, RegionState> rit : getRegionsInTransition().entrySet()) {
- ClusterStatusProtos.RegionState rs = rit.getValue().convert();
- RegionSpecifier.Builder spec =
- RegionSpecifier.newBuilder().setType(RegionSpecifierType.REGION_NAME);
- spec.setValue(ByteString.copyFrom(Bytes.toBytes(rit.getKey())));
-
- RegionInTransition pbRIT =
- RegionInTransition.newBuilder().setSpec(spec.build()).setRegionState(rs).build();
- builder.addRegionsInTransition(pbRIT);
- }
- builder.setClusterId(new ClusterId(getClusterId()).convert());
- for (String coprocessor : getMasterCoprocessors()) {
- builder.addMasterCoprocessors(HBaseProtos.Coprocessor.newBuilder().setName(coprocessor));
- }
- builder.setMaster(
- ProtobufUtil.toServerName(getMaster()));
- for (ServerName backup : getBackupMasters()) {
- builder.addBackupMasters(ProtobufUtil.toServerName(backup));
+ if (liveServers != null){
+ for (Map.Entry<ServerName, ServerLoad> entry : liveServers.entrySet()) {
+ LiveServerInfo.Builder lsi =
+ LiveServerInfo.newBuilder().setServer(ProtobufUtil.toServerName(entry.getKey()));
+ lsi.setServerLoad(entry.getValue().obtainServerLoadPB());
+ builder.addLiveServers(lsi.build());
+ }
}
- builder.setBalancerOn(balancerOn);
+
+ if (deadServers != null){
+ for (ServerName deadServer : deadServers) {
+ builder.addDeadServers(ProtobufUtil.toServerName(deadServer));
+ }
+ }
+
+ if (intransition != null) {
+ for (Map.Entry<String, RegionState> rit : getRegionsInTransition().entrySet()) {
+ ClusterStatusProtos.RegionState rs = rit.getValue().convert();
+ RegionSpecifier.Builder spec =
+ RegionSpecifier.newBuilder().setType(RegionSpecifierType.REGION_NAME);
+ spec.setValue(ByteString.copyFrom(Bytes.toBytes(rit.getKey())));
+
+ RegionInTransition pbRIT =
+ RegionInTransition.newBuilder().setSpec(spec.build()).setRegionState(rs).build();
+ builder.addRegionsInTransition(pbRIT);
+ }
+ }
+
+ if (clusterId != null) {
+ builder.setClusterId(new ClusterId(clusterId).convert());
+ }
+
+ if (masterCoprocessors != null) {
+ for (String coprocessor : masterCoprocessors) {
+ builder.addMasterCoprocessors(HBaseProtos.Coprocessor.newBuilder().setName(coprocessor));
+ }
+ }
+
+ if (master != null){
+ builder.setMaster(ProtobufUtil.toServerName(getMaster()));
+ }
+
+ if (backupMasters != null) {
+ for (ServerName backup : backupMasters) {
+ builder.addBackupMasters(ProtobufUtil.toServerName(backup));
+ }
+ }
+
+ if (balancerOn != null){
+ builder.setBalancerOn(balancerOn);
+ }
+
return builder.build();
}
@@ -318,29 +347,51 @@ public class ClusterStatus extends Versi
* @return the converted ClusterStatus
*/
public static ClusterStatus convert(ClusterStatusProtos.ClusterStatus proto) {
- Map<ServerName, ServerLoad> servers = new HashMap<ServerName, ServerLoad>();
- for (LiveServerInfo lsi : proto.getLiveServersList()) {
- servers.put(ProtobufUtil.toServerName(lsi.getServer()), new ServerLoad(lsi.getServerLoad()));
- }
- Collection<ServerName> deadServers = new LinkedList<ServerName>();
- for (HBaseProtos.ServerName sn : proto.getDeadServersList()) {
- deadServers.add(ProtobufUtil.toServerName(sn));
- }
- Collection<ServerName> backupMasters = new LinkedList<ServerName>();
- for (HBaseProtos.ServerName sn : proto.getBackupMastersList()) {
- backupMasters.add(ProtobufUtil.toServerName(sn));
- }
- final Map<String, RegionState> rit = new HashMap<String, RegionState>();
- for (RegionInTransition region : proto.getRegionsInTransitionList()) {
- String key = new String(region.getSpec().getValue().toByteArray());
- RegionState value = RegionState.convert(region.getRegionState());
- rit.put(key,value);
- }
- final int numMasterCoprocessors = proto.getMasterCoprocessorsCount();
- final String[] masterCoprocessors = new String[numMasterCoprocessors];
- for (int i = 0; i < numMasterCoprocessors; i++) {
- masterCoprocessors[i] = proto.getMasterCoprocessors(i).getName();
+
+ Map<ServerName, ServerLoad> servers = null;
+ if (proto.getLiveServersList() != null) {
+ servers = new HashMap<ServerName, ServerLoad>(proto.getLiveServersList().size());
+ for (LiveServerInfo lsi : proto.getLiveServersList()) {
+ servers.put(ProtobufUtil.toServerName(
+ lsi.getServer()), new ServerLoad(lsi.getServerLoad()));
+ }
+ }
+
+ Collection<ServerName> deadServers = null;
+ if (proto.getDeadServersList() != null) {
+ deadServers = new ArrayList<ServerName>(proto.getDeadServersList().size());
+ for (HBaseProtos.ServerName sn : proto.getDeadServersList()) {
+ deadServers.add(ProtobufUtil.toServerName(sn));
+ }
+ }
+
+ Collection<ServerName> backupMasters = null;
+ if (proto.getBackupMastersList() != null) {
+ backupMasters = new ArrayList<ServerName>(proto.getBackupMastersList().size());
+ for (HBaseProtos.ServerName sn : proto.getBackupMastersList()) {
+ backupMasters.add(ProtobufUtil.toServerName(sn));
+ }
+ }
+
+ Map<String, RegionState> rit = null;
+ if (proto.getRegionsInTransitionList() != null) {
+ rit = new HashMap<String, RegionState>(proto.getRegionsInTransitionList().size());
+ for (RegionInTransition region : proto.getRegionsInTransitionList()) {
+ String key = new String(region.getSpec().getValue().toByteArray());
+ RegionState value = RegionState.convert(region.getRegionState());
+ rit.put(key, value);
+ }
+ }
+
+ String[] masterCoprocessors = null;
+ if (proto.getMasterCoprocessorsList() != null) {
+ final int numMasterCoprocessors = proto.getMasterCoprocessorsCount();
+ masterCoprocessors = new String[numMasterCoprocessors];
+ for (int i = 0; i < numMasterCoprocessors; i++) {
+ masterCoprocessors[i] = proto.getMasterCoprocessors(i).getName();
+ }
}
+
return new ClusterStatus(proto.getHbaseVersion().getVersion(),
ClusterId.convert(proto.getClusterId()).toString(),servers,deadServers,
ProtobufUtil.toServerName(proto.getMaster()),backupMasters,rit,masterCoprocessors,
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java Tue Mar 19 09:24:01 2013
@@ -155,7 +155,7 @@ public class HBaseAdmin implements Abort
* @param c Configuration object. Copied internally.
*/
public HBaseAdmin(Configuration c)
- throws MasterNotRunningException, ZooKeeperConnectionException {
+ throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
// Will not leak connections, as the new implementation of the constructor
// does not throw exceptions anymore.
this(HConnectionManager.getConnection(new Configuration(c)));
@@ -554,8 +554,6 @@ public class HBaseAdmin implements Abort
});
// Wait until all regions deleted
- ClientProtocol server =
- connection.getClient(firstMetaServer.getServerName());
for (int tries = 0; tries < (this.numRetries * this.retryLongerMultiplier); tries++) {
try {
@@ -565,6 +563,7 @@ public class HBaseAdmin implements Abort
firstMetaServer.getRegionInfo().getRegionName(), scan, 1, true);
Result[] values = null;
// Get a batch at a time.
+ ClientProtocol server = connection.getClient(firstMetaServer.getServerName());
try {
ScanResponse response = server.scan(null, request);
values = ResponseConverter.getResults(response);
@@ -1934,7 +1933,7 @@ public class HBaseAdmin implements Abort
* @throws ZooKeeperConnectionException if unable to connect to zookeeper
*/
public static void checkHBaseAvailable(Configuration conf)
- throws MasterNotRunningException, ZooKeeperConnectionException, ServiceException {
+ throws MasterNotRunningException, ZooKeeperConnectionException, ServiceException, IOException {
Configuration copyOfConf = HBaseConfiguration.create(conf);
// We set it to make it fail as soon as possible if HBase is not available
@@ -2435,7 +2434,7 @@ public class HBaseAdmin implements Abort
* Execute Restore/Clone snapshot and wait for the server to complete (blocking).
* To check if the cloned table exists, use {@link #isTableAvailable} -- it is not safe to
* create an HTable instance to this table before it is available.
- * @param snapshot snapshot to restore
+ * @param snapshotName snapshot to restore
* @param tableName table name to restore the snapshot on
* @throws IOException if a remote or network exception occurs
* @throws RestoreSnapshotException if snapshot failed to be restored
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnection.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnection.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnection.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnection.java Tue Mar 19 09:24:01 2013
@@ -403,11 +403,12 @@ public interface HConnection extends Abo
*/
public boolean isClosed();
+
/**
* Clear any caches that pertain to server name <code>sn</code>
- * @param sn A server name as hostname:port
+ * @param sn A server name
*/
- public void clearCaches(final String sn);
+ public void clearCaches(final ServerName sn);
/**
* This function allows HBaseAdminProtocol and potentially others to get a shared MasterMonitor
@@ -425,5 +426,11 @@ public interface HConnection extends Abo
* @throws MasterNotRunningException
*/
public MasterAdminKeepAliveConnection getKeepAliveMasterAdmin() throws MasterNotRunningException;
+
+ /**
+ * @param serverName
+ * @return true if the server is known as dead, false otherwise.
+ */
+ public boolean isDeadServer(ServerName serverName);
}
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java Tue Mar 19 09:24:01 2013
@@ -72,6 +72,7 @@ import org.apache.hadoop.hbase.client.co
import org.apache.hadoop.hbase.exceptions.DoNotRetryIOException;
import org.apache.hadoop.hbase.exceptions.MasterNotRunningException;
import org.apache.hadoop.hbase.exceptions.RegionMovedException;
+import org.apache.hadoop.hbase.exceptions.RegionServerStoppedException;
import org.apache.hadoop.hbase.exceptions.TableNotFoundException;
import org.apache.hadoop.hbase.exceptions.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.ipc.HBaseClientRPC;
@@ -202,7 +203,7 @@ public class HConnectionManager {
* @throws ZooKeeperConnectionException
*/
public static HConnection getConnection(Configuration conf)
- throws ZooKeeperConnectionException {
+ throws IOException {
HConnectionKey connectionKey = new HConnectionKey(conf);
synchronized (HBASE_INSTANCES) {
HConnectionImplementation connection = HBASE_INSTANCES.get(connectionKey);
@@ -230,7 +231,7 @@ public class HConnectionManager {
* @throws ZooKeeperConnectionException
*/
public static HConnection createConnection(Configuration conf)
- throws ZooKeeperConnectionException {
+ throws IOException {
return new HConnectionImplementation(conf, false);
}
@@ -260,7 +261,6 @@ public class HConnectionManager {
/**
* Delete information for all connections.
- * @throws IOException
*/
public static void deleteAllConnections() {
synchronized (HBASE_INSTANCES) {
@@ -520,6 +520,9 @@ public class HConnectionManager {
private volatile boolean closed;
private volatile boolean aborted;
+ // package protected for the tests
+ ClusterStatusListener clusterStatusListener;
+
private final Object metaRegionLock = new Object();
private final Object userRegionLock = new Object();
@@ -558,8 +561,8 @@ public class HConnectionManager {
// entry in cachedRegionLocations that map to this server; but the absence
// of a server in this map guarentees that there is no entry in cache that
// maps to the absent server.
- private final Set<String> cachedServers =
- new HashSet<String>();
+ // The access to this attribute must be protected by a lock on cachedRegionLocations
+ private final Set<ServerName> cachedServers = new HashSet<ServerName>();
// region cache prefetch is enabled by default. this set contains all
// tables whose region cache prefetch are disabled.
@@ -575,8 +578,7 @@ public class HConnectionManager {
* @param conf Configuration object
*/
@SuppressWarnings("unchecked")
- public HConnectionImplementation(Configuration conf, boolean managed)
- throws ZooKeeperConnectionException {
+ public HConnectionImplementation(Configuration conf, boolean managed) throws IOException {
this.conf = conf;
this.managed = managed;
String adminClassName = conf.get(REGION_PROTOCOL_CLASS,
@@ -613,10 +615,29 @@ public class HConnectionManager {
HConstants.DEFAULT_HBASE_CLIENT_PREFETCH_LIMIT);
retrieveClusterId();
+
// ProtobufRpcClientEngine is the main RpcClientEngine implementation,
// but we maintain access through an interface to allow overriding for tests
// RPC engine setup must follow obtaining the cluster ID for token authentication to work
this.rpcEngine = new ProtobufRpcClientEngine(this.conf, this.clusterId);
+
+
+ // Do we listen to the cluster status notifications?
+ Class<? extends ClusterStatusListener.Listener> listenerClass =
+ conf.getClass(ClusterStatusListener.STATUS_LISTENER_CLASS,
+ ClusterStatusListener.DEFAULT_STATUS_LISTENER_CLASS,
+ ClusterStatusListener.Listener.class);
+
+ if (listenerClass != null) {
+ clusterStatusListener = new ClusterStatusListener(
+ new ClusterStatusListener.DeadServerHandler() {
+ @Override
+ public void newDead(ServerName sn) {
+ clearCaches(sn);
+ rpcEngine.getClient().cancelConnections(sn.getHostname(), sn.getPort(), null);
+ }
+ }, conf, listenerClass);
+ }
}
/**
@@ -755,7 +776,7 @@ public class HConnectionManager {
// tries at this point is 1 or more; decrement to start from 0.
long pauseTime = ConnectionUtils.getPauseTime(this.pause, tries - 1);
LOG.info("getMaster attempt " + tries + " of " + numRetries +
- " failed; retrying after sleep of " +pauseTime, exceptionCaught);
+ " failed; retrying after sleep of " +pauseTime + ", exception=" + exceptionCaught);
try {
Thread.sleep(pauseTime);
@@ -922,6 +943,7 @@ public class HConnectionManager {
}
}
+
@Override
public HRegionLocation locateRegion(final byte[] regionName) throws IOException {
return locateRegion(HRegionInfo.getTableName(regionName),
@@ -929,6 +951,15 @@ public class HConnectionManager {
}
@Override
+ public boolean isDeadServer(ServerName sn) {
+ if (clusterStatusListener == null) {
+ return false;
+ } else {
+ return clusterStatusListener.isDeadServer(sn);
+ }
+ }
+
+ @Override
public List<HRegionLocation> locateRegions(final byte[] tableName)
throws IOException {
return locateRegions (tableName, false, true);
@@ -1087,10 +1118,9 @@ public class HConnectionManager {
metaLocation = locateRegion(parentTable, metaKey, true, false);
// If null still, go around again.
if (metaLocation == null) continue;
- ClientProtocol server =
- getClient(metaLocation.getServerName());
+ ClientProtocol server = getClient(metaLocation.getServerName());
- Result regionInfoRow = null;
+ Result regionInfoRow;
// This block guards against two threads trying to load the meta
// region at the same time. The first will load the meta region and
// the second will use the value that the first one found.
@@ -1157,6 +1187,12 @@ public class HConnectionManager {
Bytes.toStringBinary(row));
}
+ if (isDeadServer(serverName)){
+ throw new RegionServerStoppedException(".META. says the region "+
+ regionInfo.getRegionNameAsString()+" is managed by the server " + serverName +
+ ", but it is dead.");
+ }
+
// Instantiate the location
location = new HRegionLocation(regionInfo, serverName,
HRegionInfo.getSeqNumDuringOpen(regionInfoRow));
@@ -1269,41 +1305,33 @@ public class HConnectionManager {
if ((rl != null) && LOG.isDebugEnabled()) {
LOG.debug("Removed " + rl.getHostname() + ":" + rl.getPort()
+ " as a location of " + rl.getRegionInfo().getRegionNameAsString() +
- " for tableName=" + Bytes.toString(tableName) +
- " from cache to make sure we don't use cache for " + Bytes.toStringBinary(row));
+ " for tableName=" + Bytes.toString(tableName) + " from cache");
}
}
- @Override
- public void clearCaches(String sn) {
- clearCachedLocationForServer(sn);
- }
-
/*
* Delete all cached entries of a table that maps to a specific location.
- *
- * @param tablename
- * @param server
*/
- private void clearCachedLocationForServer(final String server) {
+ @Override
+ public void clearCaches(final ServerName serverName){
boolean deletedSomething = false;
synchronized (this.cachedRegionLocations) {
- if (!cachedServers.contains(server)) {
+ if (!cachedServers.contains(serverName)) {
return;
}
for (Map<byte[], HRegionLocation> tableLocations :
- cachedRegionLocations.values()) {
+ cachedRegionLocations.values()) {
for (Entry<byte[], HRegionLocation> e : tableLocations.entrySet()) {
- if (e.getValue().getHostnamePort().equals(server)) {
+ if (serverName.equals(e.getValue().getServerName())) {
tableLocations.remove(e.getKey());
deletedSomething = true;
}
}
}
- cachedServers.remove(server);
+ cachedServers.remove(serverName);
}
if (deletedSomething && LOG.isDebugEnabled()) {
- LOG.debug("Removed all cached region locations that map to " + server);
+ LOG.debug("Removed all cached region locations that map to " + serverName);
}
}
@@ -1359,7 +1387,7 @@ public class HConnectionManager {
boolean isStaleUpdate = false;
HRegionLocation oldLocation = null;
synchronized (this.cachedRegionLocations) {
- cachedServers.add(location.getHostnamePort());
+ cachedServers.add(location.getServerName());
oldLocation = tableLocations.get(startKey);
isNewCacheEntry = (oldLocation == null);
// If the server in cache sends us a redirect, assume it's always valid.
@@ -1414,6 +1442,9 @@ public class HConnectionManager {
@Override
public ClientProtocol getClient(final ServerName serverName)
throws IOException {
+ if (isDeadServer(serverName)){
+ throw new RegionServerStoppedException("The server " + serverName + " is dead.");
+ }
return (ClientProtocol)
getProtocol(serverName.getHostname(), serverName.getPort(), clientClass);
}
@@ -1429,6 +1460,9 @@ public class HConnectionManager {
@Override
public AdminProtocol getAdmin(final ServerName serverName, final boolean master)
throws IOException {
+ if (isDeadServer(serverName)){
+ throw new RegionServerStoppedException("The server " + serverName + " is dead.");
+ }
return (AdminProtocol)getProtocol(
serverName.getHostname(), serverName.getPort(), adminClass);
}
@@ -1997,7 +2031,7 @@ public class HConnectionManager {
if (LOG.isTraceEnabled() && (sleepTime > 0)) {
StringBuilder sb = new StringBuilder();
for (Action<R> action : e.getValue().allActions()) {
- sb.append(Bytes.toStringBinary(action.getAction().getRow()) + ";");
+ sb.append(Bytes.toStringBinary(action.getAction().getRow())).append(';');
}
LOG.trace("Sending requests to [" + e.getKey().getHostnamePort()
+ "] with delay of [" + sleepTime + "] for rows [" + sb.toString() + "]");
@@ -2391,6 +2425,9 @@ public class HConnectionManager {
closeZooKeeperWatcher();
this.servers.clear();
this.rpcEngine.close();
+ if (clusterStatusListener != null) {
+ clusterStatusListener.close();
+ }
this.closed = true;
}
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ServerCallable.java Tue Mar 19 09:24:01 2013
@@ -19,6 +19,8 @@
package org.apache.hadoop.hbase.client;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import com.google.protobuf.ServiceException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -53,6 +55,8 @@ import java.util.concurrent.Callable;
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class ServerCallable<T> implements Callable<T> {
+ static final Log LOG = LogFactory.getLog(ServerCallable.class);
+
protected final HConnection connection;
protected final byte [] tableName;
protected final byte [] row;
@@ -62,6 +66,7 @@ public abstract class ServerCallable<T>
protected long globalStartTime;
protected long startTime, endTime;
protected final static int MIN_RPC_TIMEOUT = 2000;
+ protected final static int MIN_WAIT_DEAD_SERVER = 10000;
/**
* @param connection Connection to use.
@@ -154,32 +159,46 @@ public abstract class ServerCallable<T>
List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions =
new ArrayList<RetriesExhaustedException.ThrowableWithExtraContext>();
this.globalStartTime = EnvironmentEdgeManager.currentTimeMillis();
- for (int tries = 0; tries < numRetries; tries++) {
+ for (int tries = 0;; tries++) {
+ long expectedSleep = 0;
try {
beforeCall();
- connect(tries != 0);
+ connect(tries != 0); // if called with false, check table status on ZK
return call();
} catch (Throwable t) {
+ LOG.warn("Received exception, tries=" + tries + ", numRetries=" + numRetries +
+ " message=" + t.getMessage());
+
t = translateException(t);
+ // translateException throws an exception when we should not retry, i.e. when it's the
+ // request that is bad.
+
if (t instanceof SocketTimeoutException ||
t instanceof ConnectException ||
- t instanceof RetriesExhaustedException) {
+ t instanceof RetriesExhaustedException ||
+ getConnection().isDeadServer(location.getServerName())) {
// if thrown these exceptions, we clear all the cache entries that
// map to that slow/dead server; otherwise, let cache miss and ask
// .META. again to find the new location
- HRegionLocation hrl = location;
- if (hrl != null) {
- getConnection().clearCaches(hrl.getHostnamePort());
- }
+ getConnection().clearCaches(location.getServerName());
}
+
RetriesExhaustedException.ThrowableWithExtraContext qt =
new RetriesExhaustedException.ThrowableWithExtraContext(t,
EnvironmentEdgeManager.currentTimeMillis(), toString());
exceptions.add(qt);
- if (tries == numRetries - 1) {
+ if (tries >= numRetries - 1) {
throw new RetriesExhaustedException(tries, exceptions);
}
- long expectedSleep = ConnectionUtils.getPauseTime(pause, tries);
+
+ // If the server is dead, we need to wait a little before retrying, to give
+ // the regions a chance to be reassigned.
+ expectedSleep = ConnectionUtils.getPauseTime(pause, tries);
+ if (expectedSleep < MIN_WAIT_DEAD_SERVER &&
+ getConnection().isDeadServer(location.getServerName())){
+ expectedSleep = ConnectionUtils.addJitter(MIN_WAIT_DEAD_SERVER, 0.10f);
+ }
+
// If, after the planned sleep, there won't be enough time left, we stop now.
if (((this.endTime - this.globalStartTime) + MIN_RPC_TIMEOUT + expectedSleep) >
this.callTimeout) {
@@ -193,13 +212,12 @@ public abstract class ServerCallable<T>
afterCall();
}
try {
- Thread.sleep(ConnectionUtils.getPauseTime(pause, tries));
+ Thread.sleep(expectedSleep);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
- throw new IOException("Interrupted after tries=" + tries, e);
+ throw new IOException("Interrupted after " + tries + " tries on " + numRetries, e);
}
}
- return null;
}
/**
@@ -210,6 +228,7 @@ public abstract class ServerCallable<T>
*/
public T withoutRetries()
throws IOException, RuntimeException {
+ // The code of this method should be shared with withRetries.
this.globalStartTime = EnvironmentEdgeManager.currentTimeMillis();
try {
beforeCall();
@@ -217,6 +236,7 @@ public abstract class ServerCallable<T>
return call();
} catch (Throwable t) {
Throwable t2 = translateException(t);
+ // It would be nice to clear the location cache here.
if (t2 instanceof IOException) {
throw (IOException)t2;
} else {
@@ -227,7 +247,13 @@ public abstract class ServerCallable<T>
}
}
- protected static Throwable translateException(Throwable t) throws IOException {
+ /**
+ * Get the underlying or the remote exception if any; throws the DoNotRetryIOException if found.
+ * @param t the throwable to analyze
+ * @return the translated exception, if it's not a DoNotRetryIOException
+ * @throws DoNotRetryIOException - if we find it, we throw it instead of translating.
+ */
+ protected static Throwable translateException(Throwable t) throws DoNotRetryIOException {
if (t instanceof UndeclaredThrowableException) {
t = t.getCause();
}
@@ -245,4 +271,4 @@ public abstract class ServerCallable<T>
}
return t;
}
-}
\ No newline at end of file
+}
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClient.java Tue Mar 19 09:24:01 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.classification.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.IpcProtocol;
+import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.ConnectionHeader;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.RpcException;
@@ -184,7 +185,6 @@ public class HBaseClient {
return false;
}
-
}
public static class FailedServerException extends IOException {
@@ -1340,6 +1340,30 @@ public class HBaseClient {
}
}
+ /**
+ * Interrupt the connections to the given ip:port server. This should be called if the server
+ * is known as actually dead. This will not prevent current operations from being retried, and,
+ * depending on their own behavior, they may retry on the same server. This can be a feature,
+ * for example at startup. In any case, they're likely to get connection refused (if the
+ * process died) or no route to host: i.e. their next retries should be faster and with a
+ * safe exception.
+ */
+ public void cancelConnections(String hostname, int port, IOException ioe) {
+ synchronized (connections) {
+ for (Connection connection : connections.values()) {
+ if (connection.isAlive() &&
+ connection.getRemoteAddress().getPort() == port &&
+ connection.getRemoteAddress().getHostName().equals(hostname)) {
+ LOG.info("The server on " + hostname + ":" + port +
+ " is dead - stopping the connection " + connection.remoteId);
+ connection.closeConnection();
+ // We could do a connection.interrupt(), but it's safer not to do it, as the
+ // interrupted exception behavior is not defined nor enforced enough.
+ }
+ }
+ }
+ }
+
/** Makes a set of calls in parallel. Each parameter is sent to the
* corresponding address. When all values are available, or have timed out
* or errored, the collected results are returned in an array. The array
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClientRPC.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClientRPC.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClientRPC.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseClientRPC.java Tue Mar 19 09:24:01 2013
@@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.IpcProtoc
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import java.io.IOException;
+import java.io.InterruptedIOException;
import java.net.ConnectException;
import java.net.InetSocketAddress;
import java.net.SocketTimeoutException;
@@ -108,7 +109,8 @@ public class HBaseClientRPC {
try {
Thread.sleep(1000);
} catch (InterruptedException ie) {
- // IGNORE
+ Thread.interrupted();
+ throw new InterruptedIOException();
}
}
}
@@ -147,4 +149,4 @@ public class HBaseClientRPC {
public static void resetRpcTimeout() {
rpcTimeout.remove();
}
-}
\ No newline at end of file
+}
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/ProtobufRpcClientEngine.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/ProtobufRpcClientEngine.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/ProtobufRpcClientEngine.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/ProtobufRpcClientEngine.java Tue Mar 19 09:24:01 2013
@@ -42,12 +42,17 @@ public class ProtobufRpcClientEngine imp
private static final Log LOG =
LogFactory.getLog("org.apache.hadoop.hbase.ipc.ProtobufRpcClientEngine");
+ public HBaseClient getClient() {
+ return client;
+ }
+
protected HBaseClient client;
public ProtobufRpcClientEngine(Configuration conf, String clusterId) {
this.client = new HBaseClient(conf, clusterId);
}
+
@Override
public <T extends IpcProtocol> T getProxy(
Class<T> protocol, InetSocketAddress addr,
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientEngine.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientEngine.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientEngine.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientEngine.java Tue Mar 19 09:24:01 2013
@@ -35,4 +35,6 @@ public interface RpcClientEngine {
/** Shutdown this instance */
void close();
+
+ public HBaseClient getClient();
}
\ No newline at end of file
Modified: hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/RecoverableZooKeeper.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/RecoverableZooKeeper.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/RecoverableZooKeeper.java (original)
+++ hbase/trunk/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/RecoverableZooKeeper.java Tue Mar 19 09:24:01 2013
@@ -233,7 +233,7 @@ public class RecoverableZooKeeper {
private void retryOrThrow(RetryCounter retryCounter, KeeperException e,
String opName) throws KeeperException {
- LOG.warn("Possibly transient ZooKeeper exception: " + e);
+ LOG.warn("Possibly transient ZooKeeper, quorum=" + quorumServers + ", exception=" + e);
if (!retryCounter.shouldRetry()) {
LOG.error("ZooKeeper " + opName + " failed after "
+ retryCounter.getMaxRetries() + " retries");
Modified: hbase/trunk/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/trunk/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java Tue Mar 19 09:24:01 2013
@@ -808,6 +808,23 @@ public final class HConstants {
"hbase.node.health.failure.threshold";
public static final int DEFAULT_HEALTH_FAILURE_THRESHOLD = 3;
+
+ /**
+ * IP to use for the multicast status messages between the master and the clients.
+ * The default address is chosen as one among others within the ones suitable for multicast
+ * messages.
+ */
+ public static final String STATUS_MULTICAST_ADDRESS = "hbase.status.multicast.address.ip";
+ public static final String DEFAULT_STATUS_MULTICAST_ADDRESS = "226.1.1.3";
+
+ /**
+ * The port to use for the multicast messages.
+ */
+ public static final String STATUS_MULTICAST_PORT = "hbase.status.multicast.port";
+ public static final int DEFAULT_STATUS_MULTICAST_PORT = 60100;
+
+
+
private HConstants() {
// Can't be instantiated with this ctor.
}
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Tue Mar 19 09:24:01 2013
@@ -1898,7 +1898,8 @@ public class AssignmentManager extends Z
if (existingPlan != null && existingPlan.getDestination() != null) {
LOG.debug("Found an existing plan for " + region.getRegionNameAsString()
- + " destination server is " + existingPlan.getDestination());
+ + " destination server is " + existingPlan.getDestination() +
+ " accepted as a dest server = " + destServers.contains(existingPlan.getDestination()));
}
if (forceNewPlan
@@ -1918,7 +1919,8 @@ public class AssignmentManager extends Z
" so generated a random one; " + randomPlan + "; " +
serverManager.countOfRegionServers() +
" (online=" + serverManager.getOnlineServers().size() +
- ", available=" + destServers.size() + ") available servers");
+ ", available=" + destServers.size() + ") available servers" +
+ ", forceNewPlan=" + forceNewPlan);
return randomPlan;
}
LOG.debug("Using pre-existing plan for region " +
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java Tue Mar 19 09:24:01 2013
@@ -74,8 +74,8 @@ public class DeadServer {
}
/**
- * @param serverName
- * @return true if this server is on the dead servers list.
+ * @param serverName server name.
+ * @return true if this server is on the dead servers list, false otherwise
*/
public synchronized boolean isDeadServer(final ServerName serverName) {
return deadServers.containsKey(serverName);
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Mar 19 09:24:01 2013
@@ -305,6 +305,7 @@ Server {
private LoadBalancer balancer;
private Thread balancerChore;
private Thread clusterStatusChore;
+ private ClusterStatusPublisher clusterStatusPublisherChore = null;
private CatalogJanitor catalogJanitorChore;
private LogCleaner logCleaner;
@@ -429,12 +430,23 @@ Server {
if (isHealthCheckerConfigured()) {
healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
}
+
+ // Do we publish the status?
+ Class<? extends ClusterStatusPublisher.Publisher> publisherClass =
+ conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
+ ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS,
+ ClusterStatusPublisher.Publisher.class);
+
+ if (publisherClass != null) {
+ clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass);
+ Threads.setDaemonThreadRunning(clusterStatusPublisherChore.getThread());
+ }
}
/**
* Stall startup if we are designated a backup master; i.e. we want someone
* else to become the master before proceeding.
- * @param c
+ * @param c configuration
* @param amm
* @throws InterruptedException
*/
@@ -841,7 +853,7 @@ Server {
// Work on .META. region. Is it in zk in transition?
status.setStatus("Assigning META region");
assignmentManager.getRegionStates().createRegionState(
- HRegionInfo.FIRST_META_REGIONINFO);
+ HRegionInfo.FIRST_META_REGIONINFO);
boolean rit = this.assignmentManager.
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
ServerName currentMetaServer = null;
@@ -1080,6 +1092,9 @@ Server {
if (this.catalogJanitorChore != null) {
this.catalogJanitorChore.interrupt();
}
+ if (this.clusterStatusPublisherChore != null){
+ clusterStatusPublisherChore.interrupt();
+ }
}
@Override
@@ -2539,7 +2554,7 @@ Server {
* No exceptions are thrown if the restore is not running, the result will be "done".
*
* @return done <tt>true</tt> if the restore/clone operation is completed.
- * @throws RestoreSnapshotExcepton if the operation failed.
+ * @throws ServiceException if the operation failed.
*/
@Override
public IsRestoreSnapshotDoneResponse isRestoreSnapshotDone(RpcController controller,
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java Tue Mar 19 09:24:01 2013
@@ -183,6 +183,9 @@ public class HMasterCommandLine extends
} catch (ZooKeeperConnectionException e) {
LOG.error("ZooKeeper not available");
return -1;
+ } catch (IOException e) {
+ LOG.error("Got IOException: " +e.getMessage(), e);
+ return -1;
}
try {
adm.shutdown();
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Tue Mar 19 09:24:01 2013
@@ -177,12 +177,12 @@ public class ServerManager {
* @throws ZooKeeperConnectionException
*/
public ServerManager(final Server master, final MasterServices services)
- throws ZooKeeperConnectionException {
+ throws IOException {
this(master, services, true);
}
ServerManager(final Server master, final MasterServices services,
- final boolean connect) throws ZooKeeperConnectionException {
+ final boolean connect) throws IOException {
this.master = master;
this.services = services;
Configuration c = master.getConfiguration();
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java Tue Mar 19 09:24:01 2013
@@ -89,8 +89,7 @@ class MemStoreFlusher implements FlushRe
private long blockingWaitTime;
private final Counter updatesBlockedMsHighWater = new Counter();
- private FlushHandler[] flushHandlers = null;
- private int handlerCount;
+ private final FlushHandler[] flushHandlers;
/**
* @param conf
@@ -116,7 +115,8 @@ class MemStoreFlusher implements FlushRe
conf.getInt("hbase.hstore.blockingStoreFiles", HStore.DEFAULT_BLOCKING_STOREFILE_COUNT);
this.blockingWaitTime = conf.getInt("hbase.hstore.blockingWaitTime",
90000);
- this.handlerCount = conf.getInt("hbase.hstore.flusher.count", 1);
+ int handlerCount = conf.getInt("hbase.hstore.flusher.count", 1);
+ this.flushHandlers = new FlushHandler[handlerCount];
LOG.info("globalMemStoreLimit=" +
StringUtils.humanReadableInt(this.globalMemStoreLimit) +
", globalMemStoreLimitLowMark=" +
@@ -350,7 +350,6 @@ class MemStoreFlusher implements FlushRe
synchronized void start(UncaughtExceptionHandler eh) {
ThreadFactory flusherThreadFactory = Threads.newDaemonThreadFactory(
server.getServerName().toString() + "-MemStoreFlusher", eh);
- flushHandlers = new FlushHandler[handlerCount];
for (int i = 0; i < flushHandlers.length; i++) {
flushHandlers[i] = new FlushHandler();
flusherThreadFactory.newThread(flushHandlers[i]);
@@ -607,7 +606,7 @@ class MemStoreFlusher implements FlushRe
}
/**
- * @return Count of times {@link #resetDelay()} was called; i.e this is
+ * @return Count of times {@link #requeue(long)} was called; i.e this is
* number of times we've been requeued.
*/
public int getRequeueCount() {
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/thrift/IncrementCoalescer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/thrift/IncrementCoalescer.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/thrift/IncrementCoalescer.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/thrift/IncrementCoalescer.java Tue Mar 19 09:24:01 2013
@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.client.HT
import org.apache.hadoop.hbase.thrift.ThriftServerRunner.HBaseHandler;
import org.apache.hadoop.hbase.thrift.generated.TIncrement;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.metrics.util.MBeanUtil;
import org.apache.thrift.TException;
@@ -168,7 +169,7 @@ public class IncrementCoalescer implemen
LinkedBlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>();
pool =
new ThreadPoolExecutor(CORE_POOL_SIZE, CORE_POOL_SIZE, 50, TimeUnit.MILLISECONDS, queue,
- new DaemonThreadFactory());
+ Threads.newDaemonThreadFactory("IncrementCoalescer"));
MBeanUtil.registerMBean("thrift", "Thrift", this);
}
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java Tue Mar 19 09:24:01 2013
@@ -277,4 +277,15 @@ public abstract class HBaseCluster imple
*/
@Override
public abstract void close() throws IOException;
+
+ /**
+ * Wait for the namenode.
+ *
+ * @throws InterruptedException
+ */
+ public void waitForNamenodeAvailable() throws InterruptedException {
+ }
+
+ public void waitForDatanodesRegistered(int nbDN) throws Exception {
+ }
}
\ No newline at end of file
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java Tue Mar 19 09:24:01 2013
@@ -2354,6 +2354,7 @@ public class HBaseTestingUtility extends
private static final int MIN_RANDOM_PORT = 0xc000;
private static final int MAX_RANDOM_PORT = 0xfffe;
+ private static Random random = new Random();
/**
* Returns a random port. These ports cannot be registered with IANA and are
@@ -2361,7 +2362,7 @@ public class HBaseTestingUtility extends
*/
public static int randomPort() {
return MIN_RANDOM_PORT
- + new Random().nextInt(MAX_RANDOM_PORT - MIN_RANDOM_PORT);
+ + random.nextInt(MAX_RANDOM_PORT - MIN_RANDOM_PORT);
}
/**
@@ -2387,6 +2388,13 @@ public class HBaseTestingUtility extends
return port;
}
+
+ public static String randomMultiCastAddress() {
+ return "226.1.1." + random.nextInt(254);
+ }
+
+
+
public static void waitForHostPort(String host, int port)
throws IOException {
final int maxTimeMs = 10000;
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMultiVersions.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMultiVersions.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMultiVersions.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMultiVersions.java Tue Mar 19 09:24:01 2013
@@ -74,7 +74,7 @@ public class TestMultiVersions {
@Before
public void before()
- throws MasterNotRunningException, ZooKeeperConnectionException {
+ throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
this.admin = new HBaseAdmin(UTIL.getConfiguration());
}
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditorNoCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditorNoCluster.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditorNoCluster.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditorNoCluster.java Tue Mar 19 09:24:01 2013
@@ -211,4 +211,4 @@ public class TestMetaReaderEditorNoClust
zkw.close();
}
}
-}
\ No newline at end of file
+}
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java Tue Mar 19 09:24:01 2013
@@ -131,7 +131,7 @@ public class HConnectionTestingUtility {
* {http://mockito.googlecode.com/svn/branches/1.6/javadoc/org/mockito/Mockito.html#spy(T)}
*/
public static HConnection getSpiedConnection(final Configuration conf)
- throws ZooKeeperConnectionException {
+ throws IOException {
HConnectionKey connectionKey = new HConnectionKey(conf);
synchronized (HConnectionManager.HBASE_INSTANCES) {
HConnectionImplementation connection =
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java Tue Mar 19 09:24:01 2013
@@ -1592,6 +1592,7 @@ public class TestAdmin {
} catch (MasterNotRunningException ignored) {
} catch (ZooKeeperConnectionException ignored) {
} catch (ServiceException ignored) {
+ } catch (IOException ignored) {
}
long end = System.currentTimeMillis();
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java Tue Mar 19 09:24:01 2013
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertNot
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
@@ -44,13 +45,17 @@ import org.apache.hadoop.hbase.HConstant
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.HConnectionManager.HConnectionImplementation;
import org.apache.hadoop.hbase.client.HConnectionManager.HConnectionKey;
+import org.apache.hadoop.hbase.exceptions.RegionServerStoppedException;
import org.apache.hadoop.hbase.exceptions.ZooKeeperConnectionException;
+import org.apache.hadoop.hbase.master.ClusterStatusPublisher;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.AfterClass;
import org.junit.Assert;
@@ -77,6 +82,10 @@ public class TestHCM {
@BeforeClass
public static void setUpBeforeClass() throws Exception {
+ TEST_UTIL.getConfiguration().setClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS,
+ ClusterStatusPublisher.MulticastPublisher.class, ClusterStatusPublisher.Publisher.class);
+ TEST_UTIL.getConfiguration().setClass(ClusterStatusListener.STATUS_LISTENER_CLASS,
+ ClusterStatusListener.MultiCastListener.class, ClusterStatusListener.Listener.class);
TEST_UTIL.startMiniCluster(2);
}
@@ -88,7 +97,7 @@ public class TestHCM {
public static void createNewConfigurations() throws SecurityException,
IllegalArgumentException, NoSuchFieldException,
- IllegalAccessException, InterruptedException, ZooKeeperConnectionException {
+ IllegalAccessException, InterruptedException, ZooKeeperConnectionException, IOException {
HConnection last = null;
for (int i = 0; i <= (HConnectionManager.MAX_CACHED_HBASE_INSTANCES * 2); i++) {
// set random key to differentiate the connection from previous ones
@@ -117,6 +126,61 @@ public class TestHCM {
return HConnectionTestingUtility.getConnectionCount();
}
+ @Test(expected = RegionServerStoppedException.class)
+ public void testClusterStatus() throws Exception {
+ byte[] tn = "testClusterStatus".getBytes();
+ byte[] cf = "cf".getBytes();
+ byte[] rk = "rk1".getBytes();
+
+ JVMClusterUtil.RegionServerThread rs = TEST_UTIL.getHBaseCluster().startRegionServer();
+ rs.waitForServerOnline();
+ final ServerName sn = rs.getRegionServer().getServerName();
+
+ HTable t = TEST_UTIL.createTable(tn, cf);
+ TEST_UTIL.waitTableAvailable(tn);
+
+ while(TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
+ getRegionStates().isRegionsInTransition()){
+ Thread.sleep(1);
+ }
+ final HConnectionImplementation hci = (HConnectionImplementation)t.getConnection();
+ while (t.getRegionLocation(rk).getPort() != sn.getPort()){
+ TEST_UTIL.getHBaseAdmin().move(t.getRegionLocation(rk).getRegionInfo().
+ getEncodedNameAsBytes(), sn.getVersionedBytes());
+ while(TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().
+ getRegionStates().isRegionsInTransition()){
+ Thread.sleep(1);
+ }
+ hci.clearRegionCache(tn);
+ }
+ Assert.assertNotNull(hci.clusterStatusListener);
+ TEST_UTIL.assertRegionOnServer(t.getRegionLocation(rk).getRegionInfo(), sn, 20000);
+
+ Put p1 = new Put(rk);
+ p1.add(cf, "qual".getBytes(), "val".getBytes());
+ t.put(p1);
+
+ rs.getRegionServer().abort("I'm dead");
+
+ // We want the status to be updated. That's at least 10 seconds
+ TEST_UTIL.waitFor(40000, 1000, true, new Waiter.Predicate<Exception>() {
+ @Override
+ public boolean evaluate() throws Exception {
+ return TEST_UTIL.getHBaseCluster().getMaster().getServerManager().
+ getDeadServers().isDeadServer(sn);
+ }
+ });
+
+ TEST_UTIL.waitFor(40000, 1000, true, new Waiter.Predicate<Exception>() {
+ @Override
+ public boolean evaluate() throws Exception {
+ return hci.clusterStatusListener.isDeadServer(sn);
+ }
+ });
+
+ hci.getClient(sn); // will throw an exception: RegionServerStoppedException
+ }
+
@Test
public void abortingHConnectionRemovesItselfFromHCM() throws Exception {
// Save off current HConnections
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterWithScanLimits.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterWithScanLimits.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterWithScanLimits.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterWithScanLimits.java Tue Mar 19 09:24:01 2013
@@ -163,6 +163,8 @@ public class TestFilterWithScanLimits {
assertNull("Master is not running", e);
} catch (ZooKeeperConnectionException e) {
assertNull("Cannot connect to Zookeeper", e);
+ } catch (IOException e) {
+ assertNull("IOException", e);
}
createTable();
prepareData();
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterWrapper.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterWrapper.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterWrapper.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterWrapper.java Tue Mar 19 09:24:01 2013
@@ -173,6 +173,8 @@ public class TestFilterWrapper {
assertNull("Master is not running", e);
} catch (ZooKeeperConnectionException e) {
assertNull("Cannot connect to Zookeeper", e);
+ } catch (IOException e) {
+ assertNull("Caught IOException", e);
}
createTable();
prepareData();
Modified: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java?rev=1458184&r1=1458183&r2=1458184&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java (original)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTimeRangeMapRed.java Tue Mar 19 09:24:01 2013
@@ -90,7 +90,7 @@ public class TestTimeRangeMapRed {
}
@Before
- public void before() throws MasterNotRunningException, ZooKeeperConnectionException {
+ public void before() throws Exception {
this.admin = new HBaseAdmin(UTIL.getConfiguration());
}