You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2009/05/31 16:32:57 UTC
svn commit: r780436 - in /hadoop/hbase/trunk: ./
src/java/org/apache/hadoop/hbase/client/
src/java/org/apache/hadoop/hbase/ipc/
src/java/org/apache/hadoop/hbase/master/
Author: stack
Date: Sun May 31 14:32:56 2009
New Revision: 780436
URL: http://svn.apache.org/viewvc?rev=780436&view=rev
Log:
HBASE-1457 Taking down ROOT/META regionserver can result in cluster becoming in-operational
Modified:
hadoop/hbase/trunk/CHANGES.txt
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaRegion.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Sun May 31 14:32:56 2009
@@ -156,6 +156,8 @@
HBASE-1395 InfoServers no longer put up a UI
HBASE-1302 When a new master comes up, regionservers should continue with
their region assignments from the last master
+ HBASE-1457 Taking down ROOT/META regionserver can result in cluster
+ becoming in-operational (Ryan Rawson via Stack)
IMPROVEMENTS
HBASE-1089 Add count of regions on filesystem to master UI; add percentage
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java Sun May 31 14:32:56 2009
@@ -344,7 +344,7 @@
rowResult.get(COL_REGIONINFO));
// Only examine the rows where the startKey is zero length
- if (info.getStartKey().length == 0) {
+ if (info != null && info.getStartKey().length == 0) {
uniqueTables.add(info.getTableDesc());
}
return true;
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HBaseClient.java Sun May 31 14:32:56 2009
@@ -65,7 +65,7 @@
public class HBaseClient {
public static final Log LOG =
- LogFactory.getLog("org.apache.hadoop.ipc.HBaseClass");
+ LogFactory.getLog("org.apache.hadoop.ipc.HBaseClient");
protected Hashtable<ConnectionId, Connection> connections =
new Hashtable<ConnectionId, Connection>();
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java Sun May 31 14:32:56 2009
@@ -34,6 +34,7 @@
import java.util.concurrent.DelayQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
@@ -124,7 +125,7 @@
volatile DelayQueue<RegionServerOperation> delayedToDoQueue =
new DelayQueue<RegionServerOperation>();
volatile BlockingQueue<RegionServerOperation> toDoQueue =
- new LinkedBlockingQueue<RegionServerOperation>();
+ new PriorityBlockingQueue<RegionServerOperation>();
private final HBaseServer server;
private final HServerAddress address;
@@ -235,6 +236,9 @@
// The rpc-server port can be ephemeral... ensure we have the correct info
this.address = new HServerAddress(server.getListenerAddress());
+ // dont retry too much
+ conf.setInt("hbase.client.retries.number", 3);
+
this.connection = ServerConnectionManager.getConnection(conf);
this.metaRescanInterval =
@@ -494,15 +498,7 @@
return false;
}
LOG.warn("Processing pending operations: " + op.toString(), ex);
- try {
- // put the operation back on the queue... maybe it'll work next time.
- toDoQueue.put(op);
- } catch (InterruptedException e) {
- throw new RuntimeException(
- "Putting into toDoQueue was interrupted.", e);
- } catch (Exception e) {
- LOG.error("main processing loop: " + op.toString(), e);
- }
+ delayedToDoQueue.put(op);
}
return true;
}
@@ -549,7 +545,7 @@
else if(region.isMetaRegion()) {
MetaRegion m =
new MetaRegion(new HServerAddress(address),
- region.getRegionName(), region.getStartKey());
+ region);
regionManager.addMetaRegionToScan(m);
}
assignedRegions.put(region.getRegionName(), region);
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaRegion.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaRegion.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaRegion.java Sun May 31 14:32:56 2009
@@ -23,42 +23,36 @@
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.util.Bytes;
/** Describes a meta region and its server */
public class MetaRegion implements Comparable<MetaRegion> {
private final HServerAddress server;
- private final byte [] regionName;
- private final byte [] startKey;
+ private HRegionInfo regionInfo;
- MetaRegion(final HServerAddress server, final byte [] regionName) {
- this (server, regionName, HConstants.EMPTY_START_ROW);
- }
-
- MetaRegion(final HServerAddress server, final byte [] regionName,
- final byte [] startKey) {
+ MetaRegion(final HServerAddress server, HRegionInfo regionInfo) {
if (server == null) {
throw new IllegalArgumentException("server cannot be null");
}
this.server = server;
- if (regionName == null) {
- throw new IllegalArgumentException("regionName cannot be null");
+ if (regionInfo == null) {
+ throw new IllegalArgumentException("regionInfo cannot be null");
}
- this.regionName = regionName;
- this.startKey = startKey;
+ this.regionInfo = regionInfo;
}
@Override
public String toString() {
- return "{regionname: " + Bytes.toString(this.regionName) +
- ", startKey: <" + Bytes.toString(this.startKey) +
- ">, server: " + this.server.toString() + "}";
+ return "{server: " + this.server.toString() + ", regionname: " +
+ regionInfo.getRegionNameAsString() + ", startKey: <" +
+ Bytes.toString(regionInfo.getStartKey()) + ">}";
}
/** @return the regionName */
public byte [] getRegionName() {
- return regionName;
+ return regionInfo.getRegionName();
}
/** @return the server */
@@ -68,7 +62,11 @@
/** @return the startKey */
public byte [] getStartKey() {
- return startKey;
+ return regionInfo.getStartKey();
+ }
+
+ public HRegionInfo getRegionInfo() {
+ return regionInfo;
}
@Override
@@ -78,22 +76,17 @@
@Override
public int hashCode() {
- int result = Arrays.hashCode(this.regionName);
- result ^= Arrays.hashCode(this.startKey);
- return result;
+ return regionInfo.hashCode();
}
// Comparable
public int compareTo(MetaRegion other) {
- int result = Bytes.compareTo(this.regionName, other.getRegionName());
- if(result == 0) {
- result = Bytes.compareTo(this.startKey, other.getStartKey());
- if (result == 0) {
- // Might be on different host?
- result = this.server.compareTo(other.server);
- }
+ int cmp = regionInfo.compareTo(other.regionInfo);
+ if(cmp == 0) {
+ // Might be on different host?
+ cmp = this.server.compareTo(other.server);
}
- return result;
+ return cmp;
}
}
\ No newline at end of file
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java Sun May 31 14:32:56 2009
@@ -25,6 +25,7 @@
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.RegionHistorian;
+import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.util.Bytes;
@@ -58,67 +59,68 @@
@Override
protected boolean process() throws IOException {
- Boolean result =
- new RetryableMetaOperation<Boolean>(getMetaRegion(), this.master) {
- private final RegionHistorian historian = RegionHistorian.getInstance();
-
- public Boolean call() throws IOException {
- LOG.info(regionInfo.getRegionNameAsString() + " open on " +
- serverInfo.getServerAddress().toString());
- if (!metaRegionAvailable()) {
- // We can't proceed unless the meta region we are going to update
- // is online. metaRegionAvailable() has put this operation on the
- // delayedToDoQueue, so return true so the operation is not put
- // back on the toDoQueue
- return true;
- }
+ if (!metaRegionAvailable()) {
+ // We can't proceed unless the meta region we are going to update
+ // is online. metaRegionAvailable() has put this operation on the
+ // delayedToDoQueue, so return true so the operation is not put
+ // back on the toDoQueue
+ return true;
+ }
- // Register the newly-available Region's location.
- LOG.info("updating row " + regionInfo.getRegionNameAsString() +
- " in region " + Bytes.toString(metaRegionName) + " with " +
- " with startcode " + serverInfo.getStartCode() + " and server " +
- serverInfo.getServerAddress());
- BatchUpdate b = new BatchUpdate(regionInfo.getRegionName());
- b.put(COL_SERVER,
- Bytes.toBytes(serverInfo.getServerAddress().toString()));
- b.put(COL_STARTCODE, Bytes.toBytes(serverInfo.getStartCode()));
- server.batchUpdate(metaRegionName, b, -1L);
- if (!this.historian.isOnline()) {
- // This is safest place to do the onlining of the historian in
- // the master. When we get to here, we know there is a .META.
- // for the historian to go against.
- this.historian.online(this.master.getConfiguration());
+ final RegionHistorian historian = RegionHistorian.getInstance();
+ HRegionInterface server =
+ master.connection.getHRegionConnection(getMetaRegion().getServer());
+ LOG.info(regionInfo.getRegionNameAsString() + " open on " +
+ serverInfo.getServerAddress().toString());
+
+ // Register the newly-available Region's location.
+ LOG.info("updating row " + regionInfo.getRegionNameAsString() +
+ " in region " + Bytes.toString(metaRegionName) + " with " +
+ " with startcode " + serverInfo.getStartCode() + " and server " +
+ serverInfo.getServerAddress());
+ BatchUpdate b = new BatchUpdate(regionInfo.getRegionName());
+ b.put(COL_SERVER,
+ Bytes.toBytes(serverInfo.getServerAddress().toString()));
+ b.put(COL_STARTCODE, Bytes.toBytes(serverInfo.getStartCode()));
+ server.batchUpdate(metaRegionName, b, -1L);
+ if (!historian.isOnline()) {
+ // This is safest place to do the onlining of the historian in
+ // the master. When we get to here, we know there is a .META.
+ // for the historian to go against.
+ historian.online(this.master.getConfiguration());
+ }
+ historian.addRegionOpen(regionInfo, serverInfo.getServerAddress());
+ synchronized (master.regionManager) {
+ if (isMetaTable) {
+ // It's a meta region.
+ MetaRegion m =
+ new MetaRegion(new HServerAddress(serverInfo.getServerAddress()),
+ regionInfo);
+ if (!master.regionManager.isInitialMetaScanComplete()) {
+ // Put it on the queue to be scanned for the first time.
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Adding " + m.toString() + " to regions to scan");
}
- this.historian.addRegionOpen(regionInfo, serverInfo.getServerAddress());
- synchronized (master.regionManager) {
- if (isMetaTable) {
- // It's a meta region.
- MetaRegion m =
- new MetaRegion(new HServerAddress(serverInfo.getServerAddress()),
- regionInfo.getRegionName(), regionInfo.getStartKey());
- if (!master.regionManager.isInitialMetaScanComplete()) {
- // Put it on the queue to be scanned for the first time.
- if (LOG.isDebugEnabled()) {
- LOG.debug("Adding " + m.toString() + " to regions to scan");
- }
- master.regionManager.addMetaRegionToScan(m);
- } else {
- // Add it to the online meta regions
- if (LOG.isDebugEnabled()) {
- LOG.debug("Adding to onlineMetaRegions: " + m.toString());
- }
- master.regionManager.putMetaRegionOnline(m);
- // Interrupting the Meta Scanner sleep so that it can
- // process regions right away
- master.regionManager.metaScannerThread.interrupt();
- }
- }
- // If updated successfully, remove from pending list.
- master.regionManager.removeRegion(regionInfo);
- return true;
+ master.regionManager.addMetaRegionToScan(m);
+ } else {
+ // Add it to the online meta regions
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Adding to onlineMetaRegions: " + m.toString());
}
+ master.regionManager.putMetaRegionOnline(m);
+ // Interrupting the Meta Scanner sleep so that it can
+ // process regions right away
+ master.regionManager.metaScannerThread.interrupt();
}
- }.doWithRetries();
- return result == null ? true : result;
+ }
+ // If updated successfully, remove from pending list.
+ master.regionManager.removeRegion(regionInfo);
+ return true;
+ }
+ }
+
+ @Override
+ protected int getPriority() {
+ return 0; // highest priority
}
}
\ No newline at end of file
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionStatusChange.java Sun May 31 14:32:56 2009
@@ -31,7 +31,7 @@
protected final HRegionInfo regionInfo;
private volatile MetaRegion metaRegion = null;
protected volatile byte[] metaRegionName = null;
-
+
/**
* @param master
* @param regionInfo
@@ -47,6 +47,7 @@
if (isMetaTable) {
// This operation is for the meta table
if (!rootAvailable()) {
+ requeue();
// But we can't proceed unless the root region is available
available = false;
}
@@ -67,7 +68,7 @@
if (isMetaTable) {
this.metaRegionName = HRegionInfo.ROOT_REGIONINFO.getRegionName();
this.metaRegion = new MetaRegion(master.getRootRegionLocation(),
- this.metaRegionName, HConstants.EMPTY_START_ROW);
+ HRegionInfo.ROOT_REGIONINFO);
} else {
this.metaRegion =
master.regionManager.getFirstMetaRegionForRegion(regionInfo);
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java Sun May 31 14:32:56 2009
@@ -30,6 +30,7 @@
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
+import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.HLog;
import org.apache.hadoop.hbase.regionserver.HRegion;
@@ -44,12 +45,14 @@
*/
class ProcessServerShutdown extends RegionServerOperation {
private final String deadServer;
- private final boolean rootRegionServer;
- private boolean rootRegionReassigned = false;
+ private boolean isRootServer;
+ private List<MetaRegion> metaRegions;
+
private Path oldLogDir;
private boolean logSplit;
private boolean rootRescanned;
-
+ private HServerAddress deadServerAddress;
+
private static class ToDoEntry {
boolean regionOffline;
@@ -66,17 +69,33 @@
/**
* @param master
* @param serverInfo
- * @param rootRegionServer
*/
- public ProcessServerShutdown(HMaster master, HServerInfo serverInfo,
- boolean rootRegionServer) {
+ public ProcessServerShutdown(HMaster master, HServerInfo serverInfo) {
super(master);
this.deadServer = HServerInfo.getServerName(serverInfo);
- this.rootRegionServer = rootRegionServer;
+ this.deadServerAddress = serverInfo.getServerAddress();
this.logSplit = false;
this.rootRescanned = false;
this.oldLogDir =
new Path(master.rootdir, HLog.getHLogDirectoryName(serverInfo));
+
+ // check to see if I am responsible for either ROOT or any of the META tables.
+
+ closeMetaRegions();
+ }
+
+ private void closeMetaRegions() {
+ isRootServer = master.regionManager.isRootServer(deadServerAddress);
+ if (isRootServer) {
+ master.regionManager.unsetRootRegion();
+ }
+ List<byte[]> metaStarts = master.regionManager.listMetaRegionsForServer(deadServerAddress);
+
+ metaRegions = new ArrayList<MetaRegion>();
+ for (byte [] region : metaStarts) {
+ MetaRegion r = master.regionManager.offlineMetaRegion(region);
+ metaRegions.add(r);
+ }
}
@Override
@@ -254,16 +273,22 @@
logSplit = true;
}
- if (this.rootRegionServer && !this.rootRegionReassigned) {
- // avoid multiple root region reassignment
- this.rootRegionReassigned = true;
- // The server that died was serving the root region. Now that the log
- // has been split, get it reassigned.
+ LOG.info("Log split complete, meta reassignment and scanning:");
+
+ if (this.isRootServer) {
+ LOG.info("ProcessServerShutdown reassigning ROOT region");
master.regionManager.reassignRootRegion();
- // When we call rootAvailable below, it will put us on the delayed
- // to do queue to allow some time to pass during which the root
- // region will hopefully get reassigned.
+
+ isRootServer = false; // prevent double reassignment... heh.
+ }
+
+ for (MetaRegion metaRegion : metaRegions) {
+ LOG.info("ProcessServerShutdown setting to unassigned: " + metaRegion.toString());
+ master.regionManager.setUnassigned(metaRegion.getRegionInfo(), true);
}
+ // once the meta regions are online, "forget" about them. Since there are explicit
+ // checks below to make sure meta/root are online, this is likely to occur.
+ metaRegions.clear();
if (!rootAvailable()) {
// Return true so that worker does not put this request back on the
@@ -276,8 +301,7 @@
// Scan the ROOT region
Boolean result = new ScanRootRegion(
new MetaRegion(master.getRootRegionLocation(),
- HRegionInfo.ROOT_REGIONINFO.getRegionName(),
- HConstants.EMPTY_START_ROW), this.master).doWithRetries();
+ HRegionInfo.ROOT_REGIONINFO), this.master).doWithRetries();
if (result == null) {
// Master is closing - give up
return true;
@@ -315,4 +339,9 @@
}
return true;
}
+
+ @Override
+ protected int getPriority() {
+ return 2; // high but not highest priority
+ }
}
\ No newline at end of file
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java Sun May 31 14:32:56 2009
@@ -96,7 +96,7 @@
*
* @see RegionState inner-class below
*/
- private final SortedMap<String, RegionState> regionsInTransition =
+ final SortedMap<String, RegionState> regionsInTransition =
Collections.synchronizedSortedMap(new TreeMap<String, RegionState>());
// How many regions to assign a server at a time.
@@ -164,6 +164,7 @@
rootRegionLocation.set(null);
regionsInTransition.remove(
HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString());
+ LOG.info("-ROOT- region unset (but not set to be reassigned)");
}
}
@@ -175,6 +176,7 @@
s.setUnassigned();
regionsInTransition.put(
HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString(), s);
+ LOG.info("ROOT inserted into regionsInTransition");
}
}
}
@@ -191,9 +193,12 @@
void assignRegions(HServerInfo info, HRegionInfo[] mostLoadedRegions,
ArrayList<HMsg> returnMsgs) {
HServerLoad thisServersLoad = info.getLoad();
+ boolean isSingleServer = master.serverManager.numServers() == 1;
+
// figure out what regions need to be assigned and aren't currently being
// worked on elsewhere.
- Set<RegionState> regionsToAssign = regionsAwaitingAssignment();
+ Set<RegionState> regionsToAssign = regionsAwaitingAssignment(info.getServerAddress(),
+ isSingleServer);
if (regionsToAssign.size() == 0) {
// There are no regions waiting to be assigned.
if (!inSafeMode()) {
@@ -203,12 +208,12 @@
}
} else {
// if there's only one server, just give it all the regions
- if (master.serverManager.numServers() == 1) {
+ if (isSingleServer) {
assignRegionsToOneServer(regionsToAssign, info, returnMsgs);
} else {
// otherwise, give this server a few regions taking into account the
// load of all the other servers.
- assignRegionsToMultipleServers(thisServersLoad, regionsToAssign,
+ assignRegionsToMultipleServers(thisServersLoad, regionsToAssign,
info, returnMsgs);
}
}
@@ -224,11 +229,21 @@
private void assignRegionsToMultipleServers(final HServerLoad thisServersLoad,
final Set<RegionState> regionsToAssign, final HServerInfo info,
final ArrayList<HMsg> returnMsgs) {
-
+
+ boolean isMetaAssign = false;
+ for (RegionState s : regionsToAssign) {
+ if (s.getRegionInfo().isMetaRegion())
+ isMetaAssign = true;
+ }
+
int nRegionsToAssign = regionsToAssign.size();
int nregions = regionsPerServer(nRegionsToAssign, thisServersLoad);
+ LOG.debug("multi assing for " + info + ": nregions to assign: "
+ + nRegionsToAssign
+ +" and nregions: " + nregions
+ + " metaAssign: " + isMetaAssign);
nRegionsToAssign -= nregions;
- if (nRegionsToAssign > 0) {
+ if (nRegionsToAssign > 0 || isMetaAssign) {
// We still have more regions to assign. See how many we can assign
// before this server becomes more heavily loaded than the next
// most heavily loaded server.
@@ -244,6 +259,8 @@
// continue;
}
+ LOG.debug("Doing for " + info + " nregions: " + nregions +
+ " and nRegionsToAssign: " + nRegionsToAssign);
if (nregions < nRegionsToAssign) {
// There are some more heavily loaded servers
// but we can't assign all the regions to this server.
@@ -306,8 +323,32 @@
LOG.info("Assigning region " + regionName + " to " + sinfo.getServerName());
rs.setPendingOpen(sinfo.getServerName());
this.regionsInTransition.put(regionName, rs);
- this.historian.addRegionAssignment(rs.getRegionInfo(),
- sinfo.getServerName());
+
+ // Since the meta/root may not be available at this moment, we queue the historian update as a to-do operation instead of writing it inline.
+ try {
+ // TODO move this into an actual class, and use the RetryableMetaOperation
+ master.toDoQueue.put(
+ new RegionServerOperation(master) {
+ protected boolean process() throws IOException {
+ if (!rootAvailable() || !metaTableAvailable()) {
+ return true; // the two above us will put us on the delayed queue
+ }
+
+ // this call can cause problems if meta/root is offline!
+ historian.addRegionAssignment(rs.getRegionInfo(),
+ sinfo.getServerName());
+ return true;
+ }
+ public String toString() {
+ return "RegionAssignmentHistorian from " + sinfo.getServerName();
+ }
+ }
+ );
+ } catch (InterruptedException e) {
+ // ignore and don't write the region historian
+ LOG.info("doRegionAssignment: Couldn't queue the region historian due to exception: " + e);
+ }
+
returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, rs.getRegionInfo()));
}
@@ -355,18 +396,40 @@
* only caller (assignRegions, whose caller is ServerManager.processMsgs) owns
* the monitor for RegionManager
*/
- private Set<RegionState> regionsAwaitingAssignment() {
+ private Set<RegionState> regionsAwaitingAssignment(HServerAddress addr,
+ boolean isSingleServer) {
// set of regions we want to assign to this server
Set<RegionState> regionsToAssign = new HashSet<RegionState>();
-
- // Look over the set of regions that aren't currently assigned to
+
+ boolean isMetaServer = isMetaServer(addr);
+
+ // Handle if root is unassigned... only assign root if root is offline.
+ RegionState rootState = regionsInTransition.get(HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString());
+ if (rootState != null && rootState.isUnassigned()) {
+ // make sure root isnt assigned here first.
+ // if so return 'empty list'
+ // by definition there is no way this could be a ROOT region (since it's
+ // unassigned) so just make sure it isn't hosting META regions.
+ if (!isMetaServer) {
+ regionsToAssign.add(rootState);
+ }
+ return regionsToAssign;
+ }
+
+ // Look over the set of regions that aren't currently assigned to
// determine which we should assign to this server.
+ boolean reassigningMetas = numberOfMetaRegions.get() != onlineMetaRegions.size();
+ boolean isMetaOrRoot = isMetaServer || isRootServer(addr);
+ if (reassigningMetas && isMetaOrRoot && !isSingleServer) {
+ return regionsToAssign; // dont assign anything to this server.
+ }
+
for (RegionState s: regionsInTransition.values()) {
HRegionInfo i = s.getRegionInfo();
if (i == null) {
continue;
}
- if (numberOfMetaRegions.get() != onlineMetaRegions.size() &&
+ if (reassigningMetas &&
!i.isMetaRegion()) {
// Can't assign user regions until all meta regions have been assigned
// and are on-line
@@ -455,7 +518,7 @@
}
LOG.info("Skipped " + skipped + " region(s) that are in transition states");
}
-
+
static class TableDirFilter implements PathFilter {
public boolean accept(Path path) {
@@ -607,7 +670,7 @@
Bytes.toString(HConstants.ROOT_TABLE_NAME));
}
metaRegions.add(new MetaRegion(rootRegionLocation.get(),
- HRegionInfo.ROOT_REGIONINFO.getRegionName()));
+ HRegionInfo.ROOT_REGIONINFO));
} else {
if (!areAllMetaRegionsOnline()) {
throw new NotAllMetaRegionsOnlineException();
@@ -685,7 +748,7 @@
* @return list of MetaRegion objects
*/
public List<MetaRegion> getListOfOnlineMetaRegions() {
- List<MetaRegion> regions = null;
+ List<MetaRegion> regions;
synchronized(onlineMetaRegions) {
regions = new ArrayList<MetaRegion>(onlineMetaRegions.values());
}
@@ -712,11 +775,104 @@
/**
* Set an online MetaRegion offline - remove it from the map.
* @param startKey region name
+ * @return the MetaRegion that was taken offline.
*/
- public void offlineMetaRegion(byte [] startKey) {
- onlineMetaRegions.remove(startKey);
+ public MetaRegion offlineMetaRegion(byte [] startKey) {
+ LOG.info("META region removed from onlineMetaRegions");
+ return onlineMetaRegions.remove(startKey);
}
-
+
+ public boolean isRootServer(HServerAddress server) {
+ if (master.getRootRegionLocation() != null
+ && server.equals(master.getRootRegionLocation()))
+ return true;
+ return false;
+ }
+
+ /**
+ * Returns the list of byte[] start-keys for any .META. regions hosted
+ * on the indicated server.
+ *
+ * @param server server address
+ * @return list of meta region start-keys.
+ */
+ public List<byte[]> listMetaRegionsForServer(HServerAddress server) {
+ List<byte[]> metas = new ArrayList<byte[]>();
+
+ for ( MetaRegion region : onlineMetaRegions.values() ) {
+ if (server.equals(region.getServer())) {
+ metas.add(region.getStartKey());
+ }
+ }
+
+ return metas;
+ }
+
+ /**
+ * Does this server have any META regions open on it, or any meta
+ * regions being assigned to it?
+ *
+ * @param server Server IP:port
+ * @return true if server has meta region assigned
+ */
+ public boolean isMetaServer(HServerAddress server) {
+ for ( MetaRegion region : onlineMetaRegions.values() ) {
+ if (server.equals(region.getServer())) {
+ return true;
+ }
+ }
+
+ // This might be expensive, but we need to make sure we dont
+ // get double assignment to the same regionserver.
+ for (RegionState s : regionsInTransition.values()) {
+ if (s.getRegionInfo().isMetaRegion()
+ && !s.isUnassigned()
+ && s.getServerName() != null
+ && s.getServerName().equals(server.toString())) {
+ // Has an outstanding meta region to be assigned.
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Call to take this metaserver offline for immediate reassignment. Used only
+ * when we know a region has shut down cleanly.
+ *
+ * A meta server is a server that hosts either -ROOT- or any .META. regions.
+ *
+ * If the shutdown may have been unclean, use ProcessServerShutdown, which
+ * calls other methods to immediately unassign root/meta but delay the reassign until the
+ * log has been split.
+ *
+ * @param server the server that went down
+ * @return true if this was in fact a meta server, false if it did not carry meta regions.
+ */
+ public synchronized boolean offlineMetaServer(HServerAddress server) {
+ boolean hasMeta = false;
+
+ // check to see if ROOT and/or .META. are on this server, reassign them.
+ // use master.getRootRegionLocation.
+ if (master.getRootRegionLocation() != null &&
+ server.equals(master.getRootRegionLocation())) {
+ LOG.info("Offlined ROOT server: " + server);
+ reassignRootRegion();
+ hasMeta = true;
+ }
+ // AND
+ for ( MetaRegion region : onlineMetaRegions.values() ) {
+ if (server.equals(region.getServer())) {
+ LOG.info("Offlining META region: " + region);
+ offlineMetaRegion(region.getStartKey());
+ // Set for reassignment.
+ setUnassigned(region.getRegionInfo(), true);
+ hasMeta = true;
+ }
+ }
+ return hasMeta;
+ }
+
/**
* Remove a region from the region state map.
*
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionServerOperation.java Sun May 31 14:32:56 2009
@@ -89,6 +89,14 @@
}
return available;
}
-
+
+ public int compareTo(RegionServerOperation other) {
+ return getPriority() - other.getPriority();
+ }
+
+ // the priority of this operation; 0 is the highest priority (lower values are dequeued first)
+ protected int getPriority() {
+ return Integer.MAX_VALUE;
+ }
protected abstract boolean process() throws IOException;
}
\ No newline at end of file
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RetryableMetaOperation.java Sun May 31 14:32:56 2009
@@ -91,6 +91,7 @@
exceptions.add(e);
}
} catch (Exception e) {
+ LOG.debug("Exception in RetryableMetaOperation: ", e);
throw new RuntimeException(e);
}
sleeper.sleep();
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java Sun May 31 14:32:56 2009
@@ -52,7 +52,7 @@
synchronized(scannerLock) {
if (master.getRootRegionLocation() != null) {
scanRegion(new MetaRegion(master.getRootRegionLocation(),
- HRegionInfo.ROOT_REGIONINFO.getRegionName()));
+ HRegionInfo.ROOT_REGIONINFO));
}
}
} catch (IOException e) {
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=780436&r1=780435&r2=780436&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java Sun May 31 14:32:56 2009
@@ -184,14 +184,9 @@
// The startup message was from a known server with the same name.
// Timeout the old one right away.
HServerAddress root = master.getRootRegionLocation();
- boolean rootServer = false;
- if (root != null && root.equals(storedInfo.getServerAddress())) {
- master.regionManager.unsetRootRegion();
- rootServer = true;
- }
try {
master.toDoQueue.put(
- new ProcessServerShutdown(master, storedInfo, rootServer));
+ new ProcessServerShutdown(master, storedInfo));
} catch (InterruptedException e) {
LOG.error("Insertion into toDoQueue was interrupted", e);
}
@@ -321,7 +316,8 @@
private void processRegionServerExit(HServerInfo serverInfo, HMsg[] msgs) {
synchronized (serversToServerInfo) {
try {
- // HRegionServer is shutting down.
+ // This method removes ROOT/META from the list and marks them to be reassigned
+ // in addition to other housework.
if (removeServerInfo(serverInfo.getServerName(),
serverInfo.getServerAddress())) {
// Only process the exit message if the server still has registered info.
@@ -335,13 +331,9 @@
LOG.info("Processing " + msgs[i] + " from " +
serverInfo.getServerName());
HRegionInfo info = msgs[i].getRegionInfo();
- synchronized (master.regionManager) {
- if (info.isRootRegion()) {
- master.regionManager.reassignRootRegion();
- } else {
- if (info.isMetaTable()) {
- master.regionManager.offlineMetaRegion(info.getStartKey());
- }
+ // Meta/root region offlining is handled by removeServerInfo above.
+ if (!info.isMetaRegion()) {
+ synchronized (master.regionManager) {
if (!master.regionManager.isOfflined(
info.getRegionNameAsString())) {
master.regionManager.setUnassigned(info, true);
@@ -467,14 +459,18 @@
master.regionManager.setPendingClose(i.getRegionNameAsString());
}
+
// Figure out what the RegionServer ought to do, and write back.
// Should we tell it close regions because its overloaded? If its
// currently opening regions, leave it alone till all are open.
- if (openingCount < this.nobalancingCount) {
+ LOG.debug("Process all wells: " + serverInfo + " openingCount: " + openingCount +
+ ", nobalancingCount: " + nobalancingCount);
+ if ((openingCount < this.nobalancingCount)) {
this.master.regionManager.assignRegions(serverInfo, mostLoadedRegions,
returnMsgs);
}
+
// Send any pending table actions.
this.master.regionManager.applyActions(serverInfo, returnMsgs);
}
@@ -644,10 +640,8 @@
// This method can be called a couple of times during shutdown.
if (info != null) {
LOG.info("Removing server's info " + serverName);
- if (master.getRootRegionLocation() != null &&
- info.getServerAddress().equals(master.getRootRegionLocation())) {
- master.regionManager.unsetRootRegion();
- }
+ master.regionManager.offlineMetaServer(info.getServerAddress());
+
infoUpdated = true;
// update load information
@@ -785,17 +779,7 @@
// Remove the server from the known servers list and update load info
serverAddressToServerInfo.remove(serverAddress);
HServerInfo info = serversToServerInfo.remove(server);
- boolean rootServer = false;
if (info != null) {
- HServerAddress root = master.getRootRegionLocation();
- if (root != null && root.equals(info.getServerAddress())) {
- // NOTE: If the server was serving the root region, we cannot
- // reassign
- // it here because the new server will start serving the root region
- // before ProcessServerShutdown has a chance to split the log file.
- master.regionManager.unsetRootRegion();
- rootServer = true;
- }
String serverName = HServerInfo.getServerName(info);
HServerLoad load = serversToLoad.remove(serverName);
if (load != null) {
@@ -812,8 +796,7 @@
}
deadServers.add(server);
try {
- master.toDoQueue.put(new ProcessServerShutdown(master, info,
- rootServer));
+ master.toDoQueue.put(new ProcessServerShutdown(master, info));
} catch (InterruptedException e) {
LOG.error("insert into toDoQueue was interrupted", e);
}