You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ji...@apache.org on 2008/12/24 02:06:55 UTC
svn commit: r729186 - in /hadoop/hbase/trunk: ./
src/java/org/apache/hadoop/hbase/client/
src/java/org/apache/hadoop/hbase/master/
Author: jimk
Date: Tue Dec 23 17:06:54 2008
New Revision: 729186
URL: http://svn.apache.org/viewvc?rev=729186&view=rev
Log:
HBASE-543, HBASE-1046, HBase-1051 A region's state is kept in several places in the master opening the possibility for race conditions
Modified:
hadoop/hbase/trunk/CHANGES.txt
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/ServerConnection.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Tue Dec 23 17:06:54 2008
@@ -115,6 +115,8 @@
HBASE-1079 Dumb NPE in ServerCallable hides the RetriesExhausted exception
HBASE-782 The DELETE key in the hbase shell deletes the wrong character
(Tim Sell via Stack)
+ HBASE-543, HBASE-1046, HBase-1051 A region's state is kept in several places
+ in the master opening the possibility for race conditions
IMPROVEMENTS
HBASE-901 Add a limit to key length, check key and value length on client side
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java Tue Dec 23 17:06:54 2008
@@ -177,7 +177,15 @@
return this.pause * HConstants.RETRY_BACKOFF[ntries];
}
+ public void unsetRootRegionLocation() {
+ this.rootRegionLocation = null;
+ }
+
public void setRootRegionLocation(HRegionLocation rootRegion) {
+ if (rootRegion == null) {
+ throw new IllegalArgumentException(
+ "Cannot set root region location to null.");
+ }
this.rootRegionLocation = rootRegion;
}
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/ServerConnection.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/ServerConnection.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/ServerConnection.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/ServerConnection.java Tue Dec 23 17:06:54 2008
@@ -33,4 +33,10 @@
* @param rootRegion
*/
public void setRootRegionLocation(HRegionLocation rootRegion);
+
+ /**
+ * Unset the root region location in the connection. Called by
+ * ServerManager.processRegionClose.
+ */
+ public void unsetRootRegionLocation();
}
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java Tue Dec 23 17:06:54 2008
@@ -337,79 +337,85 @@
final String serverName, final long startCode)
throws IOException {
- // Skip region - if ...
- if(info.isOffline() // offline
- || regionManager.isClosing(info.getRegionName())) { // queued for offline
-
- regionManager.noLongerUnassigned(info);
- regionManager.noLongerPending(info.getRegionName());
- return;
- }
- HServerInfo storedInfo = null;
- boolean deadServer = false;
- if (serverName.length() != 0) {
-
- if (regionManager.isMarkedToClose(serverName, info.getRegionName())) {
- // Skip if region is on kill list
- if(LOG.isDebugEnabled()) {
- LOG.debug("not assigning region (on kill list): " +
- info.getRegionNameAsString());
- }
+ synchronized (regionManager) {
+ // Skip region - if ...
+ if(info.isOffline() // offline
+ || regionManager.isOfflined(info.getRegionName())) { // queued for offline
+
+ regionManager.removeRegion(info);
return;
}
-
- storedInfo = master.serverManager.getServerInfo(serverName);
- deadServer = master.serverManager.isDead(serverName);
- }
+ HServerInfo storedInfo = null;
+ boolean deadServer = false;
+ if (serverName.length() != 0) {
- /*
- * If the server is a dead server or its startcode is off -- either null
- * or doesn't match the start code for the address -- then add it to the
- * list of unassigned regions IF not already there (or pending open).
- */
- if (!deadServer && !regionManager.isUnassigned(info) &&
- !regionManager.isPending(info.getRegionName())
- && (storedInfo == null || storedInfo.getStartCode() != startCode)) {
- // The current assignment is invalid
- if (LOG.isDebugEnabled()) {
- LOG.debug("Current assignment of " +
- info.getRegionNameAsString() +
- " is not valid: serverInfo: " + storedInfo + ", passed startCode: " +
- startCode + ", storedInfo.startCode: " +
- ((storedInfo != null)? storedInfo.getStartCode(): -1) +
- ", unassignedRegions: " +
- regionManager.isUnassigned(info) +
- ", pendingRegions: " +
- regionManager.isPending(info.getRegionName()));
+ if (regionManager.isOfflined(info.getRegionName())) {
+ // Skip if region is on kill list
+ if(LOG.isDebugEnabled()) {
+ LOG.debug("not assigning region (on kill list): " +
+ info.getRegionNameAsString());
+ }
+ return;
+ }
+
+ storedInfo = master.serverManager.getServerInfo(serverName);
+ deadServer = master.serverManager.isDead(serverName);
}
- // Recover the region server's log if there is one.
- // This is only done from here if we are restarting and there is stale
- // data in the meta region. Once we are on-line, dead server log
- // recovery is handled by lease expiration and ProcessServerShutdown
- if (!regionManager.isInitialMetaScanComplete() && serverName.length() != 0) {
- StringBuilder dirName = new StringBuilder("log_");
- dirName.append(serverName.replace(":", "_"));
- Path logDir = new Path(master.rootdir, dirName.toString());
- try {
- if (master.fs.exists(logDir)) {
- regionManager.splitLogLock.lock();
- try {
- HLog.splitLog(master.rootdir, logDir, master.fs,
- master.getConfiguration());
- } finally {
- regionManager.splitLogLock.unlock();
+
+ /*
+ * If the server is a dead server or its startcode is off -- either null
+ * or doesn't match the start code for the address -- then add it to the
+ * list of unassigned regions IF not already there (or pending open).
+ */
+ if ((deadServer ||
+ (storedInfo == null || storedInfo.getStartCode() != startCode)) &&
+ (!regionManager.isUnassigned(info) &&
+ !regionManager.isPending(info.getRegionName()) &&
+ !regionManager.isAssigned(info.getRegionName()))) {
+
+ // The current assignment is invalid
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Current assignment of " +
+ info.getRegionNameAsString() +
+ " is not valid." +
+ (storedInfo == null ? " Server '" + serverName + "' unknown." :
+ " serverInfo: " + storedInfo + ", passed startCode: " +
+ startCode + ", storedInfo.startCode: " + storedInfo.getStartCode()) +
+ " Region is not unassigned, assigned or pending");
+ }
+
+ // Recover the region server's log if there is one.
+ // This is only done from here if we are restarting and there is stale
+ // data in the meta region. Once we are on-line, dead server log
+ // recovery is handled by lease expiration and ProcessServerShutdown
+
+ if (!regionManager.isInitialMetaScanComplete() &&
+ serverName.length() != 0) {
+ StringBuilder dirName = new StringBuilder("log_");
+ dirName.append(serverName.replace(":", "_"));
+ Path logDir = new Path(master.rootdir, dirName.toString());
+ try {
+ if (master.fs.exists(logDir)) {
+ regionManager.splitLogLock.lock();
+ try {
+ HLog.splitLog(master.rootdir, logDir, master.fs,
+ master.getConfiguration());
+ } finally {
+ regionManager.splitLogLock.unlock();
+ }
}
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Split " + logDir.toString());
+ }
+ } catch (IOException e) {
+ LOG.warn("unable to split region server log because: ", e);
+ throw e;
}
- if (LOG.isDebugEnabled()) {
- LOG.debug("Split " + logDir.toString());
- }
- } catch (IOException e) {
- LOG.warn("unable to split region server log because: ", e);
- throw e;
}
+ // Now get the region assigned
+ regionManager.setUnassigned(info, true);
}
- // Now get the region assigned
- regionManager.setUnassigned(info);
}
}
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java Tue Dec 23 17:06:54 2008
@@ -23,14 +23,12 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
-import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.io.BatchUpdate;
-import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
/** Instantiated to enable or disable a table */
@@ -90,15 +88,18 @@
LOG.debug("Updated columns in row: " + i.getRegionNameAsString());
}
- if (online) {
- // Bring offline regions on-line
- master.regionManager.noLongerClosing(i.getRegionName());
- if (!master.regionManager.isUnassigned(i)) {
- master.regionManager.setUnassigned(i);
+ synchronized (master.regionManager) {
+ if (online) {
+ // Bring offline regions on-line
+ if (!master.regionManager.isUnassigned(i) &&
+ !master.regionManager.isAssigned(i.getRegionName()) &&
+ !master.regionManager.isPending(i.getRegionName())) {
+ master.regionManager.setUnassigned(i, false);
+ }
+ } else {
+ // Prevent region from getting assigned.
+ master.regionManager.removeRegion(i);
}
- } else {
- // Prevent region from getting assigned.
- master.regionManager.noLongerUnassigned(i);
}
}
@@ -106,37 +107,23 @@
if (LOG.isDebugEnabled()) {
LOG.debug("processing regions currently being served");
}
- for (Map.Entry<String, HashSet<HRegionInfo>> e: servedRegions.entrySet()) {
- String serverName = e.getKey();
- if (online) {
- LOG.debug("Already online");
- continue; // Already being served
- }
+ synchronized (master.regionManager) {
+ for (Map.Entry<String, HashSet<HRegionInfo>> e: servedRegions.entrySet()) {
+ String serverName = e.getKey();
+ if (online) {
+ LOG.debug("Already online");
+ continue; // Already being served
+ }
- // Cause regions being served to be taken off-line and disabled
+ // Cause regions being served to be taken off-line and disabled
- Map<byte [], HRegionInfo> localKillList =
- new TreeMap<byte [], HRegionInfo>(Bytes.BYTES_COMPARATOR);
- for (HRegionInfo i: e.getValue()) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("adding region " + i.getRegionNameAsString() + " to kill list");
- }
- // this marks the regions to be closed
- localKillList.put(i.getRegionName(), i);
- // this marks the regions to be offlined once they are closed
- master.regionManager.markRegionForOffline(i.getRegionName());
- }
- Map<byte [], HRegionInfo> killedRegions =
- master.regionManager.removeMarkedToClose(serverName);
- if (killedRegions != null) {
- localKillList.putAll(killedRegions);
- }
- if (localKillList.size() > 0) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("inserted local kill list into kill list for server " +
- serverName);
+ for (HRegionInfo i: e.getValue()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("adding region " + i.getRegionNameAsString() + " to kill list");
+ }
+ // this marks the regions to be closed
+ master.regionManager.setClosing(serverName, i, true);
}
- master.regionManager.markToCloseBulk(serverName, localKillList);
}
}
servedRegions.clear();
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java Tue Dec 23 17:06:54 2008
@@ -93,7 +93,8 @@
static final Log LOG = LogFactory.getLog(HMaster.class.getName());
- public long getProtocolVersion(String protocol, long clientVersion) {
+ public long getProtocolVersion(@SuppressWarnings("unused") String protocol,
+ @SuppressWarnings("unused") long clientVersion) {
return HBaseRPCProtocolVersion.versionID;
}
@@ -531,8 +532,7 @@
/*
* HMasterRegionInterface
*/
- public MapWritable regionServerStartup(HServerInfo serverInfo)
- throws IOException {
+ public MapWritable regionServerStartup(HServerInfo serverInfo) {
// Set the address for now even tho it will not be persisted on
// the HRS side.
String rsAddress = HBaseServer.getRemoteAddress();
@@ -833,7 +833,7 @@
LOG.info("Marking " + hri.getRegionNameAsString() +
" as closed on " + servername + "; cleaning SERVER + STARTCODE; " +
"master will tell regionserver to close region on next heartbeat");
- this.regionManager.markToClose(servername, hri);
+ this.regionManager.setClosing(servername, hri, false);
MetaRegion meta = this.regionManager.getMetaRegionForRow(regionname);
HRegionInterface srvr = getMETAServer(meta);
HRegion.cleanRegionInMETA(srvr, meta.getRegionName(), hri);
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java Tue Dec 23 17:06:54 2008
@@ -154,10 +154,12 @@
*/
synchronized boolean waitForMetaRegionsOrClose() {
while (!master.closed.get()) {
- if (regionManager.isInitialRootScanComplete() &&
- regionManager.numMetaRegions() ==
- regionManager.numOnlineMetaRegions()) {
- break;
+ synchronized (master.regionManager) {
+ if (regionManager.isInitialRootScanComplete() &&
+ regionManager.numMetaRegions() ==
+ regionManager.numOnlineMetaRegions()) {
+ break;
+ }
}
try {
wait(master.threadWakeFrequency);
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java Tue Dec 23 17:06:54 2008
@@ -71,11 +71,11 @@
// back on the toDoQueue
if (metaRegionAvailable()) {
- // offline the region in meta and then note that we've offlined
- // the region.
+ // offline the region in meta and then remove it from the
+ // set of regions in transition
HRegion.offlineRegionInMETA(server, metaRegionName,
regionInfo);
- master.regionManager.regionOfflined(regionInfo.getRegionName());
+ master.regionManager.removeRegion(regionInfo);
}
return true;
}
@@ -84,7 +84,7 @@
} else if (reassignRegion) {
// we are reassigning the region eventually, so set it unassigned
- master.regionManager.setUnassigned(regionInfo);
+ master.regionManager.setUnassigned(regionInfo, false);
}
return result == null ? true : result;
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java Tue Dec 23 17:06:54 2008
@@ -94,26 +94,28 @@
this.historian.online(this.master.getConfiguration());
}
this.historian.addRegionOpen(regionInfo, serverAddress);
- if (isMetaTable) {
- // It's a meta region.
- MetaRegion m = new MetaRegion(new HServerAddress(serverAddress),
- regionInfo.getRegionName(), regionInfo.getStartKey());
- if (!master.regionManager.isInitialMetaScanComplete()) {
- // Put it on the queue to be scanned for the first time.
- LOG.debug("Adding " + m.toString() + " to regions to scan");
- master.regionManager.addMetaRegionToScan(m);
- } else {
- // Add it to the online meta regions
- LOG.debug("Adding to onlineMetaRegions: " + m.toString());
- master.regionManager.putMetaRegionOnline(m);
- // Interrupting the Meta Scanner sleep so that it can
- // process regions right away
- master.regionManager.metaScannerThread.interrupt();
+ synchronized (master.regionManager) {
+ if (isMetaTable) {
+ // It's a meta region.
+ MetaRegion m = new MetaRegion(new HServerAddress(serverAddress),
+ regionInfo.getRegionName(), regionInfo.getStartKey());
+ if (!master.regionManager.isInitialMetaScanComplete()) {
+ // Put it on the queue to be scanned for the first time.
+ LOG.debug("Adding " + m.toString() + " to regions to scan");
+ master.regionManager.addMetaRegionToScan(m);
+ } else {
+ // Add it to the online meta regions
+ LOG.debug("Adding to onlineMetaRegions: " + m.toString());
+ master.regionManager.putMetaRegionOnline(m);
+ // Interrupting the Meta Scanner sleep so that it can
+ // process regions right away
+ master.regionManager.metaScannerThread.interrupt();
+ }
}
+ // If updated successfully, remove from pending list.
+ master.regionManager.removeRegion(regionInfo);
+ return true;
}
- // If updated successfully, remove from pending list.
- master.regionManager.noLongerPending(regionInfo.getRegionName());
- return true;
}
}.doWithRetries();
return result == null ? true : result;
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java Tue Dec 23 17:06:54 2008
@@ -113,7 +113,7 @@
// region had been on shutdown server (could be null because we
// missed edits in hlog because hdfs does not do write-append).
String serverName = Writables.cellToString(values.get(COL_SERVER));
- if (serverName.length() > 0 &&
+ if (serverName != null && serverName.length() > 0 &&
deadServerName.compareTo(serverName) != 0) {
// This isn't the server you're looking for - move along
continue;
@@ -130,31 +130,33 @@
continue;
}
- if (info.isMetaTable()) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("removing meta region " +
- Bytes.toString(info.getRegionName()) +
+ synchronized (master.regionManager) {
+ if (info.isMetaTable()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("removing meta region " +
+ Bytes.toString(info.getRegionName()) +
" from online meta regions");
+ }
+ master.regionManager.offlineMetaRegion(info.getStartKey());
}
- master.regionManager.offlineMetaRegion(info.getStartKey());
- }
- ToDoEntry todo = new ToDoEntry(row, info);
- toDoList.add(todo);
+ ToDoEntry todo = new ToDoEntry(row, info);
+ toDoList.add(todo);
- if (master.regionManager.isMarkedToClose(deadServerName, info.getRegionName())) {
- master.regionManager.noLongerMarkedToClose(deadServerName, info.getRegionName());
- master.regionManager.noLongerUnassigned(info);
- // Mark region offline
- todo.regionOffline = true;
- } else {
- if (!info.isOffline() && !info.isSplit()) {
- // Get region reassigned
- regions.add(info);
+ if (master.regionManager.isOfflined(info.getRegionName()) ||
+ info.isOffline()) {
+ master.regionManager.removeRegion(info);
+ // Mark region offline
+ if (!info.isOffline()) {
+ todo.regionOffline = true;
+ }
+ } else {
+ if (!info.isOffline() && !info.isSplit()) {
+ // Get region reassigned
+ regions.add(info);
+ }
}
}
- // If it was pending, remove.
- master.regionManager.noLongerPending(info.getRegionName());
}
} finally {
if(scannerId != -1L) {
@@ -184,7 +186,7 @@
// Get regions reassigned
for (HRegionInfo info: regions) {
- master.regionManager.setUnassigned(info);
+ master.regionManager.setUnassigned(info, true);
}
}
@@ -252,11 +254,11 @@
}
if (this.rootRegionServer && !this.rootRegionReassigned) {
+ // avoid multiple root region reassignment
+ this.rootRegionReassigned = true;
// The server that died was serving the root region. Now that the log
// has been split, get it reassigned.
master.regionManager.reassignRootRegion();
- // avoid multiple root region reassignment
- this.rootRegionReassigned = true;
// When we call rootAvailable below, it will put us on the delayed
// to do queue to allow some time to pass during which the root
// region will hopefully get reassigned.
@@ -307,7 +309,6 @@
Bytes.toString(r.getRegionName()) + " on " + r.getServer());
}
}
- master.regionManager.allRegionsClosed(deadServerName);
master.serverManager.removeDeadServer(deadServerName);
return true;
}
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Dec 23 17:06:54 2008
@@ -34,8 +34,6 @@
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Collections;
-import java.util.TreeSet;
-import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap;
import org.apache.commons.logging.Log;
@@ -80,45 +78,23 @@
private static final byte[] OVERLOADED = Bytes.toBytes("Overloaded");
- /**
- * The 'unassignedRegions' table maps from a HRegionInfo to a timestamp that
- * indicates the last time we *tried* to assign the region to a RegionServer.
- * If the timestamp is out of date, then we can try to reassign it.
+ /*
+ * Map of region name to RegionState for regions that are in transition such as
*
- * We fill 'unassignedRecords' by scanning ROOT and META tables, learning the
- * set of all known valid regions.
+ * unassigned -> assigned -> pending -> open
+ * closing -> closed -> offline
+ * closing -> closed -> unassigned -> assigned -> pending -> open
+ *
+ * At the end of a transition, removeRegion is used to remove the region from
+ * the map (since it is no longer in transition)
+ *
+ * Note: Needs to be SortedMap so we can specify a comparator
*
- * <p>Items are removed from this list when a region server reports in that
- * the region has been deployed.
- *
- * TODO: Need to be a sorted map?
- */
- private final SortedMap<HRegionInfo, Long> unassignedRegions =
- Collections.synchronizedSortedMap(new TreeMap<HRegionInfo, Long>());
-
- /**
- * Regions that have been assigned, and the server has reported that it has
- * started serving it, but that we have not yet recorded in the meta table.
- */
- private final Set<byte []> pendingRegions =
- Collections.synchronizedSet(new TreeSet<byte []>(Bytes.BYTES_COMPARATOR));
-
- /**
- * List of regions that are going to be closed.
*/
- private final Map<String, Map<byte [], HRegionInfo>> regionsToClose =
- new ConcurrentHashMap<String, Map<byte [], HRegionInfo>>();
-
- /** Regions that are in the process of being closed */
- private final Set<byte []> closingRegions =
- Collections.synchronizedSet(new TreeSet<byte []>(Bytes.BYTES_COMPARATOR));
+ private final SortedMap<byte[], RegionState> regionsInTransition =
+ Collections.synchronizedSortedMap(
+ new TreeMap<byte[], RegionState>(Bytes.BYTES_COMPARATOR));
- /**
- * Set of regions that, once closed, should be marked as offline so that they
- * are not reassigned.
- */
- private final Set<byte []> regionsToOffline =
- Collections.synchronizedSet(new TreeSet<byte []>(Bytes.BYTES_COMPARATOR));
// How many regions to assign a server at a time.
private final int maxAssignInOneGo;
@@ -127,12 +103,12 @@
private final float slop;
/** Set of regions to split. */
- private final Map<byte[],Pair<HRegionInfo,HServerAddress>> regionsToSplit =
+ private final SortedMap<byte[],Pair<HRegionInfo,HServerAddress>> regionsToSplit =
Collections.synchronizedSortedMap(
new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>>
- (Bytes.BYTES_COMPARATOR));
+ (Bytes.BYTES_COMPARATOR));
/** Set of regions to compact. */
- private final Map<byte[],Pair<HRegionInfo,HServerAddress>> regionsToCompact =
+ private final SortedMap<byte[],Pair<HRegionInfo,HServerAddress>> regionsToCompact =
Collections.synchronizedSortedMap(
new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>>
(Bytes.BYTES_COMPARATOR));
@@ -160,21 +136,31 @@
Threads.setDaemonThreadRunning(metaScannerThread,
"RegionManager.metaScanner");
}
-
+
void unsetRootRegion() {
- rootRegionLocation.set(null);
+ synchronized (regionsInTransition) {
+ rootRegionLocation.set(null);
+ regionsInTransition.remove(HRegionInfo.ROOT_REGIONINFO.getRegionName());
+ }
}
void reassignRootRegion() {
unsetRootRegion();
if (!master.shutdownRequested) {
- unassignedRegions.put(HRegionInfo.ROOT_REGIONINFO, ZERO_L);
+ synchronized (regionsInTransition) {
+ RegionState s = new RegionState(HRegionInfo.ROOT_REGIONINFO);
+ s.setUnassigned();
+ regionsInTransition.put(HRegionInfo.ROOT_REGIONINFO.getRegionName(), s);
+ }
}
}
/*
* Assigns regions to region servers attempting to balance the load across
* all region servers
+ *
+ * Note that no synchronization is necessary as the caller
+ * (ServerManager.processMsgs) already owns the monitor for the RegionManager.
*
* @param info
* @param serverName
@@ -183,53 +169,51 @@
void assignRegions(HServerInfo info, String serverName,
HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) {
HServerLoad thisServersLoad = info.getLoad();
- synchronized (unassignedRegions) {
- // We need to hold a lock on assign attempts while we figure out what to
- // do so that multiple threads do not execute this method in parallel
- // resulting in assigning the same region to multiple servers.
-
- // figure out what regions need to be assigned and aren't currently being
- // worked on elsewhere.
- Set<HRegionInfo> regionsToAssign = regionsAwaitingAssignment();
- if (regionsToAssign.size() == 0) {
- // There are no regions waiting to be assigned.
- if (!inSafeMode()) {
- // We only do load balancing once all regions are assigned.
- // This prevents churn while the cluster is starting up.
- double avgLoad = master.serverManager.getAverageLoad();
- double avgLoadWithSlop = avgLoad +
- ((this.slop != 0)? avgLoad * this.slop: avgLoad);
- if (avgLoad > 2.0 &&
- thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Server " + serverName +
+ // figure out what regions need to be assigned and aren't currently being
+ // worked on elsewhere.
+ Set<RegionState> regionsToAssign = regionsAwaitingAssignment();
+ if (regionsToAssign.size() == 0) {
+ // There are no regions waiting to be assigned.
+ if (!inSafeMode()) {
+ // We only do load balancing once all regions are assigned.
+ // This prevents churn while the cluster is starting up.
+ double avgLoad = master.serverManager.getAverageLoad();
+ double avgLoadWithSlop = avgLoad +
+ ((this.slop != 0)? avgLoad * this.slop: avgLoad);
+ if (avgLoad > 2.0 &&
+ thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Server " + serverName +
" is overloaded. Server load: " +
thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad +
", slop: " + this.slop);
- }
- unassignSomeRegions(thisServersLoad, avgLoad, mostLoadedRegions,
- returnMsgs);
}
+ unassignSomeRegions(serverName, thisServersLoad,
+ avgLoad, mostLoadedRegions, returnMsgs);
}
+ }
+ } else {
+ // if there's only one server, just give it all the regions
+ if (master.serverManager.numServers() == 1) {
+ assignRegionsToOneServer(regionsToAssign, serverName, returnMsgs);
} else {
- // if there's only one server, just give it all the regions
- if (master.serverManager.numServers() == 1) {
- assignRegionsToOneServer(regionsToAssign, serverName, returnMsgs);
- } else {
- // otherwise, give this server a few regions taking into account the
- // load of all the other servers.
- assignRegionsToMultipleServers(thisServersLoad, regionsToAssign,
- serverName, returnMsgs);
- }
+ // otherwise, give this server a few regions taking into account the
+ // load of all the other servers.
+ assignRegionsToMultipleServers(thisServersLoad, regionsToAssign,
+ serverName, returnMsgs);
}
}
}
- /**
+ /*
* Make region assignments taking into account multiple servers' loads.
+ *
+ * Note that no synchronization is needed while we iterate over
+ * regionsInTransition because this method is only called by assignRegions
+ * whose caller owns the monitor for RegionManager
*/
private void assignRegionsToMultipleServers(final HServerLoad thisServersLoad,
- final Set<HRegionInfo> regionsToAssign, final String serverName,
+ final Set<RegionState> regionsToAssign, final String serverName,
final ArrayList<HMsg> returnMsgs) {
int nRegionsToAssign = regionsToAssign.size();
@@ -274,14 +258,12 @@
nregions = this.maxAssignInOneGo;
}
- long now = System.currentTimeMillis();
- for (HRegionInfo regionInfo: regionsToAssign) {
- LOG.info("assigning region " +
- Bytes.toString(regionInfo.getRegionName())+
+ for (RegionState s: regionsToAssign) {
+ LOG.info("assigning region " + Bytes.toString(s.getRegionName())+
" to server " + serverName);
- unassignedRegions.put(regionInfo, Long.valueOf(now));
- this.historian.addRegionAssignment(regionInfo, serverName);
- returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, regionInfo));
+ s.setAssigned(serverName);
+ this.historian.addRegionAssignment(s.getRegionInfo(), serverName);
+ returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, s.getRegionInfo()));
if (--nregions <= 0) {
break;
}
@@ -326,38 +308,38 @@
return nRegions;
}
- /**
+ /*
* Get the set of regions that should be assignable in this pass.
+ *
+ * Note that no synchronization on regionsInTransition is needed because the
+ * only caller (assignRegions) whose caller owns the monitor for RegionManager
*/
- private Set<HRegionInfo> regionsAwaitingAssignment() {
- long now = System.currentTimeMillis();
-
+ private Set<RegionState> regionsAwaitingAssignment() {
// set of regions we want to assign to this server
- Set<HRegionInfo> regionsToAssign = new HashSet<HRegionInfo>();
+ Set<RegionState> regionsToAssign = new HashSet<RegionState>();
// Look over the set of regions that aren't currently assigned to
// determine which we should assign to this server.
- synchronized (unassignedRegions) { //must synchronize when iterating
- for (Map.Entry<HRegionInfo, Long> e: unassignedRegions.entrySet()) {
- HRegionInfo i = e.getKey();
- if (numberOfMetaRegions.get() != onlineMetaRegions.size() &&
- !i.isMetaRegion()) {
- // Can't assign user regions until all meta regions have been assigned
- // and are on-line
- continue;
- }
- // If the last attempt to open this region was pretty recent, then we
- // don't want to try and assign it.
- long diff = now - e.getValue().longValue();
- if (diff > master.maxRegionOpenTime) {
- regionsToAssign.add(e.getKey());
- }
+ for (RegionState s: regionsInTransition.values()) {
+ HRegionInfo i = s.getRegionInfo();
+ if (i == null) {
+ continue;
+ }
+ if (numberOfMetaRegions.get() != onlineMetaRegions.size() &&
+ !i.isMetaRegion()) {
+ // Can't assign user regions until all meta regions have been assigned
+ // and are on-line
+ continue;
+ }
+ if (!s.isAssigned() && !s.isClosing() && !s.isPending()) {
+ s.setUnassigned();
+ regionsToAssign.add(s);
}
}
return regionsToAssign;
}
- /**
+ /*
* Figure out the load that is next highest amongst all regionservers. Also,
* return how many servers exist at that load.
*/
@@ -392,31 +374,37 @@
/*
* Assign all to the only server. An unlikely case but still possible.
+ *
+ * Note that no synchronization is needed on regionsInTransition while
+ * iterating on it because the only caller is assignRegions whose caller owns
+ * the monitor for RegionManager
+ *
* @param regionsToAssign
* @param serverName
* @param returnMsgs
*/
- private void assignRegionsToOneServer(final Set<HRegionInfo> regionsToAssign,
+ private void assignRegionsToOneServer(final Set<RegionState> regionsToAssign,
final String serverName, final ArrayList<HMsg> returnMsgs) {
- long now = System.currentTimeMillis();
- for (HRegionInfo regionInfo: regionsToAssign) {
- LOG.info("assigning region " +
- Bytes.toString(regionInfo.getRegionName()) +
+ for (RegionState s: regionsToAssign) {
+ LOG.info("assigning region " + Bytes.toString(s.getRegionName()) +
" to the only server " + serverName);
- unassignedRegions.put(regionInfo, Long.valueOf(now));
- this.historian.addRegionAssignment(regionInfo, serverName);
- returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, regionInfo));
+ s.setAssigned(serverName);
+ this.historian.addRegionAssignment(s.getRegionInfo(), serverName);
+ returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, s.getRegionInfo()));
}
}
- /**
+ /*
* The server checking in right now is overloaded. We will tell it to close
* some or all of its most loaded regions, allowing it to reduce its load.
* The closed regions will then get picked up by other underloaded machines.
+ *
+ * Note that no synchronization is needed because the only caller
+ * (assignRegions) whose caller owns the monitor for RegionManager
*/
- private synchronized void unassignSomeRegions(final HServerLoad load,
- final double avgLoad, final HRegionInfo[] mostLoadedRegions,
- ArrayList<HMsg> returnMsgs) {
+ private void unassignSomeRegions(final String serverName,
+ final HServerLoad load, final double avgLoad,
+ final HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) {
int numRegionsToClose = load.getNumberOfRegions() - (int)Math.ceil(avgLoad);
LOG.debug("Choosing to reassign " + numRegionsToClose
@@ -425,7 +413,7 @@
int regionIdx = 0;
int regionsClosed = 0;
- int skippedClosing = 0;
+ int skipped = 0;
while (regionsClosed < numRegionsToClose &&
regionIdx < mostLoadedRegions.length) {
HRegionInfo currentRegion = mostLoadedRegions[regionIdx];
@@ -435,8 +423,10 @@
continue;
}
- if (isClosing(currentRegion.getRegionName())) {
- skippedClosing++;
+ byte[] regionName = currentRegion.getRegionName();
+ if (isClosing(regionName) || isUnassigned(currentRegion) ||
+ isAssigned(regionName) || isPending(regionName)) {
+ skipped++;
continue;
}
@@ -446,11 +436,11 @@
returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE, currentRegion,
OVERLOADED));
// mark the region as closing
- setClosing(currentRegion.getRegionName());
+ setClosing(serverName, currentRegion, false);
// increment the count of regions we've marked
regionsClosed++;
}
- LOG.info("Skipped " + skippedClosing + " region(s) as already closing");
+ LOG.info("Skipped " + skipped + " region(s) that are in transition states");
}
/**
@@ -503,12 +493,10 @@
* @return true if we found meta regions, false if we're closing.
*/
public boolean areAllMetaRegionsOnline() {
- boolean result = false;
- if (rootRegionLocation.get() != null &&
- numberOfMetaRegions.get() == onlineMetaRegions.size()) {
- result = true;
+ synchronized (onlineMetaRegions) {
+ return (rootRegionLocation.get() != null &&
+ numberOfMetaRegions.get() == onlineMetaRegions.size());
}
- return result;
}
/**
@@ -528,7 +516,7 @@
return onlineMetaRegions.get(newRegion.getRegionName());
}
return onlineMetaRegions.get(onlineMetaRegions.headMap(
- newRegion.getTableDesc().getName()).lastKey());
+ newRegion.getTableDesc().getName()).lastKey());
}
}
}
@@ -569,6 +557,15 @@
}
/**
+ * Remove a region from the region state map.
+ *
+ * @param info
+ */
+ public void removeRegion(HRegionInfo info) {
+ regionsInTransition.remove(info.getRegionName());
+ }
+
+ /**
* Get metaregion that would host passed in row.
* @param row Row need to know all the meta regions for
* @return set of MetaRegion objects that contain the table
@@ -611,7 +608,7 @@
region.getLog().closeAndDelete();
// 5. Get it assigned to a server
- unassignedRegions.put(info, ZERO_L);
+ setUnassigned(info, true);
}
/**
@@ -667,7 +664,13 @@
* it happens to be between states.
*/
public boolean isUnassigned(HRegionInfo info) {
- return unassignedRegions.containsKey(info);
+ synchronized (regionsInTransition) {
+ RegionState s = regionsInTransition.get(info.getRegionName());
+ if (s != null) {
+ return s.isUnassigned();
+ }
+ }
+ return false;
}
/**
@@ -676,89 +679,71 @@
* @return true if pending, false otherwise
*/
public boolean isPending(byte [] regionName) {
- return pendingRegions.contains(regionName);
- }
-
- /**
- * Set a region to unassigned
- * @param info Region to set unassigned
- */
- public void setUnassigned(HRegionInfo info) {
- synchronized(this.unassignedRegions) {
- if (!this.unassignedRegions.containsKey(info) &&
- !this.pendingRegions.contains(info.getRegionName())) {
- this.unassignedRegions.put(info, ZERO_L);
+ synchronized (regionsInTransition) {
+ RegionState s = regionsInTransition.get(regionName);
+ if (s != null) {
+ return s.isPending();
}
}
+ return false;
}
/**
- * Set a region to pending assignment
* @param regionName
+ * @return true if region has been assigned
*/
- public void setPending(byte [] regionName) {
- pendingRegions.add(regionName);
- }
-
- /**
- * Unset region's pending status
- * @param regionName
- */
- public void noLongerPending(byte [] regionName) {
- pendingRegions.remove(regionName);
+ public boolean isAssigned(byte[] regionName) {
+ synchronized (regionsInTransition) {
+ RegionState s = regionsInTransition.get(regionName);
+ if (s != null) {
+ return s.isAssigned() || s.isPending();
+ }
+ }
+ return false;
}
-
+
/**
- * Extend the update assignment deadline for a region.
- * @param info Region whose deadline you want to extend
+ * @param regionName
+ * @return true if region is marked to be offlined.
*/
- public void updateAssignmentDeadline(HRegionInfo info) {
- synchronized (unassignedRegions) {
- // Region server is reporting in that its working on region open
- // (We can get more than one of these messages if region is replaying
- // a bunch of edits and taking a while to open).
- // Extend region open time by max region open time.
- this.unassignedRegions.put(info,
- Long.valueOf(System.currentTimeMillis() + this.master.maxRegionOpenTime));
+ public boolean isOfflined(byte[] regionName) {
+ synchronized (regionsInTransition) {
+ RegionState s = regionsInTransition.get(regionName);
+ if (s != null) {
+ return s.isOfflined();
+ }
}
+ return false;
}
-
+
/**
- * Unset a region's unassigned status
- * @param info Region you want to take off the unassigned list
- */
- public void noLongerUnassigned(HRegionInfo info) {
- unassignedRegions.remove(info);
- }
-
- /**
- * Mark a region to be closed. Server manager will inform hosting region server
- * to close the region at its next opportunity.
- * @param serverName address info of server
- * @param info region to close
+ * Set a region to unassigned
+ * @param info Region to set unassigned
+ * @param force if true mark region unassigned whatever its current state
*/
- public void markToClose(final String serverName, final HRegionInfo info) {
- Map<byte [], HRegionInfo> toclose =
- new TreeMap<byte [], HRegionInfo>(Bytes.BYTES_COMPARATOR);
- toclose.put(info.getRegionName(), info);
- markToCloseBulk(serverName, toclose);
+ public void setUnassigned(HRegionInfo info, boolean force) {
+ synchronized(this.regionsInTransition) {
+ RegionState s = regionsInTransition.get(info.getRegionName());
+ if (s == null) {
+ s = new RegionState(info);
+ regionsInTransition.put(info.getRegionName(), s);
+ }
+ if (force || (!s.isAssigned() && !s.isPending())) {
+ s.setUnassigned();
+ }
+ }
}
/**
- * Mark a bunch of regions as to close at once for a server
- * @param serverName address info of server
- * @param map map of region names to region infos of regions to close
+ * Set a region to pending assignment
+ * @param regionName
*/
- public void markToCloseBulk(final String serverName,
- final Map<byte [], HRegionInfo> map) {
- synchronized (regionsToClose) {
- Map<byte [], HRegionInfo> regions = regionsToClose.get(serverName);
- if (regions != null) {
- regions.putAll(map);
- } else {
- regions = map;
+ public void setPending(byte [] regionName) {
+ synchronized (regionsInTransition) {
+ RegionState s = regionsInTransition.get(regionName);
+ if (s != null) {
+ s.setPending();
}
- regionsToClose.put(serverName, regions);
}
}
@@ -767,74 +752,71 @@
* given server
*
* @param serverName
- * @return map of region names to region infos to close
+ * @return set of infos to close
*/
- public Map<byte [], HRegionInfo> removeMarkedToClose(String serverName) {
- return regionsToClose.remove(serverName);
- }
-
- /**
- * Check if a region is marked as to close
- * @param serverName address info of server
- * @param regionName name of the region we might want to close
- * @return true if the region is marked to close, false otherwise
- */
- public boolean isMarkedToClose(String serverName, byte [] regionName) {
- synchronized (regionsToClose) {
- Map<byte [], HRegionInfo> serverToClose = regionsToClose.get(serverName);
- return (serverToClose != null && serverToClose.containsKey(regionName));
- }
- }
-
- /**
- * Mark a region as no longer waiting to be closed. Either it was closed or
- * we don't want to close it anymore for some reason.
- * @param serverName address info of server
- * @param regionName name of the region
- */
- public void noLongerMarkedToClose(String serverName, byte [] regionName) {
- synchronized (regionsToClose) {
- Map<byte [], HRegionInfo> serverToClose = regionsToClose.get(serverName);
- if (serverToClose != null) {
- serverToClose.remove(regionName);
+ public Set<HRegionInfo> getMarkedToClose(String serverName) {
+ Set<HRegionInfo> result = new HashSet<HRegionInfo>();
+ synchronized (regionsInTransition) {
+ for (RegionState s: regionsInTransition.values()) {
+ if (s.isClosing() && !s.isClosed() &&
+ s.getServerName().compareTo(serverName) == 0) {
+ result.add(s.getRegionInfo());
+ }
}
}
+ return result;
}
- /**
- * Called when all regions for a particular server have been closed
- *
- * @param serverName
- */
- public void allRegionsClosed(String serverName) {
- regionsToClose.remove(serverName);
- }
-
/**
* Check if a region is closing
* @param regionName
* @return true if the region is marked as closing, false otherwise
*/
public boolean isClosing(byte [] regionName) {
- return closingRegions.contains(regionName);
+ synchronized (regionsInTransition) {
+ RegionState s = regionsInTransition.get(regionName);
+ if (s != null) {
+ return s.isClosing();
+ }
+ }
+ return false;
}
/**
- * Set a region as no longer closing (closed?)
- * @param regionName
+ * Mark a region as closing
+ * @param serverName
+ * @param regionInfo
+ * @param setOffline
*/
- public void noLongerClosing(byte [] regionName) {
- closingRegions.remove(regionName);
+ public void setClosing(String serverName, HRegionInfo regionInfo,
+ boolean setOffline) {
+ synchronized (regionsInTransition) {
+ RegionState s = regionsInTransition.get(regionInfo.getRegionName());
+ if (s != null) {
+ if (!s.isClosing()) {
+ throw new IllegalStateException(
+ "Cannot transition to closing from any other state. Region: " +
+ Bytes.toString(regionInfo.getRegionName()));
+ }
+ return;
+ }
+ s = new RegionState(regionInfo);
+ regionsInTransition.put(regionInfo.getRegionName(), s);
+ s.setClosing(serverName, setOffline);
+ }
}
- /**
- * Mark a region as closing
+ /**
* @param regionName
*/
- public void setClosing(byte [] regionName) {
- closingRegions.add(regionName);
+ public void setClosed(byte[] regionName) {
+ synchronized (regionsInTransition) {
+ RegionState s = regionsInTransition.get(regionName);
+ if (s != null) {
+ s.setClosed();
+ }
+ }
}
-
/**
* Add a meta region to the scan queue
* @param m MetaRegion that needs to get scanned
@@ -844,31 +826,6 @@
}
/**
- * Note that a region should be offlined as soon as its closed.
- * @param regionName
- */
- public void markRegionForOffline(byte [] regionName) {
- regionsToOffline.add(regionName);
- }
-
- /**
- * Check if a region is marked for offline
- * @param regionName
- * @return true if marked for offline, false otherwise
- */
- public boolean isMarkedForOffline(byte [] regionName) {
- return regionsToOffline.contains(regionName);
- }
-
- /**
- * Region was offlined as planned, remove it from the list to offline
- * @param regionName
- */
- public void regionOfflined(byte [] regionName) {
- regionsToOffline.remove(regionName);
- }
-
- /**
* Check if the initial root scan has been completed.
* @return true if scan completed, false otherwise
*/
@@ -890,8 +847,7 @@
*/
public boolean inSafeMode() {
if (safeMode) {
- if(isInitialMetaScanComplete() && unassignedRegions.size() == 0 &&
- pendingRegions.size() == 0) {
+ if(isInitialMetaScanComplete() && regionsInTransition.size() == 0) {
safeMode = false;
LOG.info("exiting safe mode");
} else {
@@ -1017,30 +973,174 @@
*/
public void applyActions(HServerInfo serverInfo, ArrayList<HMsg> returnMsgs) {
HServerAddress addr = serverInfo.getServerAddress();
- Iterator<Pair<HRegionInfo,HServerAddress>> i =
+ Iterator<Pair<HRegionInfo, HServerAddress>> i =
regionsToCompact.values().iterator();
- while (i.hasNext()) {
- Pair<HRegionInfo,HServerAddress> pair = i.next();
- if (addr.equals(pair.getSecond())) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("sending MSG_REGION_COMPACT " + pair.getFirst() + " to " +
- addr);
+ synchronized (regionsToCompact) {
+ while (i.hasNext()) {
+ Pair<HRegionInfo,HServerAddress> pair = i.next();
+ if (addr.equals(pair.getSecond())) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("sending MSG_REGION_COMPACT " + pair.getFirst() + " to " +
+ addr);
+ }
+ returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_COMPACT, pair.getFirst()));
+ i.remove();
}
- returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_COMPACT, pair.getFirst()));
- i.remove();
}
}
i = regionsToSplit.values().iterator();
- while (i.hasNext()) {
- Pair<HRegionInfo,HServerAddress> pair = i.next();
- if (addr.equals(pair.getSecond())) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("sending MSG_REGION_SPLIT " + pair.getFirst() + " to " +
- addr);
+ synchronized (regionsToSplit) {
+ while (i.hasNext()) {
+ Pair<HRegionInfo,HServerAddress> pair = i.next();
+ if (addr.equals(pair.getSecond())) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("sending MSG_REGION_SPLIT " + pair.getFirst() + " to " +
+ addr);
+ }
+ returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_SPLIT, pair.getFirst()));
+ i.remove();
}
- returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_SPLIT, pair.getFirst()));
- i.remove();
}
}
}
+
+ private static class RegionState implements Comparable<RegionState> {
+ private final byte[] regionName;
+ private HRegionInfo regionInfo = null;
+ private boolean unassigned = false;
+ private boolean assigned = false;
+ private boolean pending = false;
+ private boolean closing = false;
+ private boolean closed = false;
+ private boolean offlined = false;
+ private String serverName = null;
+
+ RegionState(byte[] regionName) {
+ this.regionName = regionName;
+ }
+
+ RegionState(HRegionInfo info) {
+ this.regionName = info.getRegionName();
+ this.regionInfo = info;
+ }
+
+ byte[] getRegionName() {
+ return regionName;
+ }
+
+ synchronized HRegionInfo getRegionInfo() {
+ return this.regionInfo;
+ }
+
+ synchronized String getServerName() {
+ return this.serverName;
+ }
+
+ synchronized boolean isUnassigned() {
+ return unassigned;
+ }
+
+ /*
+ * Note: callers of this method (reassignRootRegion,
+ * regionsAwaitingAssignment, setUnassigned) ensure that this method is not
+ * called unless it is safe to do so.
+ */
+ synchronized void setUnassigned() {
+ this.unassigned = true;
+ this.assigned = false;
+ this.pending = false;
+ this.closing = false;
+ this.serverName = null;
+ }
+
+ synchronized boolean isAssigned() {
+ return assigned;
+ }
+
+ synchronized void setAssigned(String serverName) {
+ if (!this.unassigned) {
+ throw new IllegalStateException(
+ "Cannot assign a region that is not currently unassigned. Region: " +
+ Bytes.toString(regionName));
+ }
+ this.unassigned = false;
+ this.assigned = true;
+ this.pending = false;
+ this.closing = false;
+ this.serverName = serverName;
+ }
+
+ synchronized boolean isPending() {
+ return pending;
+ }
+
+ synchronized void setPending() {
+ if (!assigned) {
+ throw new IllegalStateException(
+ "Cannot set a region as pending if it has not been assigned. Region: " +
+ Bytes.toString(regionName));
+ }
+ this.unassigned = false;
+ this.assigned = false;
+ this.pending = true;
+ this.closing = false;
+ }
+
+ synchronized boolean isClosing() {
+ return closing;
+ }
+
+ synchronized void setClosing(String serverName, boolean setOffline) {
+ this.unassigned = false;
+ this.assigned = false;
+ this.pending = false;
+ this.closing = true;
+ this.offlined = setOffline;
+ this.serverName = serverName;
+ }
+
+ synchronized boolean isClosed() {
+ return this.closed;
+ }
+
+ synchronized void setClosed() {
+ if (!closing) {
+ throw new IllegalStateException(
+ "Cannot set a region to be closed if it was not already marked as" +
+ " closing. Region: " + Bytes.toString(regionName));
+ }
+ this.closed = true;
+ }
+
+ synchronized boolean isOfflined() {
+ return this.offlined;
+ }
+
+ @Override
+ public synchronized String toString() {
+ return "region name: " + Bytes.toString(this.regionName) +
+ ", isUnassigned: " + this.unassigned + ", isAssigned: " +
+ this.assigned + ", isPending: " + this.pending + ", isClosing: " +
+ this.closing + ", isClosed: " + this.closed + ", isOfflined: " +
+ this.offlined;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ return this.compareTo((RegionState) o) == 0;
+ }
+
+ @Override
+ public int hashCode() {
+ return Bytes.toString(regionName).hashCode();
+ }
+
+ @Override
+ public int compareTo(RegionState o) {
+ if (o == null) {
+ return 1;
+ }
+ return Bytes.compareTo(this.regionName, o.getRegionName());
+ }
+ }
}
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java Tue Dec 23 17:06:54 2008
@@ -260,14 +260,17 @@
for (int i = 1; i < msgs.length; i++) {
LOG.info("Processing " + msgs[i] + " from " + serverName);
HRegionInfo info = msgs[i].getRegionInfo();
- if (info.isRootRegion()) {
- master.regionManager.reassignRootRegion();
- } else if (info.isMetaTable()) {
- master.regionManager.offlineMetaRegion(info.getStartKey());
- }
- if (!master.regionManager.isMarkedToClose(
- serverName, info.getRegionName())) {
- master.regionManager.setUnassigned(info);
+ synchronized (master.regionManager) {
+ if (info.isRootRegion()) {
+ master.regionManager.reassignRootRegion();
+ } else if (info.isMetaTable()) {
+ master.regionManager.offlineMetaRegion(info.getStartKey());
+ }
+ if (!master.regionManager.isOfflined(info.getRegionName())) {
+ master.regionManager.setUnassigned(info, true);
+ } else {
+ master.regionManager.removeRegion(info);
+ }
}
}
}
@@ -334,8 +337,6 @@
HRegionInfo[] mostLoadedRegions, HMsg incomingMsgs[])
throws IOException {
ArrayList<HMsg> returnMsgs = new ArrayList<HMsg>();
- Map<byte [], HRegionInfo> regionsToKill =
- master.regionManager.removeMarkedToClose(serverName);
if (serverInfo.getServerAddress() == null) {
throw new NullPointerException("Server address cannot be null; " +
"hbase-958 debugging");
@@ -346,7 +347,6 @@
LOG.info("Received " + incomingMsgs[i] + " from " + serverName);
switch (incomingMsgs[i].getType()) {
case MSG_REPORT_PROCESS_OPEN:
- master.regionManager.updateAssignmentDeadline(region);
break;
case MSG_REPORT_OPEN:
@@ -354,7 +354,7 @@
break;
case MSG_REPORT_CLOSE:
- processRegionClose(serverInfo, region);
+ processRegionClose(region);
break;
case MSG_REPORT_SPLIT:
@@ -369,22 +369,21 @@
}
}
- // Tell the region server to close regions that we have marked for closing.
- if (regionsToKill != null) {
- for (HRegionInfo i: regionsToKill.values()) {
+ synchronized (master.regionManager) {
+ // Tell the region server to close regions that we have marked for closing.
+ for (HRegionInfo i: master.regionManager.getMarkedToClose(serverName)) {
returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE, i));
// Transition the region from toClose to closing state
- master.regionManager.setClosing(i.getRegionName());
+ master.regionManager.setClosed(i.getRegionName());
}
- }
-
- // Figure out what the RegionServer ought to do, and write back.
- master.regionManager.assignRegions(serverInfo, serverName,
- mostLoadedRegions, returnMsgs);
- // Send any pending table actions.
- master.regionManager.applyActions(serverInfo, returnMsgs);
+ // Figure out what the RegionServer ought to do, and write back.
+ master.regionManager.assignRegions(serverInfo, serverName,
+ mostLoadedRegions, returnMsgs);
+ // Send any pending table actions.
+ master.regionManager.applyActions(serverInfo, returnMsgs);
+ }
return returnMsgs.toArray(new HMsg[returnMsgs.size()]);
}
@@ -401,21 +400,23 @@
private void processSplitRegion(String serverName, HServerInfo serverInfo,
HRegionInfo region, HMsg splitA, HMsg splitB, ArrayList<HMsg> returnMsgs) {
- // Cancel any actions pending for the affected region.
- // This prevents the master from sending a SPLIT message if the table
- // has already split by the region server.
- master.regionManager.endActions(region.getRegionName());
-
- HRegionInfo newRegionA = splitA.getRegionInfo();
- master.regionManager.setUnassigned(newRegionA);
-
- HRegionInfo newRegionB = splitB.getRegionInfo();
- master.regionManager.setUnassigned(newRegionB);
-
- if (region.isMetaTable()) {
- // A meta region has split.
- master.regionManager.offlineMetaRegion(region.getStartKey());
- master.regionManager.incrementNumMetaRegions();
+ synchronized (master.regionManager) {
+ // Cancel any actions pending for the affected region.
+ // This prevents the master from sending a SPLIT message if the table
+ // has already split by the region server.
+ master.regionManager.endActions(region.getRegionName());
+
+ HRegionInfo newRegionA = splitA.getRegionInfo();
+ master.regionManager.setUnassigned(newRegionA, false);
+
+ HRegionInfo newRegionB = splitB.getRegionInfo();
+ master.regionManager.setUnassigned(newRegionB, false);
+
+ if (region.isMetaTable()) {
+ // A meta region has split.
+ master.regionManager.offlineMetaRegion(region.getStartKey());
+ master.regionManager.incrementNumMetaRegions();
+ }
}
}
@@ -424,119 +425,105 @@
HRegionInfo region, ArrayList<HMsg> returnMsgs)
throws IOException {
boolean duplicateAssignment = false;
- if (!master.regionManager.isUnassigned(region)) {
- if (region.isRootRegion()) {
- // Root region
- HServerAddress rootServer = master.getRootRegionLocation();
- if (rootServer != null) {
- if (rootServer.toString().compareTo(serverName) == 0) {
- // A duplicate open report from the correct server
+ synchronized (master.regionManager) {
+ if (!master.regionManager.isUnassigned(region) &&
+ !master.regionManager.isAssigned(region.getRegionName())) {
+ if (region.isRootRegion()) {
+ // Root region
+ HServerAddress rootServer = master.getRootRegionLocation();
+ if (rootServer != null) {
+ if (rootServer.toString().compareTo(serverName) == 0) {
+ // A duplicate open report from the correct server
+ return;
+ }
+ // We received an open report on the root region, but it is
+ // assigned to a different server
+ duplicateAssignment = true;
+ }
+ } else {
+ // Not root region. If it is not a pending region, then we are
+ // going to treat it as a duplicate assignment, although we can't
+ // tell for certain that's the case.
+ if (master.regionManager.isPending(region.getRegionName())) {
+ // A duplicate report from the correct server
return;
}
- // We received an open report on the root region, but it is
- // assigned to a different server
duplicateAssignment = true;
}
- } else {
- // Not root region. If it is not a pending region, then we are
- // going to treat it as a duplicate assignment, although we can't
- // tell for certain that's the case.
- if (master.regionManager.isPending(region.getRegionName())) {
- // A duplicate report from the correct server
- return;
- }
- duplicateAssignment = true;
}
- }
- if (duplicateAssignment) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("region server " + serverInfo.getServerAddress().toString()
- + " should not have opened region " + region.getRegionName());
- }
-
- // This Region should not have been opened.
- // Ask the server to shut it down, but don't report it as closed.
- // Otherwise the HMaster will think the Region was closed on purpose,
- // and then try to reopen it elsewhere; that's not what we want.
- returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE_WITHOUT_REPORT,
- region, "Duplicate assignment".getBytes()));
- } else {
- // it was assigned, and it's not a duplicate assignment, so take it out
- // of the unassigned list.
- master.regionManager.noLongerUnassigned(region);
- if (region.isRootRegion()) {
- // Store the Root Region location (in memory)
- HServerAddress rootServer = serverInfo.getServerAddress();
- master.connection.setRootRegionLocation(
- new HRegionLocation(region, rootServer));
- master.regionManager.setRootRegionLocation(rootServer);
- } else {
- // Note that the table has been assigned and is waiting for the
- // meta table to be updated.
- master.regionManager.setPending(region.getRegionName());
- // Queue up an update to note the region location.
- try {
- master.toDoQueue.put(
- new ProcessRegionOpen(master, serverInfo, region));
- } catch (InterruptedException e) {
- throw new RuntimeException(
- "Putting into toDoQueue was interrupted.", e);
+ if (duplicateAssignment) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("region server " + serverInfo.getServerAddress().toString()
+ + " should not have opened region " + region.getRegionName());
}
- }
+
+ // This Region should not have been opened.
+ // Ask the server to shut it down, but don't report it as closed.
+ // Otherwise the HMaster will think the Region was closed on purpose,
+ // and then try to reopen it elsewhere; that's not what we want.
+ returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE_WITHOUT_REPORT,
+ region, "Duplicate assignment".getBytes()));
+ } else {
+ if (region.isRootRegion()) {
+ // it was assigned, and it's not a duplicate assignment, so take it out
+ // of the unassigned list.
+ master.regionManager.removeRegion(region);
+
+ // Store the Root Region location (in memory)
+ HServerAddress rootServer = serverInfo.getServerAddress();
+ master.connection.setRootRegionLocation(
+ new HRegionLocation(region, rootServer));
+ master.regionManager.setRootRegionLocation(rootServer);
+ } else {
+ // Note that the table has been assigned and is waiting for the
+ // meta table to be updated.
+ master.regionManager.setPending(region.getRegionName());
+ // Queue up an update to note the region location.
+ try {
+ master.toDoQueue.put(
+ new ProcessRegionOpen(master, serverInfo, region));
+ } catch (InterruptedException e) {
+ throw new RuntimeException(
+ "Putting into toDoQueue was interrupted.", e);
+ }
+ }
+ }
}
}
- private void processRegionClose(
- @SuppressWarnings("unused") HServerInfo serverInfo, HRegionInfo region) {
- if (region.isRootRegion()) {
- // Root region
- if (region.isOffline()) {
- // Can't proceed without root region. Shutdown.
- LOG.fatal("root region is marked offline");
- master.shutdown();
- }
- master.connection.setRootRegionLocation(null);
- master.regionManager.reassignRootRegion();
-
- } else {
- boolean reassignRegion = !region.isOffline();
- boolean offlineRegion = false;
-
- // either this region is being closed because it was marked to close, or
- // the region server is going down peacefully. in either case, we should
- // at least try to remove it from the closing list.
- master.regionManager.noLongerClosing(region.getRegionName());
-
- // if the region is marked to be offlined, we don't want to reassign it.
- if (master.regionManager.isMarkedForOffline(region.getRegionName())) {
- reassignRegion = false;
- offlineRegion = true;
- }
+ private void processRegionClose(HRegionInfo region) {
+ synchronized (master.regionManager) {
+ if (region.isRootRegion()) {
+ // Root region
+ master.connection.unsetRootRegionLocation();
+ master.regionManager.unsetRootRegion();
+ if (region.isOffline()) {
+ // Can't proceed without root region. Shutdown.
+ LOG.fatal("root region is marked offline");
+ master.shutdown();
+ return;
+ }
- if (region.isMetaTable()) {
+ } else if (region.isMetaTable()) {
// Region is part of the meta table. Remove it from onlineMetaRegions
master.regionManager.offlineMetaRegion(region.getStartKey());
}
- // if the region is already on the unassigned list, let's remove it. this
- // is safe because if it's going to be reassigned, it'll get added again
- // shortly. if it's not going to get reassigned, then we need to make
- // sure it's not on the unassigned list, because that would contend with
- // the ProcessRegionClose going on asynchronously.
- master.regionManager.noLongerUnassigned(region);
-
- // NOTE: we cannot put the region into unassignedRegions as that
- // changes the ordering of the messages we've received. In
- // this case, a close could be processed before an open
- // resulting in the master not agreeing on the region's
- // state.
+ boolean offlineRegion =
+ master.regionManager.isOfflined(region.getRegionName());
+ boolean reassignRegion = !region.isOffline() && !offlineRegion;
+
+ // NOTE: If the region was just being closed and not offlined, we cannot
+ // mark the region unassignedRegions as that changes the ordering of
+ // the messages we've received. In this case, a close could be
+ // processed before an open resulting in the master not agreeing on
+ // the region's state.
try {
- master.toDoQueue.put(new ProcessRegionClose(master, region,
- offlineRegion, reassignRegion));
+ master.toDoQueue.put(new ProcessRegionClose(master, region, offlineRegion,
+ reassignRegion));
} catch (InterruptedException e) {
- throw new RuntimeException(
- "Putting into toDoQueue was interrupted.", e);
+ throw new RuntimeException("Putting into toDoQueue was interrupted.", e);
}
}
}
@@ -545,9 +532,9 @@
private boolean cancelLease(final String serverName) {
boolean leaseCancelled = false;
HServerInfo info = serversToServerInfo.remove(serverName);
+ // Only cancel lease and update load information once.
+ // This method can be called a couple of times during shutdown.
if (info != null) {
- // Only cancel lease and update load information once.
- // This method can be called a couple of times during shutdown.
if (master.getRootRegionLocation() != null &&
info.getServerAddress().equals(master.getRootRegionLocation())) {
master.regionManager.reassignRootRegion();