You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2010/05/17 18:48:57 UTC
svn commit: r945237 - in /hadoop/hbase/branches/0.20: ./
src/java/org/apache/hadoop/hbase/master/
Author: stack
Date: Mon May 17 16:48:57 2010
New Revision: 945237
URL: http://svn.apache.org/viewvc?rev=945237&view=rev
Log:
HBASE-2482 regions in transition do not get reassigned by master when RS crashes
Modified:
hadoop/hbase/branches/0.20/CHANGES.txt
hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/HMaster.java
hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/RegionManager.java
hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/ServerManager.java
Modified: hadoop/hbase/branches/0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/CHANGES.txt?rev=945237&r1=945236&r2=945237&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.20/CHANGES.txt Mon May 17 16:48:57 2010
@@ -5,6 +5,8 @@ Release 0.20.5
(Todd Lipcon via Stack)
HBASE-2521 no license headers in 5 files
HBASE-2503 PriorityQueue isn't thread safe, KeyValueHeap uses it that way
+ HBASE-2482 Regions in transition do not get reassigned by master when RS
+ crashes (Todd Lipcon via Stack)
Release 0.20.4 - Mon May 3 16:16:02 PDT 2010
INCOMPATIBLE CHANGES
Modified: hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=945237&r1=945236&r2=945237&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/HMaster.java Mon May 17 16:48:57 2010
@@ -35,6 +35,8 @@ import java.util.concurrent.DelayQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -158,6 +160,7 @@ public class HMaster extends Thread impl
RegionManager regionManager;
private MasterMetrics metrics;
+ final Lock splitLogLock = new ReentrantLock();
/**
* Build the HMaster out of a raw configuration item.
@@ -609,14 +612,14 @@ public class HMaster extends Thread impl
if(this.serverManager.getServerInfo(serverName) == null) {
LOG.info("Log folder doesn't belong " +
"to a known region server, splitting");
- this.regionManager.splitLogLock.lock();
+ this.splitLogLock.lock();
Path logDir =
new Path(this.rootdir, HLog.getHLogDirectoryName(serverName));
try {
HLog.splitLog(this.rootdir, logDir, this.fs,
getConfiguration());
} finally {
- this.regionManager.splitLogLock.unlock();
+ this.splitLogLock.unlock();
}
} else {
LOG.info("Log folder belongs to an existing region server");
Modified: hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=945237&r1=945236&r2=945237&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (original)
+++ hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java Mon May 17 16:48:57 2010
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import org.apache.hadoop.fs.Path;
@@ -36,6 +37,7 @@ import org.apache.hadoop.hbase.ipc.HRegi
import org.apache.hadoop.hbase.regionserver.HLog;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.master.RegionManager.RegionState;
/**
* Instantiated when a server's lease has expired, meaning it has crashed.
@@ -80,6 +82,7 @@ class ProcessServerShutdown extends Regi
// check to see if I am responsible for either ROOT or any of the META tables.
+ // TODO Why do we do this now instead of at processing time?
closeMetaRegions();
}
@@ -109,6 +112,19 @@ class ProcessServerShutdown extends Regi
}
}
+ private void closeRegionsInTransition() {
+ Map<String, RegionState> inTransition =
+ master.regionManager.getRegionsInTransitionOnServer(deadServer);
+ for (Map.Entry<String, RegionState> entry : inTransition.entrySet()) {
+ String regionName = entry.getKey();
+ RegionState state = entry.getValue();
+
+ LOG.info("Region " + regionName + " was in transition " +
+ state + " on dead server " + deadServer + " - marking unassigned");
+ master.regionManager.setUnassigned(state.getRegionInfo(), true);
+ }
+ }
+
@Override
public String toString() {
return "ProcessServerShutdown of " + this.deadServer;
@@ -275,14 +291,14 @@ class ProcessServerShutdown extends Regi
if (!logSplit) {
// Process the old log file
if (master.fs.exists(oldLogDir)) {
- if (!master.regionManager.splitLogLock.tryLock()) {
+ if (!master.splitLogLock.tryLock()) {
return false;
}
try {
HLog.splitLog(master.rootdir, oldLogDir, master.fs,
master.getConfiguration());
} finally {
- master.regionManager.splitLogLock.unlock();
+ master.splitLogLock.unlock();
}
}
logSplit = true;
@@ -347,6 +363,9 @@ class ProcessServerShutdown extends Regi
Bytes.toString(r.getRegionName()) + " on " + r.getServer());
}
}
+
+ closeRegionsInTransition();
+
// Remove this server from dead servers list. Finished splitting logs.
this.master.serverManager.removeDeadServer(deadServer);
if (LOG.isDebugEnabled()) {
Modified: hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=945237&r1=945236&r2=945237&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/RegionManager.java Mon May 17 16:48:57 2010
@@ -22,6 +22,7 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -33,8 +34,6 @@ import java.util.TreeMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
-import java.util.concurrent.locks.Lock;
-import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -68,8 +67,6 @@ class RegionManager implements HConstant
private volatile boolean safeMode = true;
- final Lock splitLogLock = new ReentrantLock();
-
private final RootScanner rootScannerThread;
final MetaScanner metaScannerThread;
@@ -168,8 +165,8 @@ class RegionManager implements HConstant
unsetRootRegion();
if (!master.shutdownRequested.get()) {
synchronized (regionsInTransition) {
- RegionState s = new RegionState(HRegionInfo.ROOT_REGIONINFO);
- s.setUnassigned();
+ RegionState s = new RegionState(HRegionInfo.ROOT_REGIONINFO,
+ RegionState.State.UNASSIGNED);
regionsInTransition.put(
HRegionInfo.ROOT_REGIONINFO.getRegionNameAsString(), s);
LOG.info("ROOT inserted into regionsInTransition");
@@ -587,6 +584,23 @@ class RegionManager implements HConstant
}
return false;
}
+
+ /**
+ * Return a map of the regions in transition on a server.
+ * Returned map entries are region name -> RegionState
+ */
+ Map<String, RegionState> getRegionsInTransitionOnServer(String serverName) {
+ Map<String, RegionState> ret = new HashMap<String, RegionState>();
+ synchronized (regionsInTransition) {
+ for (Map.Entry<String, RegionState> entry : regionsInTransition.entrySet()) {
+ RegionState rs = entry.getValue();
+ if (serverName.equals(rs.getServerName())) {
+ ret.put(entry.getKey(), rs);
+ }
+ }
+ }
+ return ret;
+ }
/**
* Stop the root and meta scanners so that the region servers serving meta
@@ -736,8 +750,7 @@ class RegionManager implements HConstant
byte [] regionName = region.getRegionName();
Put put = new Put(regionName);
- byte [] infoBytes = Writables.getBytes(info);
- String infoString = new String(infoBytes);
+
put.add(CATALOG_FAMILY, REGIONINFO_QUALIFIER, Writables.getBytes(info));
server.put(metaRegionName, put);
@@ -844,6 +857,10 @@ class RegionManager implements HConstant
&& !s.isUnassigned()
&& s.getServerName() != null
&& s.getServerName().equals(server.toString())) {
+ // TODO this code appears to be entirely broken, since
+ // server.toString() has no start code, but s.getServerName()
+ // does!
+ LOG.fatal("I DONT BELIEVE YOU WILL EVER SEE THIS!");
// Has an outstanding meta region to be assigned.
return true;
}
@@ -976,7 +993,7 @@ class RegionManager implements HConstant
synchronized (this.regionsInTransition) {
s = regionsInTransition.get(info.getRegionNameAsString());
if (s == null) {
- s = new RegionState(info);
+ s = new RegionState(info, RegionState.State.UNASSIGNED);
regionsInTransition.put(info.getRegionNameAsString(), s);
}
}
@@ -1058,7 +1075,7 @@ class RegionManager implements HConstant
RegionState s =
this.regionsInTransition.get(regionInfo.getRegionNameAsString());
if (s == null) {
- s = new RegionState(regionInfo);
+ s = new RegionState(regionInfo, RegionState.State.CLOSING);
}
// If region was asked to open before getting here, we could be taking
// the wrong server name
@@ -1530,22 +1547,30 @@ class RegionManager implements HConstant
* note on regionsInTransition data member above for listing of state
* transitions.
*/
- private static class RegionState implements Comparable<RegionState> {
+ static class RegionState implements Comparable<RegionState> {
private final HRegionInfo regionInfo;
- private volatile boolean unassigned = false;
- private volatile boolean pendingOpen = false;
- private volatile boolean open = false;
- private volatile boolean closing = false;
- private volatile boolean pendingClose = false;
- private volatile boolean closed = false;
- private volatile boolean offlined = false;
+
+ enum State {
+ UNASSIGNED, // awaiting a server to be assigned
+ PENDING_OPEN, // told a server to open, hasn't opened yet
+ OPEN, // has been opened on RS, but not yet marked in META/ROOT
+ CLOSING, // a msg has been enqueued to close ths region, but not delivered to RS yet
+ PENDING_CLOSE, // msg has been delivered to RS to close this region
+ CLOSED // region has been closed but not yet marked in meta
+
+ }
+
+ private State state;
+
+ private boolean isOfflined;
/* Set when region is assigned or closing */
- private volatile String serverName = null;
+ private String serverName = null;
/* Constructor */
- RegionState(HRegionInfo info) {
+ RegionState(HRegionInfo info, State state) {
this.regionInfo = info;
+ this.state = state;
}
synchronized HRegionInfo getRegionInfo() {
@@ -1567,14 +1592,16 @@ class RegionManager implements HConstant
* @return true if the region is being opened
*/
synchronized boolean isOpening() {
- return this.unassigned || this.pendingOpen || this.open;
+ return state == State.UNASSIGNED ||
+ state == State.PENDING_OPEN ||
+ state == State.OPEN;
}
/*
* @return true if region is unassigned
*/
synchronized boolean isUnassigned() {
- return unassigned;
+ return state == State.UNASSIGNED;
}
/*
@@ -1583,120 +1610,84 @@ class RegionManager implements HConstant
* called unless it is safe to do so.
*/
synchronized void setUnassigned() {
- this.unassigned = true;
- this.pendingOpen = false;
- this.open = false;
- this.closing = false;
- this.pendingClose = false;
- this.closed = false;
- this.offlined = false;
+ state = State.UNASSIGNED;
this.serverName = null;
}
synchronized boolean isPendingOpen() {
- return pendingOpen;
+ return state == State.PENDING_OPEN;
}
/*
* @param serverName Server region was assigned to.
*/
synchronized void setPendingOpen(final String serverName) {
- if (!this.unassigned) {
+ if (state != State.UNASSIGNED) {
LOG.warn("Cannot assign a region that is not currently unassigned. " +
"FIX!! State: " + toString());
}
- this.unassigned = false;
- this.pendingOpen = true;
- this.open = false;
- this.closing = false;
- this.pendingClose = false;
- this.closed = false;
- this.offlined = false;
+ state = State.PENDING_OPEN;
this.serverName = serverName;
}
synchronized boolean isOpen() {
- return open;
+ return state == State.OPEN;
}
synchronized void setOpen() {
- if (!pendingOpen) {
+ if (state != State.PENDING_OPEN) {
LOG.warn("Cannot set a region as open if it has not been pending. " +
"FIX!! State: " + toString());
}
- this.unassigned = false;
- this.pendingOpen = false;
- this.open = true;
- this.closing = false;
- this.pendingClose = false;
- this.closed = false;
- this.offlined = false;
+ state = State.OPEN;
}
synchronized boolean isClosing() {
- return closing;
+ return state == State.CLOSING;
}
synchronized void setClosing(String serverName, boolean setOffline) {
- this.unassigned = false;
- this.pendingOpen = false;
- this.open = false;
- this.closing = true;
- this.pendingClose = false;
- this.closed = false;
- this.offlined = setOffline;
+ state = State.CLOSING;
this.serverName = serverName;
+ this.isOfflined = setOffline;
}
synchronized boolean isPendingClose() {
- return this.pendingClose;
+ return state == State.PENDING_CLOSE;
}
synchronized void setPendingClose() {
- if (!closing) {
+ if (state != State.CLOSING) {
LOG.warn("Cannot set a region as pending close if it has not been " +
"closing. FIX!! State: " + toString());
}
- this.unassigned = false;
- this.pendingOpen = false;
- this.open = false;
- this.closing = false;
- this.pendingClose = true;
- this.closed = false;
+ state = State.PENDING_CLOSE;
}
synchronized boolean isClosed() {
- return this.closed;
+ return state == State.CLOSED;
}
synchronized void setClosed() {
- if (!pendingClose && !pendingOpen && !closing) {
+ if (state != State.PENDING_CLOSE &&
+ state != State.PENDING_OPEN &&
+ state != State.CLOSING) {
throw new IllegalStateException(
"Cannot set a region to be closed if it was not already marked as" +
- " pending close, pending open or closing. State: " + toString());
+ " pending close, pending open or closing. State: " + this);
}
- this.unassigned = false;
- this.pendingOpen = false;
- this.open = false;
- this.closing = false;
- this.pendingClose = false;
- this.closed = true;
+ state = State.CLOSED;
}
synchronized boolean isOfflined() {
- return this.offlined;
+ return (state == State.CLOSING ||
+ state == State.PENDING_CLOSE) && isOfflined;
}
@Override
public synchronized String toString() {
return ("name=" + Bytes.toString(getRegionName()) +
- ", unassigned=" + this.unassigned +
- ", pendingOpen=" + this.pendingOpen +
- ", open=" + this.open +
- ", closing=" + this.closing +
- ", pendingClose=" + this.pendingClose +
- ", closed=" + this.closed +
- ", offlined=" + this.offlined);
+ ", state=" + this.state);
}
@Override
Modified: hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=945237&r1=945236&r2=945237&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/master/ServerManager.java Mon May 17 16:48:57 2010
@@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.HMsg.Type
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
+import org.apache.hadoop.hbase.master.RegionManager.RegionState;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
@@ -324,8 +325,15 @@ class ServerManager implements HConstant
}
}
- /** Region server is exiting */
+ /**
+ * Region server is exiting with a clean shutdown.
+ *
+ * In this case, the server sends MSG_REPORT_EXITING in msgs[0] followed by
+ * a MSG_REPORT_CLOSE for each region it was serving.
+ */
private void processRegionServerExit(HServerInfo serverInfo, HMsg[] msgs) {
+ assert msgs[0].getType() == Type.MSG_REPORT_EXITING;
+
synchronized (serversToServerInfo) {
try {
// This method removes ROOT/META from the list and marks them to be reassigned
@@ -342,6 +350,7 @@ class ServerManager implements HConstant
for (int i = 1; i < msgs.length; i++) {
LOG.info("Processing " + msgs[i] + " from " +
serverInfo.getServerName());
+ assert msgs[i].getType() == Type.MSG_REGION_CLOSE;
HRegionInfo info = msgs[i].getRegionInfo();
// Meta/root region offlining is handed in removeServerInfo above.
if (!info.isMetaRegion()) {
@@ -356,6 +365,18 @@ class ServerManager implements HConstant
}
}
}
+
+ // There should not be any regions in transition for this server - the
+ // server should finish transitions itself before closing
+ Map<String, RegionState> inTransition =
+ master.regionManager.getRegionsInTransitionOnServer(
+ serverInfo.getServerName());
+ for (Map.Entry<String, RegionState> entry : inTransition.entrySet()) {
+ LOG.warn("Region server " + serverInfo.getServerName() +
+ " shut down with region " + entry.getKey() + " in transition " +
+ "state " + entry.getValue());
+ master.regionManager.setUnassigned(entry.getValue().getRegionInfo(), true);
+ }
}
// We don't need to return anything to the server because it isn't
// going to do any more work.