You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:11:22 UTC
svn commit: r1181468 - in /hbase/branches/0.89/src:
main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/master/
main/java/org/apache/hadoop/hbase/regionserver/wal/
test/java/org/apache/hadoop/hbase/regionserver/ test/java/org/apache/ha...
Author: nspiegelberg
Date: Tue Oct 11 02:11:22 2011
New Revision: 1181468
URL: http://svn.apache.org/viewvc?rev=1181468&view=rev
Log:
Rename HLog Dir during Recovery
Summary:
If a RegionServer goes rogue (GC pause, ops issue), the master
needs to be able to forcibly prevent further writes before it can log
split. Do a non-recursive create and rename the dir on HLog recovery.
Test Plan:
- mvn test -Dtest=TestHLogSplit
DiffCamp Revision: 195313
Reviewed By: kannan
Reviewers: jgray, aaiyer, hkuang, kannan, kranganathan
CC: nspiegelberg, kannan
Tasks:
#207588: HBASE-2312: splitLog race condition
Revert Plan:
OK
Modified:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java
hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/HConstants.java Tue Oct 11 02:11:22 2011
@@ -148,6 +148,9 @@ public final class HConstants {
/** Used to construct the name of the compaction directory during compaction */
public static final String HREGION_COMPACTIONDIR_NAME = "compaction.dir";
+ /** File Extension used while splitting an HLog into regions (HBASE-2312) */
+ public static final String HLOG_SPLITTING_EXT = "-splitting";
+
/** Default maximum file size */
public static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024;
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Oct 11 02:11:22 2011
@@ -656,7 +656,8 @@ public class HMaster extends Thread impl
return;
}
for (FileStatus status : logFolders) {
- String serverName = status.getPath().getName();
+ Path logDir = status.getPath();
+ String serverName = logDir.getName();
LOG.info("Found log folder : " + serverName);
if(this.serverManager.getServerInfo(serverName) == null) {
LOG.info("Log folder doesn't belong " +
@@ -664,9 +665,18 @@ public class HMaster extends Thread impl
long splitTime = 0, splitSize = 0;
this.splitLogLock.lock();
- Path logDir =
- new Path(this.rootdir, HLog.getHLogDirectoryName(serverName));
try {
+ // rename the directory so a rogue RS doesn't create more HLogs
+ if (!serverName.endsWith(HConstants.HLOG_SPLITTING_EXT)) {
+ Path splitDir = new Path(logDir.getParent(),
+ logDir.getName()
+ + HConstants.HLOG_SPLITTING_EXT);
+ if (!this.fs.rename(logDir, splitDir)) {
+ throw new IOException("Failed fs.rename of " + logDir);
+ }
+ logDir = splitDir;
+ LOG.debug("Renamed region directory: " + splitDir);
+ }
HLog.splitLog(this.rootdir, logDir, oldLogDir, this.fs, getConfiguration());
splitTime = HLog.lastSplitTime;
splitSize = HLog.lastSplitSize;
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java Tue Oct 11 02:11:22 2011
@@ -19,6 +19,7 @@
*/
package org.apache.hadoop.hbase.master;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
@@ -54,7 +55,7 @@ class ProcessServerShutdown extends Regi
private List<MetaRegion> metaRegions;
private Path rsLogDir;
- private boolean logSplit;
+ private boolean isSplitFinished;
private boolean rootRescanned;
private HServerAddress deadServerAddress;
@@ -76,7 +77,7 @@ class ProcessServerShutdown extends Regi
super(master);
this.deadServer = serverInfo.getServerName();
this.deadServerAddress = serverInfo.getServerAddress();
- this.logSplit = false;
+ this.isSplitFinished = false;
this.rootRescanned = false;
this.rsLogDir =
new Path(master.getRootDir(), HLog.getHLogDirectoryName(serverInfo));
@@ -286,21 +287,38 @@ class ProcessServerShutdown extends Regi
@Override
protected boolean process() throws IOException {
LOG.info("Process shutdown of server " + this.deadServer +
- ": logSplit: " + logSplit + ", rootRescanned: " + rootRescanned +
+ ": logSplit: " + isSplitFinished + ", rootRescanned: " + rootRescanned +
", numberOfMetaRegions: " + master.getRegionManager().numMetaRegions() +
", onlineMetaRegions.size(): " +
master.getRegionManager().numOnlineMetaRegions());
- if (!logSplit) {
- // Process the old log file
- if (this.master.getFileSystem().exists(rsLogDir)) {
- long splitTime = 0, splitSize = 0;
+ if (!isSplitFinished) {
+ long splitTime = 0, splitSize = 0;
+ FileSystem fs = this.master.getFileSystem();
+ // we rename during split, so check both names
+ Path rsSplitDir = new Path(rsLogDir.getParent(),
+ rsLogDir.getName()
+ + HConstants.HLOG_SPLITTING_EXT);
+ boolean logDirExists = fs.exists(rsLogDir);
+ boolean splitDirExists = fs.exists(rsSplitDir);
+ assert !(logDirExists && splitDirExists)
+ : "Both files shouldn't exist: " + rsLogDir + " and " + rsSplitDir;
+ if (logDirExists || splitDirExists) {
if (!master.splitLogLock.tryLock()) {
return false;
}
try {
- HLog.splitLog(master.getRootDir(), rsLogDir,
- this.master.getOldLogDir(), this.master.getFileSystem(),
+ // rename the directory so a rogue RS doesn't create more HLogs
+ if (logDirExists) {
+ if (!fs.rename(rsLogDir, rsSplitDir)) {
+ throw new IOException("Failed fs.rename of " + rsLogDir);
+ }
+ LOG.debug("Renamed region directory: " + rsSplitDir);
+ }
+
+ // Process the old log files
+ HLog.splitLog(master.getRootDir(), rsSplitDir,
+ this.master.getOldLogDir(), this.master.getFileSystem(),
this.master.getConfiguration());
splitTime = HLog.lastSplitTime;
splitSize = HLog.lastSplitSize;
@@ -310,7 +328,7 @@ class ProcessServerShutdown extends Regi
this.master.getMetrics().addSplit(splitTime, splitSize);
}
- logSplit = true;
+ isSplitFinished = true;
}
LOG.info("Log split complete, meta reassignment and scanning:");
if (this.isRootServer) {
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java Tue Oct 11 02:11:22 2011
@@ -658,13 +658,14 @@ public class HLog implements Syncable {
* @return Path to current writer or null if none.
* @throws IOException
*/
- private Path cleanupCurrentWriter(final long currentfilenum)
+ Path cleanupCurrentWriter(final long currentfilenum)
throws IOException {
Path oldFile = null;
if (this.writer != null) {
// Close the current writer, get a new one.
try {
this.writer.close();
+ this.writer = null;
} catch (IOException e) {
// Failed close of log file. Means we're losing edits. For now,
// shut ourselves down to minimize loss. Alternative is to try and
@@ -745,7 +746,9 @@ public class HLog implements Syncable {
if (LOG.isDebugEnabled()) {
LOG.debug("closing hlog writer in " + this.dir.toString());
}
- this.writer.close();
+ if (this.writer != null) {
+ this.writer.close();
+ }
}
} finally {
cacheFlushLock.unlock();
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java Tue Oct 11 02:11:22 2011
@@ -23,6 +23,7 @@ package org.apache.hadoop.hbase.regionse
import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import org.apache.commons.logging.Log;
@@ -32,7 +33,9 @@ import org.apache.hadoop.fs.FSDataOutput
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Metadata;
+import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
/**
@@ -56,17 +59,52 @@ public class SequenceFileLogWriter imple
public void init(FileSystem fs, Path path, Configuration conf)
throws IOException {
// Create a SF.Writer instance.
- this.writer = SequenceFile.createWriter(fs, conf, path,
- HLog.getKeyClass(conf), WALEdit.class,
- fs.getConf().getInt("io.file.buffer.size", 4096),
- (short) conf.getInt("hbase.regionserver.hlog.replication",
- fs.getDefaultReplication()),
- conf.getLong("hbase.regionserver.hlog.blocksize",
- fs.getDefaultBlockSize()),
- SequenceFile.CompressionType.NONE,
- new DefaultCodec(),
- null,
- new Metadata());
+ try {
+ // reflection for a version of SequenceFile.createWriter that doesn't
+ // automatically create the parent directory (see HBASE-2312)
+ this.writer = (SequenceFile.Writer) SequenceFile.class
+ .getMethod("createWriter", new Class[] {FileSystem.class,
+ Configuration.class, Path.class, Class.class, Class.class,
+ Integer.TYPE, Short.TYPE, Long.TYPE, Boolean.TYPE,
+ CompressionType.class, CompressionCodec.class, Metadata.class})
+ .invoke(null, new Object[] {fs, conf, path, HLog.getKeyClass(conf),
+ WALEdit.class,
+ new Integer(fs.getConf().getInt("io.file.buffer.size", 4096)),
+ new Short((short)
+ conf.getInt("hbase.regionserver.hlog.replication",
+ fs.getDefaultReplication())),
+ new Long(conf.getLong("hbase.regionserver.hlog.blocksize",
+ fs.getDefaultBlockSize())),
+ new Boolean(false) /*createParent*/,
+ SequenceFile.CompressionType.NONE, new DefaultCodec(),
+ new Metadata()
+ });
+ } catch (InvocationTargetException ite) {
+ // function was properly called, but threw it's own exception
+ throw new IOException(ite.getCause());
+ } catch (Exception e) {
+ // ignore all other exceptions. related to reflection failure
+ }
+
+
+
+ // if reflection failed, use the old createWriter
+ if (this.writer == null) {
+ LOG.warn("new createWriter -- HADOOP-6840 -- not available");
+ this.writer = SequenceFile.createWriter(fs, conf, path,
+ HLog.getKeyClass(conf), WALEdit.class,
+ fs.getConf().getInt("io.file.buffer.size", 4096),
+ (short) conf.getInt("hbase.regionserver.hlog.replication",
+ fs.getDefaultReplication()),
+ conf.getLong("hbase.regionserver.hlog.blocksize",
+ fs.getDefaultBlockSize()),
+ SequenceFile.CompressionType.NONE,
+ new DefaultCodec(),
+ null,
+ new Metadata());
+ } else {
+ LOG.debug("using new createWriter -- HADOOP-6840");
+ }
// Get at the private FSDataOutputStream inside in SequenceFile so we can
// call sync on it. Make it accessible. Stash it aside for call up in
@@ -140,4 +178,4 @@ public class SequenceFileLogWriter imple
public OutputStream getDFSCOutputStream() {
return this.dfsClient_out;
}
-}
\ No newline at end of file
+}
Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java Tue Oct 11 02:11:22 2011
@@ -411,6 +411,14 @@ public class TestStore extends TestCase
blockSize, bytesPerChecksum, progress), faultPos);
}
+ @Override
+ public FSDataOutputStream createNonRecursive(Path f,
+ FsPermission permission, boolean overwrite,
+ int bufferSize, short replication, long blockSize,
+ Progressable progress) throws IOException {
+ return create(f, permission, overwrite, bufferSize,
+ replication, blockSize, progress);
+ }
}
static class FaultyOutputStream extends FSDataOutputStream {
@@ -519,4 +527,4 @@ public class TestStore extends TestCase
result = HBaseTestingUtility.getFromStoreFile(store, get);
assertTrue(result.size()==0);
}
-}
\ No newline at end of file
+}
Modified: hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java?rev=1181468&r1=1181467&r2=1181468&view=diff
==============================================================================
--- hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java (original)
+++ hbase/branches/0.89/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java Tue Oct 11 02:11:22 2011
@@ -31,6 +31,8 @@ import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -38,6 +40,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
@@ -47,14 +50,19 @@ import org.apache.hadoop.hbase.util.Thre
import org.apache.hadoop.ipc.RemoteException;
import org.junit.After;
import org.junit.AfterClass;
+import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
/**
+ *
+ */
+/**
* Testing {@link HLog} splitting code.
*/
public class TestHLogSplit {
+ private static final Log LOG = LogFactory.getLog(TestHLogSplit.class);
private Configuration conf;
private FileSystem fs;
@@ -117,6 +125,8 @@ public class TestHLogSplit {
for (FileStatus dir : entries){
fs.delete(dir.getPath(), true);
}
+ // create the HLog directory because recursive log creates are not allowed
+ fs.mkdirs(hlogDir);
seq = 0;
regions = new ArrayList<String>();
Collections.addAll(regions, "bbb", "ccc");
@@ -510,6 +520,77 @@ public class TestHLogSplit {
assertEquals(0, compareHLogSplitDirs(firstSplitPath, splitPath));
}
+ /* HBASE-2312: tests the case where a RegionServer enters a GC pause or goes
+ * renegade due to Connect/ZK bugs. When the master declares it dead, HDFS
+ * should deny writes and prevent it from rolling HLogs so Log Splitting can
+ * safely commence on the master.
+ * */
+ @Test
+ public void testLogRollAfterSplitStart() throws IOException {
+ // set flush interval to a large number so it doesn't interrupt us
+ final String F_INTERVAL = "hbase.regionserver.optionallogflushinterval";
+ long oldFlushInterval = conf.getLong(F_INTERVAL, 1000);
+ conf.setLong(F_INTERVAL, 1000*1000*100);
+ HLog log = null;
+ Path thisTestsDir = new Path(hbaseDir, "testLogRollAfterSplitStart");
+ Path rsSplitDir = new Path(thisTestsDir.getParent(),
+ thisTestsDir.getName()
+ + HConstants.HLOG_SPLITTING_EXT);
+
+ try {
+ // put some entries in an HLog
+ byte [] tableName = Bytes.toBytes(this.getClass().getName());
+ HRegionInfo regioninfo = new HRegionInfo(new HTableDescriptor(tableName),
+ HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false);
+ log = new HLog(fs, thisTestsDir, oldLogDir, conf, null);
+ final int total = 20;
+ for (int i = 0; i < total; i++) {
+ WALEdit kvs = new WALEdit();
+ kvs.add(new KeyValue(Bytes.toBytes(i), tableName, tableName));
+ log.append(regioninfo, tableName, kvs, System.currentTimeMillis());
+ }
+ // Send the data to HDFS datanodes and close the HDFS writer
+ log.sync(true);
+ log.cleanupCurrentWriter(log.getFilenum());
+
+ /* code taken from ProcessServerShutdown.process()
+ * handles RS shutdowns (as observed by the Master)
+ */
+ // rename the directory so a rogue RS doesn't create more HLogs
+ fs.rename(thisTestsDir, rsSplitDir);
+ LOG.debug("Renamed region directory: " + rsSplitDir);
+
+ // Process the old log files
+ // TODO: find a way a keep the log.writer around and call this
+ // currently, you look like the current leaseholder
+ HLog.splitLog(hbaseDir, rsSplitDir, oldLogDir, fs, conf);
+
+ // Now, try to write more data.
+ // verify that this fails and the subsequent roll of the HLog also fails
+ try {
+ log.rollWriter();
+ Assert.fail("rollWriter() did not throw any exception.");
+ } catch (IOException ioe) {
+ if (ioe.getCause().getMessage().contains("FileNotFound")) {
+ LOG.info("Got the expected exception: ", ioe.getCause());
+ } else {
+ Assert.fail("Unexpected exception: " + ioe);
+ }
+ }
+ } finally {
+ conf.setLong(F_INTERVAL, oldFlushInterval);
+ if (log != null) {
+ log.close();
+ }
+ if (fs.exists(thisTestsDir)) {
+ fs.delete(thisTestsDir, true);
+ }
+ if (fs.exists(rsSplitDir)) {
+ fs.delete(rsSplitDir, true);
+ }
+ }
+ }
+
/**
* This thread will keep writing to the file after the split process has started
* It simulates a region server that was considered dead but woke up and wrote