You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jx...@apache.org on 2013/04/14 17:24:30 UTC
svn commit: r1467791 - in /hbase/branches/0.95/hbase-server/src:
main/java/org/apache/hadoop/hbase/regionserver/wal/
test/java/org/apache/hadoop/hbase/regionserver/wal/
Author: jxiang
Date: Sun Apr 14 15:24:30 2013
New Revision: 1467791
URL: http://svn.apache.org/r1467791
Log:
HBASE-8314 HLogSplitter can retry to open a 0-length hlog file
Modified:
hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogFactory.java
hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java
hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
Modified: hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogFactory.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogFactory.java?rev=1467791&r1=1467790&r2=1467791&view=diff
==============================================================================
--- hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogFactory.java (original)
+++ hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogFactory.java Sun Apr 14 15:24:30 2013
@@ -21,6 +21,7 @@
package org.apache.hadoop.hbase.regionserver.wal;
import java.io.IOException;
+import java.io.InterruptedIOException;
import java.util.List;
import org.apache.commons.logging.Log;
@@ -31,6 +32,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
import org.apache.hadoop.hbase.regionserver.wal.HLog.Writer;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
public class HLogFactory {
private static final Log LOG = LogFactory.getLog(HLogFactory.class);
@@ -67,7 +69,7 @@ public class HLogFactory {
static void resetLogReaderClass() {
logReaderClass = null;
}
-
+
/**
* Create a reader for the WAL. If you are reading from a file that's being written to
* and need to reopen it multiple times, use {@link HLog.Reader#reset()} instead of this method
@@ -76,28 +78,55 @@ public class HLogFactory {
* @throws IOException
*/
public static HLog.Reader createReader(final FileSystem fs,
- final Path path, Configuration conf)
- throws IOException {
- try {
-
- if (logReaderClass == null) {
+ final Path path, Configuration conf) throws IOException {
+ if (logReaderClass == null) {
+ logReaderClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
+ SequenceFileLogReader.class, Reader.class);
+ }
- logReaderClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
- SequenceFileLogReader.class, Reader.class);
+ try {
+ // A hlog file could be under recovery, so it may take several
+ // tries to get it open. Instead of claiming it is corrupted, retry
+ // to open it up to 5 minutes by default.
+ long startWaiting = EnvironmentEdgeManager.currentTimeMillis();
+ long openTimeout = conf.getInt("hbase.hlog.open.timeout", 300000) + startWaiting;
+ int nbAttempt = 0;
+ while (true) {
+ try {
+ HLog.Reader reader = logReaderClass.newInstance();
+ reader.init(fs, path, conf);
+ return reader;
+ } catch (IOException e) {
+ String msg = e.getMessage();
+ if (msg != null && msg.contains("Cannot obtain block length")) {
+ if (++nbAttempt == 1) {
+ LOG.warn("Lease should have recovered. This is not expected. Will retry", e);
+ }
+ if (nbAttempt > 2 && openTimeout < EnvironmentEdgeManager.currentTimeMillis()) {
+ LOG.error("Can't open after " + nbAttempt + " attempts and "
+ + (EnvironmentEdgeManager.currentTimeMillis() - startWaiting)
+ + "ms " + " for " + path);
+ } else {
+ try {
+ Thread.sleep(nbAttempt < 3 ? 500 : 1000);
+ continue; // retry
+ } catch (InterruptedException ie) {
+ InterruptedIOException iioe = new InterruptedIOException();
+ iioe.initCause(ie);
+ throw iioe;
+ }
+ }
+ }
+ throw e;
+ }
}
-
-
- HLog.Reader reader = logReaderClass.newInstance();
- reader.init(fs, path, conf);
- return reader;
- } catch (IOException e) {
- throw e;
- }
- catch (Exception e) {
+ } catch (IOException ie) {
+ throw ie;
+ } catch (Exception e) {
throw new IOException("Cannot get log reader", e);
}
}
-
+
/*
* WAL writer
*/
Modified: hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java?rev=1467791&r1=1467790&r2=1467791&view=diff
==============================================================================
--- hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java (original)
+++ hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java Sun Apr 14 15:24:30 2013
@@ -26,7 +26,6 @@ import java.lang.reflect.InvocationTarge
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
Modified: hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java?rev=1467791&r1=1467790&r2=1467791&view=diff
==============================================================================
--- hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java (original)
+++ hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLogSplit.java Sun Apr 14 15:24:30 2013
@@ -880,6 +880,44 @@ public class TestHLogSplit {
}
}
+ @Test
+ public void testRetryOpenDuringRecovery() throws Exception {
+ generateHLogs(-1);
+
+ fs.initialize(fs.getUri(), conf);
+
+ FileSystem spiedFs = Mockito.spy(fs);
+ // The "Cannot obtain block length" part is very important,
+ // that's how it comes out of HDFS. If HDFS changes the exception
+ // message, this test needs to be adjusted accordingly.
+ //
+ // When DFSClient tries to open a file, HDFS needs to locate
+ // the last block of the file and get its length. However, if the
+ // last block is under recovery, HDFS may have problem to obtain
+ // the block length, in which case, retry may help.
+ Mockito.doAnswer(new Answer<FSDataInputStream>() {
+ private int count = 0;
+
+ public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
+ if (count++ < 3) {
+ throw new IOException("Cannot obtain block length");
+ }
+ return (FSDataInputStream)invocation.callRealMethod();
+ }
+ }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
+
+ HLogSplitter logSplitter = new HLogSplitter(
+ conf, HBASEDIR, HLOGDIR, OLDLOGDIR, spiedFs, null);
+
+ try {
+ logSplitter.splitLog();
+ assertEquals(NUM_WRITERS, fs.listStatus(OLDLOGDIR).length);
+ assertFalse(fs.exists(HLOGDIR));
+ } catch (IOException e) {
+ fail("There shouldn't be any exception but: " + e.toString());
+ }
+ }
+
/**
* Test log split process with fake data and lots of edits to trigger threading
* issues.
@@ -1330,6 +1368,7 @@ public class TestHLogSplit {
private Path getLogForRegion(Path rootdir, byte[] table, String region)
throws IOException {
Path tdir = HTableDescriptor.getTableDir(rootdir, table);
+ @SuppressWarnings("deprecation")
Path editsdir = HLogUtil.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir,
Bytes.toString(region.getBytes())));
FileStatus [] files = this.fs.listStatus(editsdir);