You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:16:56 UTC
svn commit: r1181528 -
/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
Author: nspiegelberg
Date: Tue Oct 11 02:16:55 2011
New Revision: 1181528
URL: http://svn.apache.org/viewvc?rev=1181528&view=rev
Log:
The HBase lease recovery should handle a non-existent last block
Summary:
The RS may crash after allocating a new block to an HLog but before it
writes any data to that block. Without this patch, lease recovery on that file
never succeeds because the last block is never found in HDFS-land.
The HDFS changes were made here:
https://phabricator.fb.com/D230657
This patch enhances the RS to use the new recoverLease API. This API will
discard the last block if fsync was not invoked earlier on that block.
Test Plan:
1. TestHLog
2. Running it on two 5 node test clusters
Reviewed By: kannan
Reviewers: kannan, hkuang
CC: hbase@lists, kannan, dhruba
Differential Revision: 239298
Modified:
hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=1181528&r1=1181527&r2=1181528&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java Tue Oct 11 02:16:55 2011
@@ -36,11 +36,14 @@ import org.apache.hadoop.hbase.regionser
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
import org.apache.hadoop.hdfs.protocol.FSConstants;
+import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
import org.apache.hadoop.io.SequenceFile;
import java.io.DataInputStream;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
+import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
@@ -601,7 +604,7 @@ public class FSUtils {
*/
public static void recoverFileLease(final FileSystem fs, final Path p, Configuration conf)
throws IOException{
- if (!isAppendSupported(conf)) {
+ if (!isAppendSupported(conf)) {
LOG.warn("Running on HDFS without append enabled may result in data loss");
return;
}
@@ -610,11 +613,61 @@ public class FSUtils {
if (!(fs instanceof DistributedFileSystem)) {
return;
}
- DistributedFileSystem dfs = (DistributedFileSystem)fs;
- LOG.info("Recovering file" + p);
+ long startWaiting = System.currentTimeMillis();
+
+ boolean discardlastBlock = conf.getBoolean("hbase.regionserver.discardLastNonExistantBlock",
+ true);
+ LOG.info("Recovering file" + p + ", discard last block: " + discardlastBlock);
// Trying recovery
- while (!dfs.recoverLease(p)) {
+ boolean recovered = false;
+ while (!recovered) {
+ try {
+ try {
+ if (fs instanceof DistributedFileSystem) {
+ DistributedFileSystem dfs = (DistributedFileSystem)fs;
+ try {
+ DistributedFileSystem.class.getMethod("recoverLease",
+ new Class[] {Path.class, Boolean.class}).
+ invoke(dfs, p, new Boolean(discardlastBlock));
+ } catch (NoSuchMethodException nsme) {
+ DistributedFileSystem.class.getMethod("recoverLease",
+ new Class[] {Path.class}).invoke(dfs, p);
+ }
+ } else {
+ throw new Exception("Not a DistributedFileSystem");
+ }
+ } catch (InvocationTargetException ite) {
+ // function was properly called, but threw it's own exception
+ throw (IOException) ite.getCause();
+ } catch (Exception e) {
+ LOG.debug("Failed fs.recoverLease invocation, " + e.toString() +
+ ", trying fs.append instead");
+ FSDataOutputStream out = fs.append(p);
+ out.close();
+ }
+ recovered = true;
+ } catch (IOException e) {
+ e = RemoteExceptionHandler.checkIOException(e);
+ if (e instanceof AlreadyBeingCreatedException) {
+ // We expect that we'll get this message while the lease is still
+ // within its soft limit, but if we get it past that, it means
+ // that the RS is holding onto the file even though it lost its
+ // znode. We could potentially abort after some time here.
+ long waitedFor = System.currentTimeMillis() - startWaiting;
+ if (waitedFor > FSConstants.LEASE_SOFTLIMIT_PERIOD) {
+ LOG.warn("Waited " + waitedFor + "ms for lease recovery on " + p +
+ ":" + e.getMessage());
+ }
+ } else if (e instanceof LeaseExpiredException &&
+ e.getMessage().contains("File does not exist")) {
+ // This exception comes out instead of FNFE, fix it
+ throw new FileNotFoundException(
+ "The given HLog wasn't found at " + p.toString());
+ } else {
+ throw new IOException("Failed to open " + p + " for append", e);
+ }
+ }
try {
Thread.sleep(1000);
} catch (InterruptedException ex) {