You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:16:56 UTC

svn commit: r1181528 - /hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java

Author: nspiegelberg
Date: Tue Oct 11 02:16:55 2011
New Revision: 1181528

URL: http://svn.apache.org/viewvc?rev=1181528&view=rev
Log:
The HBase lease recovery should handle a non-existent last block

Summary:
The RS may crash after allocating a new block to an HLog but before it
writes any data to that block. Without this patch, lease recovery on that file
never succeeds because the last block is never found in HDFS-land.

The HDFS changes were made here:
https://phabricator.fb.com/D230657

This patch enhances the RS to use the new recoverLease API. This API will
discard the last block if fsync was not invoked earlier on that block.

Test Plan:
1. TestHLog
2. Running it on two 5 node test clusters

Reviewed By: kannan
Reviewers: kannan, hkuang
CC: hbase@lists, kannan, dhruba
Differential Revision: 239298

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=1181528&r1=1181527&r2=1181528&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java Tue Oct 11 02:16:55 2011
@@ -36,11 +36,14 @@ import org.apache.hadoop.hbase.regionser
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
 import org.apache.hadoop.hdfs.protocol.FSConstants;
+import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
 import org.apache.hadoop.io.SequenceFile;
 
 import java.io.DataInputStream;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InterruptedIOException;
+import java.lang.reflect.InvocationTargetException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.HashMap;
@@ -601,7 +604,7 @@ public class FSUtils {
    */
   public static void recoverFileLease(final FileSystem fs, final Path p, Configuration conf)
   throws IOException{
-    if (!isAppendSupported(conf)) {
+      if (!isAppendSupported(conf)) {
       LOG.warn("Running on HDFS without append enabled may result in data loss");
       return;
     }
@@ -610,11 +613,61 @@ public class FSUtils {
     if (!(fs instanceof DistributedFileSystem)) {
       return;
     }
-    DistributedFileSystem dfs = (DistributedFileSystem)fs;
-    LOG.info("Recovering file" + p);
+    long startWaiting = System.currentTimeMillis();
+
+    boolean discardlastBlock =  conf.getBoolean("hbase.regionserver.discardLastNonExistantBlock",
+        true);
+    LOG.info("Recovering file" + p + ", discard last block: " + discardlastBlock);
 
     // Trying recovery
-    while (!dfs.recoverLease(p)) {
+    boolean recovered = false;
+    while (!recovered) {
+      try {
+        try {
+          if (fs instanceof DistributedFileSystem) {
+            DistributedFileSystem dfs = (DistributedFileSystem)fs;
+            try {
+              DistributedFileSystem.class.getMethod("recoverLease",
+                  new Class[] {Path.class, Boolean.class}).
+                  invoke(dfs, p, new Boolean(discardlastBlock));
+            } catch (NoSuchMethodException nsme) {
+              DistributedFileSystem.class.getMethod("recoverLease",
+                  new Class[] {Path.class}).invoke(dfs, p);
+            }
+          } else {
+            throw new Exception("Not a DistributedFileSystem");
+          }
+        } catch (InvocationTargetException ite) {
+          // function was properly called, but threw it's own exception
+          throw (IOException) ite.getCause();
+        } catch (Exception e) {
+          LOG.debug("Failed fs.recoverLease invocation, " + e.toString() +
+              ", trying fs.append instead");
+          FSDataOutputStream out = fs.append(p);
+          out.close();
+        }
+        recovered = true;
+      } catch (IOException e) {
+        e = RemoteExceptionHandler.checkIOException(e);
+        if (e instanceof AlreadyBeingCreatedException) {
+          // We expect that we'll get this message while the lease is still
+          // within its soft limit, but if we get it past that, it means
+          // that the RS is holding onto the file even though it lost its
+          // znode. We could potentially abort after some time here.
+          long waitedFor = System.currentTimeMillis() - startWaiting;
+          if (waitedFor > FSConstants.LEASE_SOFTLIMIT_PERIOD) {
+            LOG.warn("Waited " + waitedFor + "ms for lease recovery on " + p +
+                ":" + e.getMessage());
+          }
+        } else if (e instanceof LeaseExpiredException &&
+            e.getMessage().contains("File does not exist")) {
+          // This exception comes out instead of FNFE, fix it
+          throw new FileNotFoundException(
+              "The given HLog wasn't found at " + p.toString());
+        } else {
+          throw new IOException("Failed to open " + p + " for append", e);
+        }
+      }
       try {
         Thread.sleep(1000);
       } catch (InterruptedException ex) {