Posted to common-commits@hadoop.apache.org by to...@apache.org on 2007/03/29 17:43:08 UTC

svn commit: r523752 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/dfs/ src/java/org/apache/hadoop/fs/ src/test/org/apache/hadoop/dfs/

Author: tomwhite
Date: Thu Mar 29 08:43:07 2007
New Revision: 523752

URL: http://svn.apache.org/viewvc?view=rev&rev=523752
Log:
HADOOP-1123.  Fix NullPointerException in LocalFileSystem when trying to recover from a checksum error.  Contributed by Hairong Kuang & Nigel Daley.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileCorruption.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Thu Mar 29 08:43:07 2007
@@ -54,6 +54,10 @@
 15. HADOOP-1162.  Fix bug in record CSV and XML serialization of
     binary values.  (Milind Bhandarkar via cutting)
 
+16. HADOOP-1123.  Fix NullPointerException in LocalFileSystem when
+    trying to recover from a checksum error.
+    (Hairong Kuang & Nigel Daley via tomwhite)
+
 
Release 0.12.2 - 2007-03-23
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java Thu Mar 29 08:43:07 2007
@@ -308,7 +308,7 @@
      * is corrupt but we will report both to the namenode.  In the future,
      * we can consider figuring out exactly which block is corrupt.
      */
-    public void reportChecksumFailure(Path f, 
+    public boolean reportChecksumFailure(Path f, 
                                       FSDataInputStream in, long inPos, 
                                       FSDataInputStream sums, long sumsPos) {
       
@@ -347,6 +347,7 @@
                  + StringUtils.stringifyException(ie));
       }
 
+      return true;
     }
     }
 
@@ -399,10 +400,10 @@
      * is corrupt but we will report both to the namenode.  In the future,
      * we can consider figuring out exactly which block is corrupt.
      */
-    public void reportChecksumFailure(Path f, 
+    public boolean reportChecksumFailure(Path f, 
                                       FSDataInputStream in, long inPos, 
                                       FSDataInputStream sums, long sumsPos) {
-      ((RawDistributedFileSystem)fs).reportChecksumFailure(
+      return ((RawDistributedFileSystem)fs).reportChecksumFailure(
                 f, in, inPos, sums, sumsPos);
     }
 }
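
The changed signature gives reportChecksumFailure a way to tell the checksummed read path whether a retry is worthwhile: the distributed implementation returns true after reporting the bad block(s), since an uncorrupted replica may exist on another datanode, while the LocalFileSystem hunk below returns false because the local copy is the only one. A hedged sketch of a read loop built on the new contract (readChunk and the retry budget are illustrative; f, in, sums and bytesPerSum are assumed context):

    // Sketch only: report the failure once per attempt, retry only while
    // the file system says an intact copy may exist and retries remain.
    int retriesLeft = 3;                       // illustrative retry budget
    while (true) {
      try {
        readChunk();                           // hypothetical checksummed read
        break;
      } catch (ChecksumException ce) {
        long errPos = ce.getPos();
        boolean shouldRetry = fs.reportChecksumFailure(
            f, in, errPos, sums, errPos / bytesPerSum);
        if (!shouldRetry || retriesLeft-- == 0) {
          throw ce;                            // no second copy, or out of tries
        }
      }
    }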

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java Thu Mar 29 08:43:07 2007
@@ -22,7 +22,13 @@
 
 /** Thrown for checksum errors. */
 public class ChecksumException extends IOException {
-  public ChecksumException(String description) {
+  private long pos;
+  public ChecksumException(String description, long pos) {
     super(description);
+    this.pos = pos;
+  }
+  
+  public long getPos() {
+    return pos;
   }
 }
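
With the offset carried inside the exception, verifySum no longer has to report the failure inline; whoever catches the exception can recover the position and derive the matching checksum-file offset the same way the patch does (errPos / bytesPerSum). A minimal sketch of consuming the enriched exception (dis, f and bytesPerSum are assumed context):

    try {
      byte b = dis.readByte();                 // any checksummed read
    } catch (ChecksumException ce) {
      long errPos = ce.getPos();               // offset of the corrupt data
      long sumsPos = errPos / bytesPerSum;     // offset in the checksum file
      System.err.println(f + " is corrupt at byte " + errPos
                         + " (checksum entry at " + sumsPos + ")");
    }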

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java Thu Mar 29 08:43:07 2007
@@ -207,13 +207,18 @@
               summed += toSum;
               
               inSum += toSum;
-              if (inSum == bytesPerSum || endOfFile) {
+              if (inSum == bytesPerSum ) {
                 verifySum(read-(summed-bytesPerSum));
+              } else if( read == summed && endOfFile ) {
+                verifySum(read-read/bytesPerSum*bytesPerSum);
               }
             }
           } catch (ChecksumException ce) {
             LOG.info("Found checksum error: "+StringUtils.stringifyException(ce));
-            if (retriesLeft == 0) {
+            long errPos = ce.getPos();
+            boolean shouldRetry = fs.reportChecksumFailure(
+                file, datas, errPos, sums, errPos/bytesPerSum);
+            if (!shouldRetry || retriesLeft == 0) {
               throw ce;
             }
             
@@ -250,8 +255,7 @@
       inSum = 0;
       if (crc != sumValue) {
         long pos = getPos() - delta;
-        fs.reportChecksumFailure(file, datas, pos, sums, pos/bytesPerSum);
-        throw new ChecksumException("Checksum error: "+file+" at "+pos);
+        throw new ChecksumException("Checksum error: "+file+" at "+pos, pos);
       }
     }
     
@@ -629,7 +633,10 @@
    * @param inPos the position of the beginning of the bad data in the file
    * @param sums the stream open on the checksum file
    * @param sumsPos the position of the beginning of the bad data in the checksum file
+   * @return true if retry is necessary
    */
-  public abstract void reportChecksumFailure(Path f, FSDataInputStream in,
-                                             long inPos, FSDataInputStream sums, long sumsPos);
+  public boolean reportChecksumFailure(Path f, FSDataInputStream in,
+                                             long inPos, FSDataInputStream sums, long sumsPos) {
+    return false;
+  }
 }
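
Two details of this hunk are easy to miss. First, reporting moves out of verifySum and into the catch block, so a failure is reported exactly once per retry decision. Second, the new end-of-file branch verifies a trailing partial chunk whose length is read - read/bytesPerSum*bytesPerSum, i.e. read % bytesPerSum in integer arithmetic. A worked example of that tail-chunk arithmetic, with illustrative values not taken from the patch:

    // A 700-byte file read with the default 512-byte checksum chunks.
    int bytesPerSum = 512;
    long read = 700;                           // bytes read when EOF is hit
    // Integer division truncates: 700 / 512 * 512 == 512, so the delta
    // passed to verifySum covers the 188-byte tail chunk.
    long delta = read - read / bytesPerSum * bytesPerSum;   // == 188
    System.out.println("verifying trailing chunk of " + delta + " bytes");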

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java Thu Mar 29 08:43:07 2007
@@ -447,11 +447,6 @@
     throws IOException {
     }
     
-    public void reportChecksumFailure(Path p, FSDataInputStream in,
-            long inPos,
-            FSDataInputStream sums, long sumsPos) {
-    }
-    
     /**
      * Register a file with its size. This will also register a checksum for the
      * file that the user is trying to create. This is required since none of

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java Thu Mar 29 08:43:07 2007
@@ -59,7 +59,7 @@
      * Moves files to a bad file directory on the same device, so that their
      * storage will not be reused.
      */
-    public void reportChecksumFailure(Path p, FSDataInputStream in,
+    public boolean reportChecksumFailure(Path p, FSDataInputStream in,
                                       long inPos,
                                       FSDataInputStream sums, long sumsPos) {
       try {
@@ -69,12 +69,17 @@
         // find highest writable parent dir of f on the same device
         String device = new DF(f, getConf()).getMount();
         File parent = f.getParentFile();
-        File dir;
-        do {
+        File dir = null;
+        while (parent!=null && parent.canWrite() && parent.toString().startsWith(device)) {
           dir = parent;
           parent = parent.getParentFile();
-        } while (parent.canWrite() && parent.toString().startsWith(device));
+        }
 
+        if (dir==null) {
+          throw new IOException(
+              "not able to find the highest writable parent dir");
+        }
+        
         // move the file there
         File badDir = new File(dir, "bad_files");
         if (!badDir.mkdirs()) {
@@ -95,5 +100,6 @@
       } catch (IOException e) {
         LOG.warn("Error moving bad file " + p + ": " + e);
       }
+      return false;
     }
 }
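
The NullPointerException being fixed came from the old do/while: File.getParentFile() returns null once the walk passes the filesystem root, and the loop condition then called canWrite() on that null. A standalone demonstration of the null parent chain (illustrative, not part of the patch):

    import java.io.File;

    public class ParentWalkDemo {
      public static void main(String[] args) {
        File parent = new File("/");
        while (parent != null) {               // the fix adds this null guard
          System.out.println("dir: " + parent);
          parent = parent.getParentFile();     // null once we pass the root
        }
        // The old do/while evaluated parent.canWrite() here without the
        // null check and threw NullPointerException at the root.
      }
    }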

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileCorruption.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileCorruption.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileCorruption.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileCorruption.java Thu Mar 29 08:43:07 2007
@@ -22,11 +22,12 @@
 import junit.framework.*;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.ChecksumException;
+import org.apache.hadoop.fs.Path;
 
 /**
  * A JUnit test for corrupted file handling.
- *
- * @author Milind Bhandarkar
  */
 public class TestFileCorruption extends TestCase {
   
@@ -34,8 +35,6 @@
     super(testName);
   }
 
-  
-  
   protected void setUp() throws Exception {
   }
 
@@ -70,5 +69,28 @@
     } finally {
       if (cluster != null) { cluster.shutdown(); }
     }
+  }
+
+  /** check if local FS can handle corrupted blocks properly */
+  public void testLocalFileCorruption() throws Exception {
+    Configuration conf = new Configuration();
+    Path file = new Path(System.getProperty("test.build.data"), "corruptFile");
+    FileSystem fs = FileSystem.getLocal(conf);
+    DataOutputStream dos = fs.create(file);
+    dos.writeBytes("original bytes");
+    dos.close();
+    // Now deliberately corrupt the file
+    dos = new DataOutputStream(new FileOutputStream(file.toString()));
+    dos.writeBytes("corruption");
+    dos.close();
+    // Now attempt to read the file
+    DataInputStream dis = fs.open(file,512);
+    try {
+      System.out.println("A ChecksumException is expected to be logged.");
+      dis.readByte();
+    } catch (ChecksumException ignore) {
+      //expect this exception but let any NPE get thrown
+    }
+    fs.delete(file);
   }
 }
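
Since reportChecksumFailure is now a concrete method on ChecksumFileSystem with a conservative default of false, a subclass only needs to override it when a retry can actually help. A hypothetical override sketch (the method body is illustrative; constructors and other members the real class requires are omitted):

    public boolean reportChecksumFailure(Path f, FSDataInputStream in,
                                         long inPos, FSDataInputStream sums,
                                         long sumsPos) {
      System.err.println("Checksum failure in " + f + " at " + inPos);
      return true;    // an intact copy may exist elsewhere, so retry
    }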