You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by to...@apache.org on 2007/03/29 17:43:08 UTC
svn commit: r523752 - in /lucene/hadoop/trunk: ./
src/java/org/apache/hadoop/dfs/ src/java/org/apache/hadoop/fs/
src/test/org/apache/hadoop/dfs/
Author: tomwhite
Date: Thu Mar 29 08:43:07 2007
New Revision: 523752
URL: http://svn.apache.org/viewvc?view=rev&rev=523752
Log:
HADOOP-1123. Fix NullPointerException in LocalFileSystem when trying to recover from a checksum error. Contributed by Hairong Kuang & Nigel Daley.
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java
lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileCorruption.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Thu Mar 29 08:43:07 2007
@@ -54,6 +54,10 @@
15. HADOOP-1162. Fix bug in record CSV and XML serialization of
binary values. (Milind Bhandarkar via cutting)
+16. HADOOP-1123. Fix NullPointerException in LocalFileSystem when
+ trying to recover from a checksum error.
+ (Hairong Kuang & Nigel Daley via tomwhite)
+
Release 0.12.2 - 2007-03-23
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java Thu Mar 29 08:43:07 2007
@@ -308,7 +308,7 @@
* is corrupt but we will report both to the namenode. In the future,
* we can consider figuring out exactly which block is corrupt.
*/
- public void reportChecksumFailure(Path f,
+ public boolean reportChecksumFailure(Path f,
FSDataInputStream in, long inPos,
FSDataInputStream sums, long sumsPos) {
@@ -347,6 +347,7 @@
+ StringUtils.stringifyException(ie));
}
+ return true;
}
}
@@ -399,10 +400,10 @@
* is corrupt but we will report both to the namenode. In the future,
* we can consider figuring out exactly which block is corrupt.
*/
- public void reportChecksumFailure(Path f,
+ public boolean reportChecksumFailure(Path f,
FSDataInputStream in, long inPos,
FSDataInputStream sums, long sumsPos) {
- ((RawDistributedFileSystem)fs).reportChecksumFailure(
+ return ((RawDistributedFileSystem)fs).reportChecksumFailure(
f, in, inPos, sums, sumsPos);
}
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java Thu Mar 29 08:43:07 2007
@@ -22,7 +22,13 @@
/** Thrown for checksum errors. */
public class ChecksumException extends IOException {
- public ChecksumException(String description) {
+ private long pos;
+ public ChecksumException(String description, long pos) {
super(description);
+ this.pos = pos;
+ }
+
+ public long getPos() {
+ return pos;
}
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumFileSystem.java Thu Mar 29 08:43:07 2007
@@ -207,13 +207,18 @@
summed += toSum;
inSum += toSum;
- if (inSum == bytesPerSum || endOfFile) {
+ if (inSum == bytesPerSum ) {
verifySum(read-(summed-bytesPerSum));
+ } else if( read == summed && endOfFile ) {
+ verifySum(read-read/bytesPerSum*bytesPerSum);
}
}
} catch (ChecksumException ce) {
LOG.info("Found checksum error: "+StringUtils.stringifyException(ce));
- if (retriesLeft == 0) {
+ long errPos = ce.getPos();
+ boolean shouldRetry = fs.reportChecksumFailure(
+ file, datas, errPos, sums, errPos/bytesPerSum);
+ if (!shouldRetry || retriesLeft == 0) {
throw ce;
}
@@ -250,8 +255,7 @@
inSum = 0;
if (crc != sumValue) {
long pos = getPos() - delta;
- fs.reportChecksumFailure(file, datas, pos, sums, pos/bytesPerSum);
- throw new ChecksumException("Checksum error: "+file+" at "+pos);
+ throw new ChecksumException("Checksum error: "+file+" at "+pos, pos);
}
}
@@ -629,7 +633,10 @@
* @param inPos the position of the beginning of the bad data in the file
* @param sums the stream open on the checksum file
* @param sumsPos the position of the beginning of the bad data in the checksum file
+ * @return whether retry is necessary
*/
- public abstract void reportChecksumFailure(Path f, FSDataInputStream in,
- long inPos, FSDataInputStream sums, long sumsPos);
+ public boolean reportChecksumFailure(Path f, FSDataInputStream in,
+ long inPos, FSDataInputStream sums, long sumsPos) {
+ return false;
+ }
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/InMemoryFileSystem.java Thu Mar 29 08:43:07 2007
@@ -447,11 +447,6 @@
throws IOException {
}
- public void reportChecksumFailure(Path p, FSDataInputStream in,
- long inPos,
- FSDataInputStream sums, long sumsPos) {
- }
-
/**
* Register a file with its size. This will also register a checksum for the
* file that the user is trying to create. This is required since none of
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java Thu Mar 29 08:43:07 2007
@@ -59,7 +59,7 @@
* Moves files to a bad file directory on the same device, so that their
* storage will not be reused.
*/
- public void reportChecksumFailure(Path p, FSDataInputStream in,
+ public boolean reportChecksumFailure(Path p, FSDataInputStream in,
long inPos,
FSDataInputStream sums, long sumsPos) {
try {
@@ -69,12 +69,17 @@
// find highest writable parent dir of f on the same device
String device = new DF(f, getConf()).getMount();
File parent = f.getParentFile();
- File dir;
- do {
+ File dir = null;
+ while (parent!=null && parent.canWrite() && parent.toString().startsWith(device)) {
dir = parent;
parent = parent.getParentFile();
- } while (parent.canWrite() && parent.toString().startsWith(device));
+ }
+ if (dir==null) {
+ throw new IOException(
+ "not able to find the highest writable parent dir");
+ }
+
// move the file there
File badDir = new File(dir, "bad_files");
if (!badDir.mkdirs()) {
@@ -95,5 +100,6 @@
} catch (IOException e) {
LOG.warn("Error moving bad file " + p + ": " + e);
}
+ return false;
}
}
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileCorruption.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileCorruption.java?view=diff&rev=523752&r1=523751&r2=523752
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileCorruption.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileCorruption.java Thu Mar 29 08:43:07 2007
@@ -22,11 +22,12 @@
import junit.framework.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.ChecksumException;
+import org.apache.hadoop.fs.Path;
/**
* A JUnit test for corrupted file handling.
- *
- * @author Milind Bhandarkar
*/
public class TestFileCorruption extends TestCase {
@@ -34,8 +35,6 @@
super(testName);
}
-
-
protected void setUp() throws Exception {
}
@@ -70,5 +69,28 @@
} finally {
if (cluster != null) { cluster.shutdown(); }
}
+ }
+
+ /** check if local FS can handle corrupted blocks properly */
+ public void testLocalFileCorruption() throws Exception {
+ Configuration conf = new Configuration();
+ Path file = new Path(System.getProperty("test.build.data"), "corruptFile");
+ FileSystem fs = FileSystem.getLocal(conf);
+ DataOutputStream dos = fs.create(file);
+ dos.writeBytes("original bytes");
+ dos.close();
+ // Now deliberately corrupt the file
+ dos = new DataOutputStream(new FileOutputStream(file.toString()));
+ dos.writeBytes("corruption");
+ dos.close();
+ // Now attempt to read the file
+ DataInputStream dis = fs.open(file,512);
+ try {
+ System.out.println("A ChecksumException is expected to be logged.");
+ dis.readByte();
+ } catch (ChecksumException ignore) {
+ //expect this exception but let any NPE get thrown
+ }
+ fs.delete(file);
}
}