You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/07/23 16:56:38 UTC
svn commit: r1612845 - in /lucene/dev/trunk/lucene: ./
codecs/src/java/org/apache/lucene/codecs/blockterms/
codecs/src/java/org/apache/lucene/codecs/blocktreeords/
codecs/src/java/org/apache/lucene/codecs/memory/
core/src/java/org/apache/lucene/codecs/...
Author: rmuir
Date: Wed Jul 23 14:56:37 2014
New Revision: 1612845
URL: http://svn.apache.org/r1612845
Log:
LUCENE-5842: Validate checksum footers for postings lists/docvalues/storedfields/vectors on init
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java
lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Jul 23 14:56:37 2014
@@ -128,6 +128,11 @@ New Features
* LUCENE-5825: Benchmark module can use custom postings format, e.g.:
codec.postingsFormat=Memory (Varun Shenoy, David Smiley)
+
+* LUCENE-5842: When opening large files (where its to expensive to compare
+ checksum against all the bytes), retrieve checksum to validate structure
+ of footer, this can detect some forms of corruption such as truncation.
+ (Robert Muir)
API Changes
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java Wed Jul 23 14:56:37 2014
@@ -127,6 +127,14 @@ public class BlockTermsReader extends Fi
// Have PostingsReader init itself
postingsReader.init(in);
+
+ if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(in);
+ }
// Read per-field details
seekDir(in, dirOffset);
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java Wed Jul 23 14:56:37 2014
@@ -103,6 +103,13 @@ public final class OrdsBlockTreeTermsRea
// Have PostingsReader init itself
postingsReader.init(in);
+
+
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(in);
// Read per-field details
seekDir(in, dirOffset);
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java Wed Jul 23 14:56:37 2014
@@ -117,6 +117,12 @@ class DirectDocValuesProducer extends Do
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch");
}
+
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(data);
success = true;
} finally {
Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java Wed Jul 23 14:56:37 2014
@@ -136,6 +136,12 @@ class MemoryDocValuesProducer extends Do
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch");
}
+
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(data);
success = true;
} finally {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java Wed Jul 23 14:56:37 2014
@@ -122,6 +122,15 @@ public final class BlockTreeTermsReader
// Have PostingsReader init itself
postingsReader.init(in);
+
+
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
+ CodecUtil.retrieveChecksum(in);
+ }
// Read per-field details
seekDir(in, dirOffset);
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Wed Jul 23 14:56:37 2014
@@ -52,7 +52,6 @@ import org.apache.lucene.store.BufferedC
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
-import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -154,6 +153,14 @@ public final class CompressingStoredFiel
packedIntsVersion = fieldsStream.readVInt();
decompressor = compressionMode.newDecompressor();
this.bytes = new BytesRef();
+
+ if (version >= VERSION_CHECKSUM) {
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(fieldsStream);
+ }
success = true;
} finally {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Wed Jul 23 14:56:37 2014
@@ -131,6 +131,16 @@ public final class CompressingTermVector
throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + version2);
}
assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
+
+ long pos = vectorsStream.getFilePointer();
+ if (version >= VERSION_CHECKSUM) {
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(vectorsStream);
+ vectorsStream.seek(pos);
+ }
packedIntsVersion = vectorsStream.readVInt();
chunkSize = vectorsStream.readVInt();
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java Wed Jul 23 14:56:37 2014
@@ -79,16 +79,40 @@ public final class Lucene41PostingsReade
Lucene41PostingsWriter.VERSION_START,
Lucene41PostingsWriter.VERSION_CURRENT);
forUtil = new ForUtil(docIn);
+
+ if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(docIn);
+ }
if (fieldInfos.hasProx()) {
posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION),
ioContext);
CodecUtil.checkHeader(posIn, Lucene41PostingsWriter.POS_CODEC, version, version);
+
+ if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(posIn);
+ }
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION),
ioContext);
CodecUtil.checkHeader(payIn, Lucene41PostingsWriter.PAY_CODEC, version, version);
+
+ if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(payIn);
+ }
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Wed Jul 23 14:56:37 2014
@@ -138,6 +138,14 @@ class Lucene42DocValuesProducer extends
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch");
}
+
+ if (version >= VERSION_CHECKSUM) {
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(data);
+ }
success = true;
} finally {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java Wed Jul 23 14:56:37 2014
@@ -138,6 +138,14 @@ class Lucene45DocValuesProducer extends
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch");
}
+
+ if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) {
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(data);
+ }
success = true;
} finally {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java Wed Jul 23 14:56:37 2014
@@ -118,6 +118,12 @@ class Lucene49DocValuesProducer extends
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch");
}
+
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(data);
success = true;
} finally {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java Wed Jul 23 14:56:37 2014
@@ -92,6 +92,12 @@ class Lucene49NormsProducer extends DocV
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch");
}
+
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(data);
success = true;
} finally {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java Wed Jul 23 14:56:37 2014
@@ -104,6 +104,14 @@ public final class CompoundFileDirectory
handle = directory.openInput(fileName, context);
try {
this.entries = readEntries(directory, fileName);
+ if (version >= CompoundFileWriter.VERSION_CHECKSUM) {
+ CodecUtil.checkHeader(handle, CompoundFileWriter.DATA_CODEC, version, version);
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(handle);
+ }
success = true;
} finally {
if (!success) {
Modified: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java?rev=1612845&r1=1612844&r2=1612845&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java Wed Jul 23 14:56:37 2014
@@ -96,6 +96,12 @@ public final class VersionBlockTreeTerms
// Have PostingsReader init itself
postingsReader.init(in);
+
+ // NOTE: data file is too costly to verify checksum against all the bytes on open,
+ // but for now we at least verify proper structure of the checksum footer: which looks
+ // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+ // such as file truncation.
+ CodecUtil.retrieveChecksum(in);
// Read per-field details
seekDir(in, dirOffset);