You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/07/23 17:30:42 UTC

svn commit: r1612852 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/ lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/ lucene/codecs/src/java/org/apache/lucene/code...

Author: rmuir
Date: Wed Jul 23 15:30:41 2014
New Revision: 1612852

URL: http://svn.apache.org/r1612852
Log:
LUCENE-5842: Validate checksum footers for postings lists/docvalues/storedfields/vectors on init

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
    lucene/dev/branches/branch_4x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Wed Jul 23 15:30:41 2014
@@ -41,6 +41,11 @@ New Features
 
 * LUCENE-5825: Benchmark module can use custom postings format, e.g.:
  codec.postingsFormat=Memory (Varun Shenoy, David Smiley)
+
+* LUCENE-5842: When opening large files (where its to expensive to compare
+  checksum against all the bytes), retrieve checksum to validate structure
+  of footer, this can detect some forms of corruption such as truncation.
+  (Robert Muir)
   
 API Changes
 

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java Wed Jul 23 15:30:41 2014
@@ -130,6 +130,14 @@ public class BlockTermsReader extends Fi
 
       // Have PostingsReader init itself
       postingsReader.init(in);
+      
+      if (version >= BlockTermsWriter.VERSION_CHECKSUM) {      
+        // NOTE: data file is too costly to verify checksum against all the bytes on open,
+        // but for now we at least verify proper structure of the checksum footer: which looks
+        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+        // such as file truncation.
+        CodecUtil.retrieveChecksum(in);
+      }
 
       // Read per-field details
       seekDir(in, dirOffset);

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java Wed Jul 23 15:30:41 2014
@@ -103,6 +103,13 @@ public final class OrdsBlockTreeTermsRea
 
       // Have PostingsReader init itself
       postingsReader.init(in);
+      
+      
+      // NOTE: data file is too costly to verify checksum against all the bytes on open,
+      // but for now we at least verify proper structure of the checksum footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+      // such as file truncation.
+      CodecUtil.retrieveChecksum(in);
 
       // Read per-field details
       seekDir(in, dirOffset);

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java Wed Jul 23 15:30:41 2014
@@ -117,6 +117,12 @@ class DirectDocValuesProducer extends Do
       if (version != version2) {
         throw new CorruptIndexException("Format versions mismatch");
       }
+      
+      // NOTE: data file is too costly to verify checksum against all the bytes on open,
+      // but for now we at least verify proper structure of the checksum footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+      // such as file truncation.
+      CodecUtil.retrieveChecksum(data);
 
       success = true;
     } finally {

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java Wed Jul 23 15:30:41 2014
@@ -137,6 +137,12 @@ class MemoryDocValuesProducer extends Do
       if (version != version2) {
         throw new CorruptIndexException("Format versions mismatch");
       }
+      
+      // NOTE: data file is too costly to verify checksum against all the bytes on open,
+      // but for now we at least verify proper structure of the checksum footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+      // such as file truncation.
+      CodecUtil.retrieveChecksum(data);
 
       success = true;
     } finally {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java Wed Jul 23 15:30:41 2014
@@ -127,6 +127,15 @@ public class BlockTreeTermsReader extend
 
       // Have PostingsReader init itself
       postingsReader.init(in);
+      
+      
+      // NOTE: data file is too costly to verify checksum against all the bytes on open,
+      // but for now we at least verify proper structure of the checksum footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+      // such as file truncation.
+      if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
+        CodecUtil.retrieveChecksum(in);
+      }
 
       // Read per-field details
       seekDir(in, dirOffset);

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Wed Jul 23 15:30:41 2014
@@ -52,7 +52,6 @@ import org.apache.lucene.store.BufferedC
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
-import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
@@ -154,6 +153,14 @@ public final class CompressingStoredFiel
       packedIntsVersion = fieldsStream.readVInt();
       decompressor = compressionMode.newDecompressor();
       this.bytes = new BytesRef();
+      
+      if (version >= VERSION_CHECKSUM) {
+        // NOTE: data file is too costly to verify checksum against all the bytes on open,
+        // but for now we at least verify proper structure of the checksum footer: which looks
+        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+        // such as file truncation.
+        CodecUtil.retrieveChecksum(fieldsStream);
+      }
 
       success = true;
     } finally {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Wed Jul 23 15:30:41 2014
@@ -133,6 +133,16 @@ public final class CompressingTermVector
         throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + version2);
       }
       assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
+      
+      long pos = vectorsStream.getFilePointer();
+      if (version >= VERSION_CHECKSUM) {
+        // NOTE: data file is too costly to verify checksum against all the bytes on open,
+        // but for now we at least verify proper structure of the checksum footer: which looks
+        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+        // such as file truncation.
+        CodecUtil.retrieveChecksum(vectorsStream);
+        vectorsStream.seek(pos);
+      }
 
       packedIntsVersion = vectorsStream.readVInt();
       chunkSize = vectorsStream.readVInt();

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java Wed Jul 23 15:30:41 2014
@@ -79,16 +79,40 @@ public final class Lucene41PostingsReade
                             Lucene41PostingsWriter.VERSION_START,
                             Lucene41PostingsWriter.VERSION_CURRENT);
       forUtil = new ForUtil(docIn);
+      
+      if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+        // NOTE: data file is too costly to verify checksum against all the bytes on open,
+        // but for now we at least verify proper structure of the checksum footer: which looks
+        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+        // such as file truncation.
+        CodecUtil.retrieveChecksum(docIn);
+      }
 
       if (fieldInfos.hasProx()) {
         posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION),
                               ioContext);
         CodecUtil.checkHeader(posIn, Lucene41PostingsWriter.POS_CODEC, version, version);
+        
+        if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+          // NOTE: data file is too costly to verify checksum against all the bytes on open,
+          // but for now we at least verify proper structure of the checksum footer: which looks
+          // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+          // such as file truncation.
+          CodecUtil.retrieveChecksum(posIn);
+        }
 
         if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
           payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION),
                                 ioContext);
           CodecUtil.checkHeader(payIn, Lucene41PostingsWriter.PAY_CODEC, version, version);
+          
+          if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
+            // NOTE: data file is too costly to verify checksum against all the bytes on open,
+            // but for now we at least verify proper structure of the checksum footer: which looks
+            // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+            // such as file truncation.
+            CodecUtil.retrieveChecksum(payIn);
+          }
         }
       }
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Wed Jul 23 15:30:41 2014
@@ -139,6 +139,14 @@ class Lucene42DocValuesProducer extends 
       if (version != version2) {
         throw new CorruptIndexException("Format versions mismatch");
       }
+      
+      if (version >= VERSION_CHECKSUM) {
+        // NOTE: data file is too costly to verify checksum against all the bytes on open,
+        // but for now we at least verify proper structure of the checksum footer: which looks
+        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+        // such as file truncation.
+        CodecUtil.retrieveChecksum(data);
+      }
 
       success = true;
     } finally {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java Wed Jul 23 15:30:41 2014
@@ -139,6 +139,14 @@ class Lucene45DocValuesProducer extends 
       if (version != version2) {
         throw new CorruptIndexException("Format versions mismatch");
       }
+      
+      if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) {
+        // NOTE: data file is too costly to verify checksum against all the bytes on open,
+        // but for now we at least verify proper structure of the checksum footer: which looks
+        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+        // such as file truncation.
+        CodecUtil.retrieveChecksum(data);
+      }
 
       success = true;
     } finally {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49DocValuesProducer.java Wed Jul 23 15:30:41 2014
@@ -119,6 +119,12 @@ class Lucene49DocValuesProducer extends 
       if (version != version2) {
         throw new CorruptIndexException("Format versions mismatch");
       }
+      
+      // NOTE: data file is too costly to verify checksum against all the bytes on open,
+      // but for now we at least verify proper structure of the checksum footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+      // such as file truncation.
+      CodecUtil.retrieveChecksum(data);
 
       success = true;
     } finally {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene49/Lucene49NormsProducer.java Wed Jul 23 15:30:41 2014
@@ -92,6 +92,12 @@ class Lucene49NormsProducer extends DocV
       if (version != version2) {
         throw new CorruptIndexException("Format versions mismatch");
       }
+      
+      // NOTE: data file is too costly to verify checksum against all the bytes on open,
+      // but for now we at least verify proper structure of the checksum footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+      // such as file truncation.
+      CodecUtil.retrieveChecksum(data);
 
       success = true;
     } finally {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java Wed Jul 23 15:30:41 2014
@@ -88,6 +88,7 @@ public final class CompoundFileDirectory
   private static final Map<String,FileEntry> SENTINEL = Collections.emptyMap();
   private final CompoundFileWriter writer;
   private final IndexInput handle;
+  private int version;
   
   /**
    * Create a new CompoundFileDirectory.
@@ -103,6 +104,14 @@ public final class CompoundFileDirectory
       handle = directory.openInput(fileName, context);
       try {
         this.entries = readEntries(handle, directory, fileName);
+        if (version >= CompoundFileWriter.VERSION_CHECKSUM) {
+          CodecUtil.checkHeader(handle, CompoundFileWriter.DATA_CODEC, version, version);
+          // NOTE: data file is too costly to verify checksum against all the bytes on open,
+          // but for now we at least verify proper structure of the checksum footer: which looks
+          // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+          // such as file truncation.
+          CodecUtil.retrieveChecksum(handle);
+        }
         success = true;
       } finally {
         if (!success) {
@@ -126,7 +135,7 @@ public final class CompoundFileDirectory
   private static final byte CODEC_MAGIC_BYTE4 = (byte) CodecUtil.CODEC_MAGIC;
 
   /** Helper method that reads CFS entries from an input stream */
-  private static final Map<String, FileEntry> readEntries(
+  private final Map<String, FileEntry> readEntries(
       IndexInput handle, Directory dir, String name) throws IOException {
     IndexInput stream = null; 
     ChecksumIndexInput entriesStream = null;
@@ -149,7 +158,7 @@ public final class CompoundFileDirectory
           throw new CorruptIndexException("Illegal/impossible header for CFS file: " 
                                          + secondByte + "," + thirdByte + "," + fourthByte);
         }
-        int version = CodecUtil.checkHeaderNoMagic(stream, CompoundFileWriter.DATA_CODEC, 
+        version = CodecUtil.checkHeaderNoMagic(stream, CompoundFileWriter.DATA_CODEC, 
             CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
         final String entriesFileName = IndexFileNames.segmentFileName(
                                               IndexFileNames.stripExtension(name), "",
@@ -176,6 +185,7 @@ public final class CompoundFileDirectory
       } else {
         // TODO remove once 3.x is not supported anymore
         mapping = readLegacyEntries(stream, firstInt);
+        version = -1; // version before versioning was added
       }
       success = true;
     } finally {

Modified: lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java?rev=1612852&r1=1612851&r2=1612852&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java Wed Jul 23 15:30:41 2014
@@ -96,6 +96,12 @@ public final class VersionBlockTreeTerms
 
       // Have PostingsReader init itself
       postingsReader.init(in);
+      
+      // NOTE: data file is too costly to verify checksum against all the bytes on open,
+      // but for now we at least verify proper structure of the checksum footer: which looks
+      // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
+      // such as file truncation.
+      CodecUtil.retrieveChecksum(in);
 
       // Read per-field details
       seekDir(in, dirOffset);