You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/04/15 15:28:34 UTC

svn commit: r1587577 - in /lucene/dev/branches/lucene_solr_4_8: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/codecs/compressing/ lucene/core/src/java/org/apache/lucene/store/

Author: rmuir
Date: Tue Apr 15 13:28:34 2014
New Revision: 1587577

URL: http://svn.apache.org/r1587577
Log:
LUCENE-5602: always verify term vectors on bulk merge

Modified:
    lucene/dev/branches/lucene_solr_4_8/   (props changed)
    lucene/dev/branches/lucene_solr_4_8/lucene/   (props changed)
    lucene/dev/branches/lucene_solr_4_8/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene_solr_4_8/lucene/core/   (props changed)
    lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
    lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
    lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java

Modified: lucene/dev/branches/lucene_solr_4_8/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_8/lucene/CHANGES.txt?rev=1587577&r1=1587576&r2=1587577&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_8/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_8/lucene/CHANGES.txt Tue Apr 15 13:28:34 2014
@@ -95,6 +95,9 @@ New Features
 * LUCENE-5580: Checksums are automatically verified on the default stored
   fields format when performing a bulk merge. (Adrien Grand)
 
+* LUCENE-5602: Checksums are automatically verified on the default term
+  vectors format when performing a bulk merge. (Adrien Grand, Robert Muir)
+
 * LUCENE-5583: Added DataInput.skipBytes. ChecksumIndexInput can now seek, but
   only forward. (Adrien Grand, Mike McCandless, Simon Willnauer, Uwe Schindler)
 

Modified: lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1587577&r1=1587576&r2=1587577&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Tue Apr 15 13:28:34 2014
@@ -158,6 +158,10 @@ public final class CompressingTermVector
   int getPackedIntsVersion() {
     return packedIntsVersion;
   }
+  
+  int getVersion() {
+    return version;
+  }
 
   CompressingStoredFieldsIndexReader getIndex() {
     return indexReader;

Modified: lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java?rev=1587577&r1=1587576&r2=1587577&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java (original)
+++ lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java Tue Apr 15 13:28:34 2014
@@ -37,6 +37,8 @@ import org.apache.lucene.index.IndexFile
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
@@ -749,6 +751,7 @@ public final class CompressingTermVector
       final Bits liveDocs = reader.getLiveDocs();
 
       if (matchingVectorsReader == null
+          || matchingVectorsReader.getVersion() != VERSION_CURRENT
           || matchingVectorsReader.getCompressionMode() != compressionMode
           || matchingVectorsReader.getChunkSize() != chunkSize
           || matchingVectorsReader.getPackedIntsVersion() != PackedInts.VERSION_CURRENT) {
@@ -761,12 +764,19 @@ public final class CompressingTermVector
         }
       } else {
         final CompressingStoredFieldsIndexReader index = matchingVectorsReader.getIndex();
-        final IndexInput vectorsStream = matchingVectorsReader.getVectorsStream();
+        final IndexInput vectorsStreamOrig = matchingVectorsReader.getVectorsStream();
+        vectorsStreamOrig.seek(0);
+        final ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput(vectorsStreamOrig.clone());
+        
         for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; ) {
-          if (pendingDocs.isEmpty()
-              && (i == 0 || index.getStartPointer(i - 1) < index.getStartPointer(i))) { // start of a chunk
-            final long startPointer = index.getStartPointer(i);
+          // We make sure to move the checksum input in any case, otherwise the final
+          // integrity check might need to read the whole file a second time
+          final long startPointer = index.getStartPointer(i);
+          if (startPointer > vectorsStream.getFilePointer()) {
             vectorsStream.seek(startPointer);
+          }
+          if (pendingDocs.isEmpty()
+              && (i == 0 || index.getStartPointer(i - 1) < startPointer)) { // start of a chunk
             final int docBase = vectorsStream.readVInt();
             final int chunkDocs = vectorsStream.readVInt();
             assert docBase + chunkDocs <= matchingSegmentReader.maxDoc();
@@ -798,6 +808,9 @@ public final class CompressingTermVector
             i = nextLiveDoc(i + 1, liveDocs, maxDoc);
           }
         }
+        
+        vectorsStream.seek(vectorsStream.length() - CodecUtil.footerLength());
+        CodecUtil.checkFooter(vectorsStream);
       }
     }
     finish(mergeState.fieldInfos, docCount);

Modified: lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java?rev=1587577&r1=1587576&r2=1587577&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java (original)
+++ lucene/dev/branches/lucene_solr_4_8/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java Tue Apr 15 13:28:34 2014
@@ -46,7 +46,7 @@ public abstract class ChecksumIndexInput
   public void seek(long pos) throws IOException {
     final long skip = pos - getFilePointer();
     if (skip < 0) {
-      throw new IllegalStateException(ChecksumIndexInput.class + " cannot seed backward");
+      throw new IllegalStateException(getClass() + " cannot seek backwards");
     }
     skipBytes(skip);
   }