You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/04/09 10:52:29 UTC

svn commit: r1585910 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/codecs/compressing/ core/src/java/org/apache/lucene/store/ test-framework/src/java/org/apache/lucene/index/

Author: jpountz
Date: Wed Apr  9 08:52:28 2014
New Revision: 1585910

URL: http://svn.apache.org/r1585910
Log:
LUCENE-5580: Always verify stored fields checksums on bulk merge.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1585910&r1=1585909&r2=1585910&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Apr  9 08:52:28 2014
@@ -160,6 +160,9 @@ New Features
   before merges by enabling IndexWriterConfig.setCheckIntegrityAtMerge.
   (Robert Muir)
 
+* LUCENE-5580: Checksums are automatically verified on the default stored
+  fields format when performing a bulk merge. (Adrien Grand)
+
 API Changes
 
 * LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1585910&r1=1585909&r2=1585910&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Wed Apr  9 08:52:28 2014
@@ -48,6 +48,7 @@ import org.apache.lucene.index.IndexFile
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
@@ -393,25 +394,47 @@ public final class CompressingStoredFiel
 
   ChunkIterator chunkIterator(int startDocID) throws IOException {
     ensureOpen();
-    fieldsStream.seek(indexReader.getStartPointer(startDocID));
-    return new ChunkIterator();
+    return new ChunkIterator(startDocID);
   }
 
   final class ChunkIterator {
 
-    BytesRef spare;
-    BytesRef bytes;
+    final ChecksumIndexInput fieldsStream;
+    final BytesRef spare;
+    final BytesRef bytes;
     int docBase;
     int chunkDocs;
     int[] numStoredFields;
     int[] lengths;
 
-    private ChunkIterator() {
+    private ChunkIterator(int startDocId) throws IOException {
       this.docBase = -1;
       bytes = new BytesRef();
       spare = new BytesRef();
       numStoredFields = new int[1];
       lengths = new int[1];
+
+      IndexInput in = CompressingStoredFieldsReader.this.fieldsStream;
+      in.seek(0);
+      fieldsStream = new BufferedChecksumIndexInput(in) {
+
+        final byte[] skipBuffer = new byte[256];
+
+        @Override
+        public void seek(long target) throws IOException {
+          final long skip = target - getFilePointer();
+          if (skip < 0) {
+            throw new IllegalStateException("Seeking backward on merge: " + skip);
+          }
+          for (long skipped = 0; skipped < skip; ) {
+            final int step = (int) Math.min(skipBuffer.length, skip - skipped);
+            readBytes(skipBuffer, 0, step);
+            skipped += step;
+          }
+        }
+
+      };
+      fieldsStream.seek(indexReader.getStartPointer(startDocId));
     }
 
     /**
@@ -514,6 +537,16 @@ public final class CompressingStoredFiel
       out.copyBytes(fieldsStream, chunkEnd - fieldsStream.getFilePointer());
     }
 
+    /**
+     * Check integrity of the data. The iterator is not usable after this method has been called.
+     */
+    void checkIntegrity() throws IOException {
+      if (version >= VERSION_CHECKSUM) {
+        fieldsStream.seek(fieldsStream.length() - CodecUtil.footerLength());
+        CodecUtil.checkFooter(fieldsStream);
+      }
+    }
+
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java?rev=1585910&r1=1585909&r2=1585910&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java Wed Apr  9 08:52:28 2014
@@ -399,6 +399,8 @@ public final class CompressingStoredFiel
               }
             }
           } while (docID < maxDoc);
+
+          it.checkIntegrity();
         }
       }
     }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java?rev=1585910&r1=1585909&r2=1585910&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java Wed Apr  9 08:52:28 2014
@@ -36,7 +36,7 @@ public abstract class ChecksumIndexInput
   public abstract long getChecksum() throws IOException;
 
   @Override
-  public void seek(long pos) {
+  public void seek(long pos) throws IOException {
     throw new UnsupportedOperationException();
   }
 }

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java?rev=1585910&r1=1585909&r2=1585910&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java Wed Apr  9 08:52:28 2014
@@ -648,4 +648,29 @@ public abstract class BaseStoredFieldsFo
     dir.close();
   }
 
+  public void testBulkMergeWithDeletes() throws IOException {
+    final int numDocs = atLeast(200);
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
+    for (int i = 0; i < numDocs; ++i) {
+      Document doc = new Document();
+      doc.add(new StringField("id", Integer.toString(i), Store.YES));
+      doc.add(new StoredField("f", TestUtil.randomSimpleString(random())));
+      w.addDocument(doc);
+    }
+    final int deleteCount = TestUtil.nextInt(random(), 5, numDocs);
+    for (int i = 0; i < deleteCount; ++i) {
+      final int id = random().nextInt(numDocs);
+      w.deleteDocuments(new Term("id", Integer.toString(id)));
+    }
+    w.commit();
+    w.close();
+    w = new RandomIndexWriter(random(), dir);
+    w.forceMerge(TestUtil.nextInt(random(), 1, 3));
+    w.commit();
+    w.close();
+    TestUtil.checkIndex(dir);
+    dir.close();
+  }
+
 }