You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/05/17 12:30:00 UTC

[lucene] branch main updated: LUCENE-9356: Change test to detect mismatched checksums instead of byte flips. (#876)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new e65c0c777b6 LUCENE-9356: Change test to detect mismatched checksums instead of byte flips. (#876)
e65c0c777b6 is described below

commit e65c0c777b61a964483d1f9ed645d91973a1540e
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue May 17 14:29:51 2022 +0200

    LUCENE-9356: Change test to detect mismatched checksums instead of byte flips. (#876)
    
    This makes the test more robust and gives a good sense of whether file formats
    are implementing `checkIntegrity` correctly.
---
 .../lucene90/Lucene90HnswVectorsReader.java        |  48 ++++++----
 .../lucene91/Lucene91HnswVectorsReader.java        |  48 ++++++----
 .../codecs/lucene92/Lucene92HnswVectorsReader.java |  46 +++++----
 ...a => TestAllFilesDetectMismatchedChecksum.java} | 104 ++++++++++-----------
 4 files changed, 132 insertions(+), 114 deletions(-)

diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
index 6d061534860..531140478c6 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
@@ -128,26 +128,34 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
     String fileName =
         IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
     IndexInput in = state.directory.openInput(fileName, state.context);
-    int versionVectorData =
-        CodecUtil.checkIndexHeader(
-            in,
-            codecName,
-            Lucene90HnswVectorsFormat.VERSION_START,
-            Lucene90HnswVectorsFormat.VERSION_CURRENT,
-            state.segmentInfo.getId(),
-            state.segmentSuffix);
-    if (versionMeta != versionVectorData) {
-      throw new CorruptIndexException(
-          "Format versions mismatch: meta="
-              + versionMeta
-              + ", "
-              + codecName
-              + "="
-              + versionVectorData,
-          in);
-    }
-    checksumRef[0] = CodecUtil.retrieveChecksum(in);
-    return in;
+    boolean success = false;
+    try {
+      int versionVectorData =
+          CodecUtil.checkIndexHeader(
+              in,
+              codecName,
+              Lucene90HnswVectorsFormat.VERSION_START,
+              Lucene90HnswVectorsFormat.VERSION_CURRENT,
+              state.segmentInfo.getId(),
+              state.segmentSuffix);
+      if (versionMeta != versionVectorData) {
+        throw new CorruptIndexException(
+            "Format versions mismatch: meta="
+                + versionMeta
+                + ", "
+                + codecName
+                + "="
+                + versionVectorData,
+            in);
+      }
+      checksumRef[0] = CodecUtil.retrieveChecksum(in);
+      success = true;
+      return in;
+    } finally {
+      if (success == false) {
+        IOUtils.closeWhileHandlingException(in);
+      }
+    }
   }
 
   private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
index 7c4f5916706..45f5fd5308e 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
@@ -119,26 +119,34 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
     String fileName =
         IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
     IndexInput in = state.directory.openInput(fileName, state.context);
-    int versionVectorData =
-        CodecUtil.checkIndexHeader(
-            in,
-            codecName,
-            Lucene91HnswVectorsFormat.VERSION_START,
-            Lucene91HnswVectorsFormat.VERSION_CURRENT,
-            state.segmentInfo.getId(),
-            state.segmentSuffix);
-    if (versionMeta != versionVectorData) {
-      throw new CorruptIndexException(
-          "Format versions mismatch: meta="
-              + versionMeta
-              + ", "
-              + codecName
-              + "="
-              + versionVectorData,
-          in);
-    }
-    CodecUtil.retrieveChecksum(in);
-    return in;
+    boolean success = false;
+    try {
+      int versionVectorData =
+          CodecUtil.checkIndexHeader(
+              in,
+              codecName,
+              Lucene91HnswVectorsFormat.VERSION_START,
+              Lucene91HnswVectorsFormat.VERSION_CURRENT,
+              state.segmentInfo.getId(),
+              state.segmentSuffix);
+      if (versionMeta != versionVectorData) {
+        throw new CorruptIndexException(
+            "Format versions mismatch: meta="
+                + versionMeta
+                + ", "
+                + codecName
+                + "="
+                + versionVectorData,
+            in);
+      }
+      CodecUtil.retrieveChecksum(in);
+      success = true;
+      return in;
+    } finally {
+      if (success == false) {
+        IOUtils.closeWhileHandlingException(in);
+      }
+    }
   }
 
   private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java
index 1b1a120a6b2..70ceab4c5be 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java
@@ -115,26 +115,34 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
     String fileName =
         IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
     IndexInput in = state.directory.openInput(fileName, state.context);
-    int versionVectorData =
-        CodecUtil.checkIndexHeader(
-            in,
-            codecName,
-            Lucene92HnswVectorsFormat.VERSION_START,
-            Lucene92HnswVectorsFormat.VERSION_CURRENT,
-            state.segmentInfo.getId(),
-            state.segmentSuffix);
-    if (versionMeta != versionVectorData) {
-      throw new CorruptIndexException(
-          "Format versions mismatch: meta="
-              + versionMeta
-              + ", "
-              + codecName
-              + "="
-              + versionVectorData,
-          in);
+    boolean success = false;
+    try {
+      int versionVectorData =
+          CodecUtil.checkIndexHeader(
+              in,
+              codecName,
+              Lucene92HnswVectorsFormat.VERSION_START,
+              Lucene92HnswVectorsFormat.VERSION_CURRENT,
+              state.segmentInfo.getId(),
+              state.segmentSuffix);
+      if (versionMeta != versionVectorData) {
+        throw new CorruptIndexException(
+            "Format versions mismatch: meta="
+                + versionMeta
+                + ", "
+                + codecName
+                + "="
+                + versionVectorData,
+            in);
+      }
+      CodecUtil.retrieveChecksum(in);
+      success = true;
+      return in;
+    } finally {
+      if (success == false) {
+        IOUtils.closeWhileHandlingException(in);
+      }
     }
-    CodecUtil.retrieveChecksum(in);
-    return in;
   }
 
   private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectBitFlips.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectMismatchedChecksum.java
similarity index 59%
rename from lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectBitFlips.java
rename to lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectMismatchedChecksum.java
index 156a30b13fd..5ebbaa8fb7b 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectBitFlips.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectMismatchedChecksum.java
@@ -20,6 +20,16 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
 import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.KnnVectorField;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
@@ -27,62 +37,59 @@ import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.tests.analysis.MockAnalyzer;
 import org.apache.lucene.tests.index.RandomIndexWriter;
 import org.apache.lucene.tests.store.BaseDirectoryWrapper;
-import org.apache.lucene.tests.util.LineFileDocs;
 import org.apache.lucene.tests.util.LuceneTestCase;
-import org.apache.lucene.tests.util.LuceneTestCase.AwaitsFix;
 import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
 import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.util.BytesRef;
 
-/** Test that the default codec detects bit flips at open or checkIntegrity time. */
+/** Test that the default codec detects mismatched checksums at open or checkIntegrity time. */
 @SuppressFileSystems("ExtrasFS")
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-9356")
-public class TestAllFilesDetectBitFlips extends LuceneTestCase {
+public class TestAllFilesDetectMismatchedChecksum extends LuceneTestCase {
 
   public void test() throws Exception {
-    doTest(false);
-  }
-
-  public void testCFS() throws Exception {
-    doTest(true);
-  }
-
-  public void doTest(boolean cfs) throws Exception {
     Directory dir = newDirectory();
 
     IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
     conf.setCodec(TestUtil.getDefaultCodec());
-
-    if (cfs == false) {
-      conf.setUseCompoundFile(false);
-      conf.getMergePolicy().setNoCFSRatio(0.0);
-    }
+    // Disable CFS, which makes it harder to test due to its double checksumming
+    conf.setUseCompoundFile(false);
+    conf.getMergePolicy().setNoCFSRatio(0.0);
 
     RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
-    // Use LineFileDocs so we (hopefully) get most Lucene features
-    // tested, e.g. IntPoint was recently added to it:
-    LineFileDocs docs = new LineFileDocs(random());
+    Document doc = new Document();
+    FieldType textWithTermVectorsType = new FieldType(TextField.TYPE_STORED);
+    textWithTermVectorsType.setStoreTermVectors(true);
+    Field text = new Field("text", "", textWithTermVectorsType);
+    doc.add(text);
+    Field termString = new StringField("string", "", Store.YES);
+    doc.add(termString);
+    Field dvString = new SortedDocValuesField("string", new BytesRef());
+    doc.add(dvString);
+    Field pointNumber = new LongPoint("long", 0L);
+    doc.add(pointNumber);
+    Field dvNumber = new NumericDocValuesField("long", 0L);
+    doc.add(dvNumber);
+    KnnVectorField vector = new KnnVectorField("vector", new float[16]);
+    doc.add(vector);
+
     for (int i = 0; i < 100; i++) {
-      riw.addDocument(docs.nextDoc());
-      if (random().nextInt(7) == 0) {
-        riw.commit();
-      }
-      if (random().nextInt(20) == 0) {
-        riw.deleteDocuments(new Term("docid", Integer.toString(i)));
-      }
-      if (random().nextInt(15) == 0) {
-        riw.updateNumericDocValue(
-            new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
-      }
-    }
-    if (TEST_NIGHTLY == false) {
-      riw.forceMerge(1);
+      text.setStringValue(TestUtil.randomAnalysisString(random(), 20, true));
+      String randomString = TestUtil.randomSimpleString(random(), 5);
+      termString.setStringValue(randomString);
+      dvString.setBytesValue(new BytesRef(randomString));
+      long number = random().nextInt(10);
+      pointNumber.setLongValue(number);
+      dvNumber.setLongValue(number);
+      Arrays.fill(vector.vectorValue(), i % 4);
+      riw.addDocument(doc);
     }
+    riw.deleteDocuments(LongPoint.newRangeQuery("long", 0, 2));
     riw.close();
-    checkBitFlips(dir);
+    checkMismatchedChecksum(dir);
     dir.close();
   }
 
-  private void checkBitFlips(Directory dir) throws IOException {
+  private void checkMismatchedChecksum(Directory dir) throws IOException {
     for (String name : dir.listAll()) {
       if (name.equals(IndexWriter.WRITE_LOCK_NAME) == false) {
         corruptFile(dir, name);
@@ -95,7 +102,9 @@ public class TestAllFilesDetectBitFlips extends LuceneTestCase {
       dirCopy.setCheckIndexOnClose(false);
 
       long victimLength = dir.fileLength(victim);
-      long flipOffset = TestUtil.nextLong(random(), 0, victimLength - 1);
+      long flipOffset =
+          TestUtil.nextLong(
+              random(), Math.max(0, victimLength - CodecUtil.footerLength()), victimLength - 1);
 
       if (VERBOSE) {
         System.out.println(
@@ -118,28 +127,13 @@ public class TestAllFilesDetectBitFlips extends LuceneTestCase {
             out.writeByte((byte) (in.readByte() + TestUtil.nextInt(random(), 0x01, 0xFF)));
             out.copyBytes(in, victimLength - flipOffset - 1);
           }
-          try (IndexInput in = dirCopy.openInput(name, IOContext.DEFAULT)) {
-            try {
-              CodecUtil.checksumEntireFile(in);
-              System.out.println(
-                  "TEST: changing a byte in " + victim + " did not update the checksum)");
-              return;
-            } catch (
-                @SuppressWarnings("unused")
-                CorruptIndexException e) {
-              // ok
-            }
-          }
         }
         dirCopy.sync(Collections.singleton(name));
       }
 
       // corruption must be detected
-      expectThrowsAnyOf(
-          Arrays.asList(
-              CorruptIndexException.class,
-              IndexFormatTooOldException.class,
-              IndexFormatTooNewException.class),
+      expectThrows(
+          CorruptIndexException.class,
           () -> {
             try (IndexReader reader = DirectoryReader.open(dirCopy)) {
               for (LeafReaderContext context : reader.leaves()) {