You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/05/17 12:30:00 UTC
[lucene] branch main updated: LUCENE-9356: Change test to detect mismatched checksums instead of byte flips. (#876)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new e65c0c777b6 LUCENE-9356: Change test to detect mismatched checksums instead of byte flips. (#876)
e65c0c777b6 is described below
commit e65c0c777b61a964483d1f9ed645d91973a1540e
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue May 17 14:29:51 2022 +0200
LUCENE-9356: Change test to detect mismatched checksums instead of byte flips. (#876)
This makes the test more robust and gives a good sense of whether file formats
are implementing `checkIntegrity` correctly.
---
.../lucene90/Lucene90HnswVectorsReader.java | 48 ++++++----
.../lucene91/Lucene91HnswVectorsReader.java | 48 ++++++----
.../codecs/lucene92/Lucene92HnswVectorsReader.java | 46 +++++----
...a => TestAllFilesDetectMismatchedChecksum.java} | 104 ++++++++++-----------
4 files changed, 132 insertions(+), 114 deletions(-)
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
index 6d061534860..531140478c6 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
@@ -128,26 +128,34 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
String fileName =
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
IndexInput in = state.directory.openInput(fileName, state.context);
- int versionVectorData =
- CodecUtil.checkIndexHeader(
- in,
- codecName,
- Lucene90HnswVectorsFormat.VERSION_START,
- Lucene90HnswVectorsFormat.VERSION_CURRENT,
- state.segmentInfo.getId(),
- state.segmentSuffix);
- if (versionMeta != versionVectorData) {
- throw new CorruptIndexException(
- "Format versions mismatch: meta="
- + versionMeta
- + ", "
- + codecName
- + "="
- + versionVectorData,
- in);
- }
- checksumRef[0] = CodecUtil.retrieveChecksum(in);
- return in;
+ boolean success = false;
+ try {
+ int versionVectorData =
+ CodecUtil.checkIndexHeader(
+ in,
+ codecName,
+ Lucene90HnswVectorsFormat.VERSION_START,
+ Lucene90HnswVectorsFormat.VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+ if (versionMeta != versionVectorData) {
+ throw new CorruptIndexException(
+ "Format versions mismatch: meta="
+ + versionMeta
+ + ", "
+ + codecName
+ + "="
+ + versionVectorData,
+ in);
+ }
+ checksumRef[0] = CodecUtil.retrieveChecksum(in);
+ success = true;
+ return in;
+ } finally {
+ if (success == false) {
+ IOUtils.closeWhileHandlingException(in);
+ }
+ }
}
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
index 7c4f5916706..45f5fd5308e 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
@@ -119,26 +119,34 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
String fileName =
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
IndexInput in = state.directory.openInput(fileName, state.context);
- int versionVectorData =
- CodecUtil.checkIndexHeader(
- in,
- codecName,
- Lucene91HnswVectorsFormat.VERSION_START,
- Lucene91HnswVectorsFormat.VERSION_CURRENT,
- state.segmentInfo.getId(),
- state.segmentSuffix);
- if (versionMeta != versionVectorData) {
- throw new CorruptIndexException(
- "Format versions mismatch: meta="
- + versionMeta
- + ", "
- + codecName
- + "="
- + versionVectorData,
- in);
- }
- CodecUtil.retrieveChecksum(in);
- return in;
+ boolean success = false;
+ try {
+ int versionVectorData =
+ CodecUtil.checkIndexHeader(
+ in,
+ codecName,
+ Lucene91HnswVectorsFormat.VERSION_START,
+ Lucene91HnswVectorsFormat.VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+ if (versionMeta != versionVectorData) {
+ throw new CorruptIndexException(
+ "Format versions mismatch: meta="
+ + versionMeta
+ + ", "
+ + codecName
+ + "="
+ + versionVectorData,
+ in);
+ }
+ CodecUtil.retrieveChecksum(in);
+ success = true;
+ return in;
+ } finally {
+ if (success == false) {
+ IOUtils.closeWhileHandlingException(in);
+ }
+ }
}
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java
index 1b1a120a6b2..70ceab4c5be 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java
@@ -115,26 +115,34 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
String fileName =
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
IndexInput in = state.directory.openInput(fileName, state.context);
- int versionVectorData =
- CodecUtil.checkIndexHeader(
- in,
- codecName,
- Lucene92HnswVectorsFormat.VERSION_START,
- Lucene92HnswVectorsFormat.VERSION_CURRENT,
- state.segmentInfo.getId(),
- state.segmentSuffix);
- if (versionMeta != versionVectorData) {
- throw new CorruptIndexException(
- "Format versions mismatch: meta="
- + versionMeta
- + ", "
- + codecName
- + "="
- + versionVectorData,
- in);
+ boolean success = false;
+ try {
+ int versionVectorData =
+ CodecUtil.checkIndexHeader(
+ in,
+ codecName,
+ Lucene92HnswVectorsFormat.VERSION_START,
+ Lucene92HnswVectorsFormat.VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+ if (versionMeta != versionVectorData) {
+ throw new CorruptIndexException(
+ "Format versions mismatch: meta="
+ + versionMeta
+ + ", "
+ + codecName
+ + "="
+ + versionVectorData,
+ in);
+ }
+ CodecUtil.retrieveChecksum(in);
+ success = true;
+ return in;
+ } finally {
+ if (success == false) {
+ IOUtils.closeWhileHandlingException(in);
+ }
}
- CodecUtil.retrieveChecksum(in);
- return in;
}
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectBitFlips.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectMismatchedChecksum.java
similarity index 59%
rename from lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectBitFlips.java
rename to lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectMismatchedChecksum.java
index 156a30b13fd..5ebbaa8fb7b 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectBitFlips.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesDetectMismatchedChecksum.java
@@ -20,6 +20,16 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.KnnVectorField;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -27,62 +37,59 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.store.BaseDirectoryWrapper;
-import org.apache.lucene.tests.util.LineFileDocs;
import org.apache.lucene.tests.util.LuceneTestCase;
-import org.apache.lucene.tests.util.LuceneTestCase.AwaitsFix;
import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.util.BytesRef;
-/** Test that the default codec detects bit flips at open or checkIntegrity time. */
+/** Test that the default codec detects mismatched checksums at open or checkIntegrity time. */
@SuppressFileSystems("ExtrasFS")
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-9356")
-public class TestAllFilesDetectBitFlips extends LuceneTestCase {
+public class TestAllFilesDetectMismatchedChecksum extends LuceneTestCase {
public void test() throws Exception {
- doTest(false);
- }
-
- public void testCFS() throws Exception {
- doTest(true);
- }
-
- public void doTest(boolean cfs) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setCodec(TestUtil.getDefaultCodec());
-
- if (cfs == false) {
- conf.setUseCompoundFile(false);
- conf.getMergePolicy().setNoCFSRatio(0.0);
- }
+ // Disable CFS, which makes it harder to test due to its double checksumming
+ conf.setUseCompoundFile(false);
+ conf.getMergePolicy().setNoCFSRatio(0.0);
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
- // Use LineFileDocs so we (hopefully) get most Lucene features
- // tested, e.g. IntPoint was recently added to it:
- LineFileDocs docs = new LineFileDocs(random());
+ Document doc = new Document();
+ FieldType textWithTermVectorsType = new FieldType(TextField.TYPE_STORED);
+ textWithTermVectorsType.setStoreTermVectors(true);
+ Field text = new Field("text", "", textWithTermVectorsType);
+ doc.add(text);
+ Field termString = new StringField("string", "", Store.YES);
+ doc.add(termString);
+ Field dvString = new SortedDocValuesField("string", new BytesRef());
+ doc.add(dvString);
+ Field pointNumber = new LongPoint("long", 0L);
+ doc.add(pointNumber);
+ Field dvNumber = new NumericDocValuesField("long", 0L);
+ doc.add(dvNumber);
+ KnnVectorField vector = new KnnVectorField("vector", new float[16]);
+ doc.add(vector);
+
for (int i = 0; i < 100; i++) {
- riw.addDocument(docs.nextDoc());
- if (random().nextInt(7) == 0) {
- riw.commit();
- }
- if (random().nextInt(20) == 0) {
- riw.deleteDocuments(new Term("docid", Integer.toString(i)));
- }
- if (random().nextInt(15) == 0) {
- riw.updateNumericDocValue(
- new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
- }
- }
- if (TEST_NIGHTLY == false) {
- riw.forceMerge(1);
+ text.setStringValue(TestUtil.randomAnalysisString(random(), 20, true));
+ String randomString = TestUtil.randomSimpleString(random(), 5);
+ termString.setStringValue(randomString);
+ dvString.setBytesValue(new BytesRef(randomString));
+ long number = random().nextInt(10);
+ pointNumber.setLongValue(number);
+ dvNumber.setLongValue(number);
+ Arrays.fill(vector.vectorValue(), i % 4);
+ riw.addDocument(doc);
}
+ riw.deleteDocuments(LongPoint.newRangeQuery("long", 0, 2));
riw.close();
- checkBitFlips(dir);
+ checkMismatchedChecksum(dir);
dir.close();
}
- private void checkBitFlips(Directory dir) throws IOException {
+ private void checkMismatchedChecksum(Directory dir) throws IOException {
for (String name : dir.listAll()) {
if (name.equals(IndexWriter.WRITE_LOCK_NAME) == false) {
corruptFile(dir, name);
@@ -95,7 +102,9 @@ public class TestAllFilesDetectBitFlips extends LuceneTestCase {
dirCopy.setCheckIndexOnClose(false);
long victimLength = dir.fileLength(victim);
- long flipOffset = TestUtil.nextLong(random(), 0, victimLength - 1);
+ long flipOffset =
+ TestUtil.nextLong(
+ random(), Math.max(0, victimLength - CodecUtil.footerLength()), victimLength - 1);
if (VERBOSE) {
System.out.println(
@@ -118,28 +127,13 @@ public class TestAllFilesDetectBitFlips extends LuceneTestCase {
out.writeByte((byte) (in.readByte() + TestUtil.nextInt(random(), 0x01, 0xFF)));
out.copyBytes(in, victimLength - flipOffset - 1);
}
- try (IndexInput in = dirCopy.openInput(name, IOContext.DEFAULT)) {
- try {
- CodecUtil.checksumEntireFile(in);
- System.out.println(
- "TEST: changing a byte in " + victim + " did not update the checksum)");
- return;
- } catch (
- @SuppressWarnings("unused")
- CorruptIndexException e) {
- // ok
- }
- }
}
dirCopy.sync(Collections.singleton(name));
}
// corruption must be detected
- expectThrowsAnyOf(
- Arrays.asList(
- CorruptIndexException.class,
- IndexFormatTooOldException.class,
- IndexFormatTooNewException.class),
+ expectThrows(
+ CorruptIndexException.class,
() -> {
try (IndexReader reader = DirectoryReader.open(dirCopy)) {
for (LeafReaderContext context : reader.leaves()) {