You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2020/02/28 14:27:21 UTC
[lucene-solr] 02/02: LUCENE-9247: Add tests for `checkIntegrity`.
(#1284)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit a703c6e334ee736bd6c7d255ee5cbcc26154f9a0
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Fri Feb 28 14:19:56 2020 +0100
LUCENE-9247: Add tests for `checkIntegrity`. (#1284)
This adds a test to `BaseIndexFileFormatTestCase` that the combination
of opening a reader and calling `checkIntegrity` on it reads all bytes
of all files (including index headers and footers). This would help
detect most cases when `checkIntegrity` is not implemented correctly.
---
.../compressing/CompressingStoredFieldsReader.java | 1 +
.../compressing/CompressingTermVectorsReader.java | 1 +
.../lucene/codecs/compressing/FieldsIndex.java | 4 +
.../codecs/compressing/FieldsIndexReader.java | 5 +
.../compressing/LegacyFieldsIndexReader.java | 5 +
.../lucene/index/BaseIndexFileFormatTestCase.java | 192 +++++++++++++++++++++
6 files changed, 208 insertions(+)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
index 1af4133..c6476a5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
@@ -665,6 +665,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
@Override
public void checkIntegrity() throws IOException {
+ indexReader.checkIntegrity();
CodecUtil.checksumEntireFile(fieldsStream);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
index 4b045f9..6b66e24 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
@@ -1109,6 +1109,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
@Override
public void checkIntegrity() throws IOException {
+ indexReader.checkIntegrity();
CodecUtil.checksumEntireFile(vectorsStream);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java
index e86e749..f826774 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java
@@ -17,6 +17,7 @@
package org.apache.lucene.codecs.compressing;
import java.io.Closeable;
+import java.io.IOException;
import org.apache.lucene.util.Accountable;
@@ -25,6 +26,9 @@ abstract class FieldsIndex implements Accountable, Cloneable, Closeable {
/** Get the start pointer for the block that contains the given docID. */
abstract long getStartPointer(int docID);
+ /** Check the integrity of the index. */
+ abstract void checkIntegrity() throws IOException;
+
@Override
public abstract FieldsIndex clone();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java
index 566c49c..d0e180c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java
@@ -136,4 +136,9 @@ final class FieldsIndexReader extends FieldsIndex {
public long getMaxPointer() {
return maxPointer;
}
+
+ @Override
+ void checkIntegrity() throws IOException {
+ CodecUtil.checksumEntireFile(indexInput);
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java
index 3e0bc48..1b04a9a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java
@@ -216,4 +216,9 @@ final class LegacyFieldsIndexReader extends FieldsIndex {
public void close() throws IOException {
// nothing to do
}
+
+ @Override
+ void checkIntegrity() throws IOException {
+ // nothing to do, the index is checked at open time
+ }
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
index a0ad890..a47b027 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
@@ -32,6 +32,8 @@ import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.IntConsumer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -47,19 +49,24 @@ import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
+import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
@@ -720,4 +727,189 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
}
return r;
}
+
+ private static class ReadBytesIndexInputWrapper extends IndexInput {
+
+ private final IndexInput in;
+ private final IntConsumer readByte;
+
+ ReadBytesIndexInputWrapper(IndexInput in, IntConsumer readByte) {
+ super(in.toString());
+ this.in = in;
+ this.readByte = readByte;
+ }
+
+ @Override
+ public IndexInput clone() {
+ return new ReadBytesIndexInputWrapper(in.clone(), readByte);
+ }
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+
+ @Override
+ public long getFilePointer() {
+ return in.getFilePointer();
+ }
+
+ @Override
+ public void seek(long pos) throws IOException {
+ in.seek(pos);
+ }
+
+ @Override
+ public long length() {
+ return in.length();
+ }
+
+ @Override
+ public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
+ IndexInput slice = in.slice(sliceDescription, offset, length);
+ return new ReadBytesIndexInputWrapper(slice, o -> readByte.accept(Math.toIntExact(offset + o)));
+ }
+
+ @Override
+ public byte readByte() throws IOException {
+ readByte.accept(Math.toIntExact(getFilePointer()));
+ return in.readByte();
+ }
+
+ @Override
+ public void readBytes(byte[] b, int offset, int len) throws IOException {
+ final int fp = Math.toIntExact(getFilePointer());
+ for (int i = 0; i < len; ++i) {
+ readByte.accept(Math.addExact(fp, i));
+ }
+ in.readBytes(b, offset, len);
+ }
+
+ }
+
+ private static class ReadBytesDirectoryWrapper extends FilterDirectory {
+
+ ReadBytesDirectoryWrapper(Directory in) {
+ super(in);
+ }
+
+ private final Map<String, FixedBitSet> readBytes = new ConcurrentHashMap<>();
+
+ Map<String, FixedBitSet> getReadBytes() {
+ return Collections.unmodifiableMap(new HashMap<>(readBytes));
+ }
+
+ @Override
+ public IndexInput openInput(String name, IOContext context) throws IOException {
+ IndexInput in = super.openInput(name, context);
+ final FixedBitSet set = readBytes.computeIfAbsent(name, n -> new FixedBitSet(Math.toIntExact(in.length())));
+ if (set.length() != in.length()) {
+ throw new IllegalStateException();
+ }
+ return new ReadBytesIndexInputWrapper(in, set::set);
+ }
+
+ @Override
+ public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException {
+ ChecksumIndexInput in = super.openChecksumInput(name, context);
+ final FixedBitSet set = readBytes.computeIfAbsent(name, n -> new FixedBitSet(Math.toIntExact(in.length())));
+ if (set.length() != in.length()) {
+ throw new IllegalStateException();
+ }
+ return new ChecksumIndexInput(in.toString()) {
+
+ @Override
+ public void readBytes(byte[] b, int offset, int len) throws IOException {
+ final int fp = Math.toIntExact(getFilePointer());
+ set.set(fp, Math.addExact(fp, len));
+ in.readBytes(b, offset, len);
+ }
+
+ @Override
+ public byte readByte() throws IOException {
+ set.set(Math.toIntExact(getFilePointer()));
+ return in.readByte();
+ }
+
+ @Override
+ public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long length() {
+ return in.length();
+ }
+
+ @Override
+ public long getFilePointer() {
+ return in.getFilePointer();
+ }
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+
+ @Override
+ public long getChecksum() throws IOException {
+ return in.getChecksum();
+ }
+ };
+ }
+
+ @Override
+ public IndexOutput createOutput(String name, IOContext context) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ /** This test is a best effort at verifying that checkIntegrity doesn't miss any files. It tests that the
+ * combination of opening a reader and calling checkIntegrity on it reads all bytes of all files. */
+ public void testCheckIntegrityReadsAllBytes() throws Exception {
+ assumeFalse("SimpleText doesn't store checksums of its files", getCodec() instanceof SimpleTextCodec);
+ Directory dir = applyCreatedVersionMajor(newDirectory());
+
+ IndexWriterConfig cfg = new IndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter w = new IndexWriter(dir, cfg);
+ final int numDocs = atLeast(100);
+ for (int i = 0; i < numDocs; ++i) {
+ Document d = new Document();
+ addRandomFields(d);
+ w.addDocument(d);
+ }
+ w.forceMerge(1);
+ w.commit();
+ w.close();
+
+ ReadBytesDirectoryWrapper readBytesWrapperDir = new ReadBytesDirectoryWrapper(dir);
+ IndexReader reader = DirectoryReader.open(readBytesWrapperDir);
+ LeafReader leafReader = getOnlyLeafReader(reader);
+ leafReader.checkIntegrity();
+
+ Map<String, FixedBitSet> readBytesMap = readBytesWrapperDir.getReadBytes();
+
+ Set<String> unreadFiles = new HashSet<>(Arrays.asList(readBytesWrapperDir.listAll()));
+ unreadFiles.removeAll(readBytesMap.keySet());
+ assertTrue("Some files have not been open: " + unreadFiles, unreadFiles.isEmpty());
+
+ List<String> messages = new ArrayList<>();
+ for (Map.Entry<String, FixedBitSet> entry : readBytesMap.entrySet()) {
+ String name = entry.getKey();
+ FixedBitSet unreadBytes = entry.getValue().clone();
+ unreadBytes.flip(0, unreadBytes.length());
+ int unread = unreadBytes.nextSetBit(0);
+ if (unread != Integer.MAX_VALUE) {
+ messages.add("Offset " + unread + " of file " + name + "(" + unreadBytes.length() + "bytes) was not read.");
+ }
+ }
+ assertTrue(String.join("\n", messages), messages.isEmpty());
+ reader.close();
+ dir.close();
+ }
}