You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2020/06/16 10:16:24 UTC
[lucene-solr] branch branch_8x updated: LUCENE-9396: Improve truncation detection for points. (#1557)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 2711c28 LUCENE-9396: Improve truncation detection for points. (#1557)
2711c28 is described below
commit 2711c288421945aa7a1f22b77ee3a672ed5db7e4
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Jun 16 12:04:41 2020 +0200
LUCENE-9396: Improve truncation detection for points. (#1557)
---
lucene/CHANGES.txt | 2 +
.../java/org/apache/lucene/codecs/CodecUtil.java | 20 +++++++++-
.../codecs/lucene86/Lucene86PointsReader.java | 9 ++++-
.../codecs/lucene86/Lucene86PointsWriter.java | 4 +-
.../org/apache/lucene/codecs/TestCodecUtil.java | 45 ++++++++++++++++++++++
5 files changed, 76 insertions(+), 4 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0bbf6c8..f10f798 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -73,6 +73,8 @@ Improvements
* LUCENE-9397: UniformSplit supports encodable fields metadata. (Bruno Roustant)
+* LUCENE-9396: Improved truncation detection for points. (Adrien Grand, Robert Muir)
+
Optimizations
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
index 2e736c1..8c40e2a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
@@ -490,7 +490,25 @@ public final class CodecUtil {
validateFooter(in);
return readCRC(in);
}
-
+
+ /**
+ * Returns (but does not validate) the checksum previously written by {@link #writeFooter}.
+ * @return actual checksum value
+ * @throws IOException if the file length differs from {@code expectedLength} (truncated or too long) or the footer is invalid
+ */
+ public static long retrieveChecksum(IndexInput in, long expectedLength) throws IOException {
+ if (expectedLength < footerLength()) {
+ throw new IllegalArgumentException("expectedLength cannot be less than the footer length");
+ }
+ if (in.length() < expectedLength) {
+ throw new CorruptIndexException("truncated file: length=" + in.length() + " but expectedLength==" + expectedLength, in);
+ } else if (in.length() > expectedLength) {
+ throw new CorruptIndexException("file too long: length=" + in.length() + " but expectedLength==" + expectedLength, in);
+ }
+
+ return retrieveChecksum(in);
+ }
+
private static void validateFooter(IndexInput in) throws IOException {
long remaining = in.length() - in.getFilePointer();
long expected = footerLength();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java
index 9aabc97..fdc3cbd 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java
@@ -63,7 +63,6 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
Lucene86PointsFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix);
- CodecUtil.retrieveChecksum(indexIn);
dataIn = readState.directory.openInput(dataFileName, readState.context);
CodecUtil.checkIndexHeader(dataIn,
@@ -72,8 +71,8 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
Lucene86PointsFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix);
- CodecUtil.retrieveChecksum(dataIn);
+ long indexLength = -1, dataLength = -1;
try (ChecksumIndexInput metaIn = readState.directory.openChecksumInput(metaFileName, readState.context)) {
Throwable priorE = null;
try {
@@ -94,12 +93,18 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
BKDReader reader = new BKDReader(metaIn, indexIn, dataIn);
readers.put(fieldNumber, reader);
}
+ indexLength = metaIn.readLong();
+ dataLength = metaIn.readLong();
} catch (Throwable t) {
priorE = t;
} finally {
CodecUtil.checkFooter(metaIn, priorE);
}
}
+ // At this point, checksums of the meta file have been validated so we
+ // know that indexLength and dataLength are very likely correct.
+ CodecUtil.retrieveChecksum(indexIn, indexLength);
+ CodecUtil.retrieveChecksum(dataIn, dataLength);
success = true;
} finally {
if (success == false) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java
index 4fceecc..6fe3571 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java
@@ -251,9 +251,11 @@ public class Lucene86PointsWriter extends PointsWriter implements Closeable {
}
finished = true;
metaOut.writeInt(-1);
- CodecUtil.writeFooter(metaOut);
CodecUtil.writeFooter(indexOut);
CodecUtil.writeFooter(dataOut);
+ metaOut.writeLong(indexOut.getFilePointer());
+ metaOut.writeLong(dataOut.getFilePointer());
+ CodecUtil.writeFooter(metaOut);
}
@Override
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java
index 279db56..8cd90fb 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java
@@ -23,6 +23,8 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMFile;
@@ -315,4 +317,47 @@ public class TestCodecUtil extends LuceneTestCase {
() -> CodecUtil.retrieveChecksum(input));
assertEquals("misplaced codec footer (file truncated?): length=0 but footerLength==16 (resource=RAMInputStream(name=file))", e.getMessage());
}
+
+ public void testRetrieveChecksum() throws IOException {
+ Directory dir = newDirectory();
+ try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
+ out.writeByte((byte) 42);
+ CodecUtil.writeFooter(out);
+ }
+ try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
+ CodecUtil.retrieveChecksum(in, in.length()); // no exception
+
+ CorruptIndexException exception = expectThrows(CorruptIndexException.class,
+ () -> CodecUtil.retrieveChecksum(in, in.length() - 1));
+ assertTrue(exception.getMessage().contains("too long"));
+ assertArrayEquals(new Throwable[0], exception.getSuppressed());
+
+ exception = expectThrows(CorruptIndexException.class,
+ () -> CodecUtil.retrieveChecksum(in, in.length() + 1));
+ assertTrue(exception.getMessage().contains("truncated"));
+ assertArrayEquals(new Throwable[0], exception.getSuppressed());
+ }
+
+ try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) {
+ for (int i = 0; i <= CodecUtil.footerLength(); ++i) {
+ out.writeByte((byte) i);
+ }
+ }
+ try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) {
+ CorruptIndexException exception = expectThrows(CorruptIndexException.class,
+ () -> CodecUtil.retrieveChecksum(in, in.length()));
+ assertTrue(exception.getMessage().contains("codec footer mismatch"));
+ assertArrayEquals(new Throwable[0], exception.getSuppressed());
+
+ exception = expectThrows(CorruptIndexException.class,
+ () -> CodecUtil.retrieveChecksum(in, in.length() - 1));
+ assertTrue(exception.getMessage().contains("too long"));
+
+ exception = expectThrows(CorruptIndexException.class,
+ () -> CodecUtil.retrieveChecksum(in, in.length() + 1));
+ assertTrue(exception.getMessage().contains("truncated"));
+ }
+
+ dir.close();
+ }
}