You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2020/06/16 10:16:24 UTC

[lucene-solr] branch branch_8x updated: LUCENE-9396: Improve truncation detection for points. (#1557)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 2711c28  LUCENE-9396: Improve truncation detection for points. (#1557)
2711c28 is described below

commit 2711c288421945aa7a1f22b77ee3a672ed5db7e4
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Jun 16 12:04:41 2020 +0200

    LUCENE-9396: Improve truncation detection for points. (#1557)
---
 lucene/CHANGES.txt                                 |  2 +
 .../java/org/apache/lucene/codecs/CodecUtil.java   | 20 +++++++++-
 .../codecs/lucene86/Lucene86PointsReader.java      |  9 ++++-
 .../codecs/lucene86/Lucene86PointsWriter.java      |  4 +-
 .../org/apache/lucene/codecs/TestCodecUtil.java    | 45 ++++++++++++++++++++++
 5 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0bbf6c8..f10f798 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -73,6 +73,8 @@ Improvements
 
 * LUCENE-9397: UniformSplit supports encodable fields metadata. (Bruno Roustant)
 
+* LUCENE-9396: Improved truncation detection for points. (Adrien Grand, Robert Muir)
+
 Optimizations
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
index 2e736c1..8c40e2a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
@@ -490,7 +490,25 @@ public final class CodecUtil {
     validateFooter(in);
     return readCRC(in);
   }
-  
+
+  /** 
+   * Returns (but does not validate) the checksum previously written by {@link #writeFooter}, after checking that {@code in.length()} equals {@code expectedLength}.
+   * @return actual checksum value
+   * @throws IOException if the footer is invalid; a {@link CorruptIndexException} is thrown if the file is shorter or longer than {@code expectedLength}
+   */
+  public static long retrieveChecksum(IndexInput in, long expectedLength) throws IOException {
+    if (expectedLength < footerLength()) { // bad argument: reported as IllegalArgumentException, not as index corruption
+      throw new IllegalArgumentException("expectedLength cannot be less than the footer length");
+    }
+    if (in.length() < expectedLength) { // file holds fewer bytes than expected
+      throw new CorruptIndexException("truncated file: length=" + in.length() + " but expectedLength==" + expectedLength, in);
+    } else if (in.length() > expectedLength) { // file holds more bytes than expected
+      throw new CorruptIndexException("file too long: length=" + in.length() + " but expectedLength==" + expectedLength, in);
+    }
+
+    return retrieveChecksum(in); // lengths match: delegate to the single-argument overload
+  }
+
   private static void validateFooter(IndexInput in) throws IOException {
     long remaining = in.length() - in.getFilePointer();
     long expected = footerLength();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java
index 9aabc97..fdc3cbd 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java
@@ -63,7 +63,6 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
           Lucene86PointsFormat.VERSION_CURRENT,
           readState.segmentInfo.getId(),
           readState.segmentSuffix);
-      CodecUtil.retrieveChecksum(indexIn);
 
       dataIn = readState.directory.openInput(dataFileName, readState.context);
       CodecUtil.checkIndexHeader(dataIn,
@@ -72,8 +71,8 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
           Lucene86PointsFormat.VERSION_CURRENT,
           readState.segmentInfo.getId(),
           readState.segmentSuffix);
-      CodecUtil.retrieveChecksum(dataIn);
 
+      long indexLength = -1, dataLength = -1;
       try (ChecksumIndexInput metaIn = readState.directory.openChecksumInput(metaFileName, readState.context)) {
         Throwable priorE = null;
         try {
@@ -94,12 +93,18 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
             BKDReader reader = new BKDReader(metaIn, indexIn, dataIn);
             readers.put(fieldNumber, reader);
           }
+          indexLength = metaIn.readLong();
+          dataLength = metaIn.readLong();
         } catch (Throwable t) {
           priorE = t;
         } finally {
           CodecUtil.checkFooter(metaIn, priorE);
         }
       }
+      // At this point, checksums of the meta file have been validated so we
+      // know that indexLength and dataLength are very likely correct.
+      CodecUtil.retrieveChecksum(indexIn, indexLength);
+      CodecUtil.retrieveChecksum(dataIn, dataLength);
       success = true;
     } finally {
       if (success == false) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java
index 4fceecc..6fe3571 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java
@@ -251,9 +251,11 @@ public class Lucene86PointsWriter extends PointsWriter implements Closeable {
     }
     finished = true;
     metaOut.writeInt(-1);
-    CodecUtil.writeFooter(metaOut);
     CodecUtil.writeFooter(indexOut);
     CodecUtil.writeFooter(dataOut);
+    metaOut.writeLong(indexOut.getFilePointer());
+    metaOut.writeLong(dataOut.getFilePointer());
+    CodecUtil.writeFooter(metaOut);
   }
 
   @Override
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java
index 279db56..8cd90fb 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java
@@ -23,6 +23,8 @@ import java.util.concurrent.atomic.AtomicLong;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.store.BufferedChecksumIndexInput;
 import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMFile;
@@ -315,4 +317,47 @@ public class TestCodecUtil extends LuceneTestCase {
         () -> CodecUtil.retrieveChecksum(input));
     assertEquals("misplaced codec footer (file truncated?): length=0 but footerLength==16 (resource=RAMInputStream(name=file))", e.getMessage());
   }
+
+  public void testRetrieveChecksum() throws IOException { // exercises CodecUtil.retrieveChecksum(IndexInput, long)
+    Directory dir = newDirectory();
+    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) { // "foo": one payload byte followed by a codec footer
+      out.writeByte((byte) 42);
+      CodecUtil.writeFooter(out);
+    }
+    try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
+      CodecUtil.retrieveChecksum(in, in.length()); // expected length matches: no exception
+
+      CorruptIndexException exception = expectThrows(CorruptIndexException.class,
+          () -> CodecUtil.retrieveChecksum(in, in.length() - 1)); // file is one byte longer than expected
+      assertTrue(exception.getMessage().contains("too long"));
+      assertArrayEquals(new Throwable[0], exception.getSuppressed()); // no suppressed exceptions attached
+
+      exception = expectThrows(CorruptIndexException.class,
+          () -> CodecUtil.retrieveChecksum(in, in.length() + 1)); // file is one byte shorter than expected
+      assertTrue(exception.getMessage().contains("truncated"));
+      assertArrayEquals(new Throwable[0], exception.getSuppressed()); // no suppressed exceptions attached
+    }
+
+    try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) { // "bar": footerLength() + 1 raw bytes, writeFooter is never called
+      for (int i = 0; i <= CodecUtil.footerLength(); ++i) {
+        out.writeByte((byte) i);
+      }
+    }
+    try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) {
+      CorruptIndexException exception = expectThrows(CorruptIndexException.class,
+          () -> CodecUtil.retrieveChecksum(in, in.length())); // length matches, so the failure comes from the footer check
+      assertTrue(exception.getMessage().contains("codec footer mismatch"));
+      assertArrayEquals(new Throwable[0], exception.getSuppressed()); // no suppressed exceptions attached
+
+      exception = expectThrows(CorruptIndexException.class,
+          () -> CodecUtil.retrieveChecksum(in, in.length() - 1)); // length mismatch is what gets reported for this footer-less file
+      assertTrue(exception.getMessage().contains("too long"));
+
+      exception = expectThrows(CorruptIndexException.class,
+          () -> CodecUtil.retrieveChecksum(in, in.length() + 1)); // length mismatch is what gets reported for this footer-less file
+      assertTrue(exception.getMessage().contains("truncated"));
+    }
+
+    dir.close(); // NOTE(review): not reached if an assertion above throws; try-with-resources on dir would close it unconditionally
+  }
 }