You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/11/15 12:59:58 UTC

[lucene] branch main updated: Introduce IOContext.LOAD (#11930)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 729dc2bb820 Introduce IOContext.LOAD (#11930)
729dc2bb820 is described below

commit 729dc2bb8204c7f14d6c34f56c69030f707d16d9
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Nov 15 13:59:51 2022 +0100

    Introduce IOContext.LOAD (#11930)
    
    The default codec has a number of small, hot files that used to be fully
    loaded into memory before we moved them off-heap. In the general case,
    these files are expected to fit entirely in the page cache for things to
    work well. This commit gives codecs control over preloading by opening the
    following files with the new `IOContext.LOAD` context:
     - Terms index (`tip`)
     - Points index (`kdi`)
     - Stored fields index (`fdx`)
     - Term vectors index (`tvx`)
    
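    This only has an effect on `MMapDirectory`, and only when its preload
    predicate honors the new `load` flag (e.g. when it is configured with
    `MMapDirectory.BASED_ON_LOAD_IO_CONTEXT`); the default predicate is still
    `NO_FILES`.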
---
 .../codecs/lucene90/Lucene90DocValuesProducer.java |  3 ++-
 .../codecs/lucene90/Lucene90NormsProducer.java     |  3 ++-
 .../codecs/lucene90/Lucene90PointsReader.java      |  5 +++--
 .../blocktree/Lucene90BlockTreeTermsReader.java    |  6 ++++--
 .../lucene90/compressing/FieldsIndexReader.java    |  2 +-
 .../codecs/lucene94/Lucene94HnswVectorsReader.java |  4 +++-
 .../java/org/apache/lucene/store/IOContext.java    | 23 ++++++++++++++++++----
 .../org/apache/lucene/store/MMapDirectory.java     |  7 +++++++
 8 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
index 072e57c6eea..e9581ce5922 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
@@ -42,6 +42,7 @@ import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
 import org.apache.lucene.util.BytesRef;
@@ -82,7 +83,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
     merging = false;
 
     // read in the entries from the metadata file.
-    try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) {
+    try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, IOContext.READONCE)) {
       Throwable priorE = null;
 
       try {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java
index 28522687810..d145bfdb0eb 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java
@@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
 import org.apache.lucene.util.IOUtils;
@@ -60,7 +61,7 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
     int version = -1;
 
     // read in the entries from the metadata file.
-    try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) {
+    try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, IOContext.READONCE)) {
       Throwable priorE = null;
       try {
         version =
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java
index 89d82a4f248..1656dd0a1ba 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java
@@ -27,6 +27,7 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
@@ -59,7 +60,7 @@ public class Lucene90PointsReader extends PointsReader {
 
     boolean success = false;
     try {
-      indexIn = readState.directory.openInput(indexFileName, readState.context);
+      indexIn = readState.directory.openInput(indexFileName, IOContext.LOAD);
       CodecUtil.checkIndexHeader(
           indexIn,
           Lucene90PointsFormat.INDEX_CODEC_NAME,
@@ -81,7 +82,7 @@ public class Lucene90PointsReader extends PointsReader {
 
       long indexLength = -1, dataLength = -1;
       try (ChecksumIndexInput metaIn =
-          readState.directory.openChecksumInput(metaFileName, readState.context)) {
+          readState.directory.openChecksumInput(metaFileName, IOContext.READONCE)) {
         Throwable priorE = null;
         try {
           CodecUtil.checkIndexHeader(
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java
index 2b8827c7023..659f7618ec9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java
@@ -33,6 +33,7 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
@@ -134,7 +135,7 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
 
       String indexName =
           IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
-      indexIn = state.directory.openInput(indexName, state.context);
+      indexIn = state.directory.openInput(indexName, IOContext.LOAD);
       CodecUtil.checkIndexHeader(
           indexIn,
           TERMS_INDEX_CODEC_NAME,
@@ -149,7 +150,8 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
       Map<String, FieldReader> fieldMap = null;
       Throwable priorE = null;
       long indexLength = -1, termsLength = -1;
-      try (ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaName, state.context)) {
+      try (ChecksumIndexInput metaIn =
+          state.directory.openChecksumInput(metaName, IOContext.READONCE)) {
         try {
           CodecUtil.checkIndexHeader(
               metaIn,
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndexReader.java
index 458e97f7d64..3b46d12edf9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndexReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndexReader.java
@@ -65,7 +65,7 @@ final class FieldsIndexReader extends FieldsIndex {
     maxPointer = metaIn.readLong();
 
     indexInput =
-        dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.READ);
+        dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.LOAD);
     boolean success = false;
     try {
       CodecUtil.checkIndexHeader(
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94HnswVectorsReader.java
index 688f411d489..8d95614f9fe 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94HnswVectorsReader.java
@@ -38,6 +38,7 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.IOUtils;
@@ -89,7 +90,8 @@ public final class Lucene94HnswVectorsReader extends KnnVectorsReader {
         IndexFileNames.segmentFileName(
             state.segmentInfo.name, state.segmentSuffix, Lucene94HnswVectorsFormat.META_EXTENSION);
     int versionMeta = -1;
-    try (ChecksumIndexInput meta = state.directory.openChecksumInput(metaFileName, state.context)) {
+    try (ChecksumIndexInput meta =
+        state.directory.openChecksumInput(metaFileName, IOContext.READONCE)) {
       Throwable priorE = null;
       try {
         versionMeta =
diff --git a/lucene/core/src/java/org/apache/lucene/store/IOContext.java b/lucene/core/src/java/org/apache/lucene/store/IOContext.java
index ee0811c9172..21a47a0ce03 100644
--- a/lucene/core/src/java/org/apache/lucene/store/IOContext.java
+++ b/lucene/core/src/java/org/apache/lucene/store/IOContext.java
@@ -41,16 +41,27 @@ public class IOContext {
 
   public final FlushInfo flushInfo;
 
+  /** This flag indicates that the file will be opened, then fully read sequentially then closed. */
   public final boolean readOnce;
 
+  /**
+   * This flag is used for files that are a small fraction of the total index size and are expected
+   * to be heavily accessed in random-access fashion. Some {@link Directory} implementations may
+   * choose to load such files into physical memory (e.g. Java heap) as a way to provide stronger
+   * guarantees on query latency.
+   */
+  public final boolean load;
+
   public static final IOContext DEFAULT = new IOContext(Context.DEFAULT);
 
-  public static final IOContext READONCE = new IOContext(true);
+  public static final IOContext READONCE = new IOContext(true, false);
+
+  public static final IOContext READ = new IOContext(false, false);
 
-  public static final IOContext READ = new IOContext(false);
+  public static final IOContext LOAD = new IOContext(false, true);
 
   public IOContext() {
-    this(false);
+    this(false, false);
   }
 
   public IOContext(FlushInfo flushInfo) {
@@ -58,6 +69,7 @@ public class IOContext {
     this.context = Context.FLUSH;
     this.mergeInfo = null;
     this.readOnce = false;
+    this.load = false;
     this.flushInfo = flushInfo;
   }
 
@@ -65,10 +77,11 @@ public class IOContext {
     this(context, null);
   }
 
-  private IOContext(boolean readOnce) {
+  private IOContext(boolean readOnce, boolean load) {
     this.context = Context.READ;
     this.mergeInfo = null;
     this.readOnce = readOnce;
+    this.load = load;
     this.flushInfo = null;
   }
 
@@ -82,6 +95,7 @@ public class IOContext {
     assert context != Context.FLUSH : "Use IOContext(FlushInfo) to create a FLUSH IOContext";
     this.context = context;
     this.readOnce = false;
+    this.load = false;
     this.mergeInfo = mergeInfo;
     this.flushInfo = null;
   }
@@ -99,6 +113,7 @@ public class IOContext {
     this.mergeInfo = ctxt.mergeInfo;
     this.flushInfo = ctxt.flushInfo;
     this.readOnce = readOnce;
+    this.load = false;
   }
 
   @Override
diff --git a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
index 079b0baccbf..9d12f1e438e 100644
--- a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
@@ -92,6 +92,13 @@ public class MMapDirectory extends FSDirectory {
    */
   public static final BiPredicate<String, IOContext> NO_FILES = (filename, context) -> false;
 
+  /**
+   * Argument for {@link #setPreload(BiPredicate)} that configures files to be preloaded upon
+   * opening them if they use the {@link IOContext#LOAD} I/O context.
+   */
+  public static final BiPredicate<String, IOContext> BASED_ON_LOAD_IO_CONTEXT =
+      (filename, context) -> context.load;
+
   private boolean useUnmapHack = UNMAP_SUPPORTED;
   private BiPredicate<String, IOContext> preload = NO_FILES;