You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/11/15 09:52:57 UTC

[lucene] branch main updated: More granular control of preloading on MMapDirectory. (#11929)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 69a7cb22e71 More granular control of preloading on MMapDirectory. (#11929)
69a7cb22e71 is described below

commit 69a7cb22e7118415cb75d2262db19322c098047c
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Nov 15 10:52:49 2022 +0100

    More granular control of preloading on MMapDirectory. (#11929)
    
    This enables configuring preloading on MMapDirectory based on the file name as well as the IOContext that is used to open the file.
---
 lucene/CHANGES.txt                                 |  3 ++
 .../org/apache/lucene/store/MMapDirectory.java     | 54 ++++++++++++++++++----
 .../org/apache/lucene/store/TestMmapDirectory.java |  2 +-
 3 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 905a72b8388..1cde0758b24 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -110,6 +110,9 @@ New Features
 * GITHUB#11795: Add ByteWritesTrackingDirectoryWrapper to expose metrics for bytes merged, flushed, and overall
   write amplification factor. (Marc D'Mello)
 
+* GITHUB#11929: MMapDirectory gives more granular control over which files to
+  preload. (Adrien Grand, Uwe Schindler)
+
 Improvements
 ---------------------
 * GITHUB#11778: Detailed part-of-speech information for particle(조사) and ending(어미) on Nori
diff --git a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
index 4402d296843..079b0baccbf 100644
--- a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
@@ -23,6 +23,7 @@ import java.nio.channels.ClosedChannelException; // javadoc @link
 import java.nio.file.Path;
 import java.util.Locale;
 import java.util.concurrent.Future;
+import java.util.function.BiPredicate;
 import java.util.logging.Logger;
 import org.apache.lucene.util.Constants;
 
@@ -38,6 +39,10 @@ import org.apache.lucene.util.Constants;
  * fragmented address space. If you get an OutOfMemoryException, it is recommended to reduce the
  * chunk size, until it works.
  *
+ * <p>This class supports preloading files into physical memory upon opening. This can help improve
+ * performance of searches on a cold page cache at the expense of slowing down opening an index. See
+ * {@link #setPreload(BiPredicate)} for more details.
+ *
  * <p>Due to <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4724038">this bug</a> in
  * Sun's JRE, MMapDirectory's {@link IndexInput#close} is unable to close the underlying OS file
  * handle. Only when GC finally collects the underlying objects, which could be quite some time
@@ -74,8 +79,21 @@ import org.apache.lucene.util.Constants;
  *     about MMapDirectory</a>
  */
 public class MMapDirectory extends FSDirectory {
+
+  /**
+   * Argument for {@link #setPreload(BiPredicate)} that configures all files to be preloaded upon
+   * opening them.
+   */
+  public static final BiPredicate<String, IOContext> ALL_FILES = (filename, context) -> true;
+
+  /**
+   * Argument for {@link #setPreload(BiPredicate)} that configures no files to be preloaded upon
+   * opening them.
+   */
+  public static final BiPredicate<String, IOContext> NO_FILES = (filename, context) -> false;
+
   private boolean useUnmapHack = UNMAP_SUPPORTED;
-  private boolean preload;
+  private BiPredicate<String, IOContext> preload = NO_FILES;
 
   /**
    * Default max chunk size:
@@ -204,20 +222,39 @@ public class MMapDirectory extends FSDirectory {
   }
 
   /**
-   * Set to {@code true} to ask mapped pages to be loaded into physical memory on init. The behavior
-   * is best-effort and operating system dependent.
+   * Configure which files to preload into physical memory upon opening. By default, no files are
+   * preloaded. The behavior is best effort and operating system-dependent.
+   *
+   * @param preload a {@link BiPredicate} whose first argument is the file name, and second argument
+   *     is the {@link IOContext} used to open the file
+   * @see #ALL_FILES
+   * @see #NO_FILES
    */
-  public void setPreload(boolean preload) {
+  public void setPreload(BiPredicate<String, IOContext> preload) {
     this.preload = preload;
   }
 
   /**
-   * Returns {@code true} if mapped pages should be loaded.
+   * Configure whether to preload files on this {@link MMapDirectory} into physical memory upon
+   * opening. The behavior is best effort and operating system-dependent.
+   *
+   * @deprecated Use {@link #setPreload(BiPredicate)} instead which provides more granular control.
+   */
+  @Deprecated
+  public void setPreload(boolean preload) {
+    this.preload = preload ? ALL_FILES : NO_FILES;
+  }
+
+  /**
+   * Return whether files are loaded into physical memory upon opening.
    *
-   * @see #setPreload
+   * @deprecated This value is unreliable now that preloading is configured via a predicate: it is
+   *     {@code true} only when the configured predicate is {@link #ALL_FILES}.
+   * @see #setPreload(BiPredicate)
    */
+  @Deprecated
   public boolean getPreload() {
-    return preload;
+    return preload == ALL_FILES;
   }
 
   /**
@@ -235,7 +272,8 @@ public class MMapDirectory extends FSDirectory {
     ensureOpen();
     ensureCanRead(name);
     Path path = directory.resolve(name);
-    return PROVIDER.openInput(path, context, chunkSizePower, preload, useUnmapHack);
+    return PROVIDER.openInput(
+        path, context, chunkSizePower, preload.test(name, context), useUnmapHack);
   }
 
   // visible for tests:
diff --git a/lucene/core/src/test/org/apache/lucene/store/TestMmapDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestMmapDirectory.java
index 810f8578e80..579b0c27108 100644
--- a/lucene/core/src/test/org/apache/lucene/store/TestMmapDirectory.java
+++ b/lucene/core/src/test/org/apache/lucene/store/TestMmapDirectory.java
@@ -32,7 +32,7 @@ public class TestMmapDirectory extends BaseDirectoryTestCase {
   @Override
   protected Directory getDirectory(Path path) throws IOException {
     MMapDirectory m = new MMapDirectory(path);
-    m.setPreload(random().nextBoolean());
+    m.setPreload((file, context) -> random().nextBoolean());
     return m;
   }