You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2022/09/26 13:32:28 UTC
[lucene] branch branch_9_4 updated: MR-JAR rewrite of MMapDirectory with JDK-19 preview Panama APIs (>= JDK-19-ea+23) (#912)
This is an automated email from the ASF dual-hosted git repository.
uschindler pushed a commit to branch branch_9_4
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9_4 by this push:
new 0384b4fcad7 MR-JAR rewrite of MMapDirectory with JDK-19 preview Panama APIs (>= JDK-19-ea+23) (#912)
0384b4fcad7 is described below
commit 0384b4fcad7856ddc574c8b994c814a568ce6d0a
Author: Uwe Schindler <us...@apache.org>
AuthorDate: Mon Sep 26 15:22:04 2022 +0200
MR-JAR rewrite of MMapDirectory with JDK-19 preview Panama APIs (>= JDK-19-ea+23) (#912)
This uses Gradle's auto-provisioning to compile Java 19 classes and build a multi-release JAR from them. Please make sure to regenerate gradle.properties (delete it) or change "org.gradle.java.installations.auto-download" to "true".
# Conflicts:
# lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
---
gradle/generation/local-settings.gradle | 8 +-
gradle/java/javac.gradle | 45 ++
gradle/testing/defaults-tests.gradle | 4 +
gradle/validation/ecj-lint.gradle | 4 +-
gradle/validation/error-prone.gradle | 6 +
gradle/validation/forbidden-apis.gradle | 5 +
lucene/CHANGES.txt | 9 +
.../org/apache/lucene/store/MMapDirectory.java | 332 +++++-------
.../store/MappedByteBufferIndexInputProvider.java | 198 +++++++
.../lucene/store/MemorySegmentIndexInput.java | 597 +++++++++++++++++++++
.../store/MemorySegmentIndexInputProvider.java | 117 ++++
.../org/apache/lucene/store/TestMmapDirectory.java | 38 +-
.../org/apache/lucene/store/TestMultiMMap.java | 27 +-
.../lucene/distribution/TestModularLayer.java | 36 ++
14 files changed, 1217 insertions(+), 209 deletions(-)
diff --git a/gradle/generation/local-settings.gradle b/gradle/generation/local-settings.gradle
index e036d718c30..829f504b921 100644
--- a/gradle/generation/local-settings.gradle
+++ b/gradle/generation/local-settings.gradle
@@ -93,12 +93,12 @@ org.gradle.workers.max=${maxWorkers}
# Maximum number of test JVMs forked per test task.
tests.jvms=${testsJvms}
-# Disable auto JVM provisioning (we don't use toolchains yet but want no surprises).
-org.gradle.java.installations.auto-download=false
+# Enable auto JVM provisioning.
+org.gradle.java.installations.auto-download=true
# Set these to enable automatic JVM location discovery.
-org.gradle.java.installations.fromEnv=JDK11,JDK12,JDK13,JDK14,JDK15,JDK16,JDK17
-org.gradle.java.installations.paths=(custom paths)
+org.gradle.java.installations.fromEnv=JAVA17_HOME,JAVA19_HOME
+#org.gradle.java.installations.paths=(custom paths)
""", "UTF-8")
diff --git a/gradle/java/javac.gradle b/gradle/java/javac.gradle
index 25f66c39de0..a3344f07193 100644
--- a/gradle/java/javac.gradle
+++ b/gradle/java/javac.gradle
@@ -85,3 +85,48 @@ allprojects {
}
}
}
+
+configure(project(":lucene:core")) {
+ plugins.withType(JavaPlugin) {
+ sourceSets {
+ main19 {
+ java {
+ srcDirs = ['src/java19']
+ }
+ }
+ }
+
+ configurations {
+ // Inherit any dependencies from the main source set.
+ main19Implementation.extendsFrom implementation
+ }
+
+ dependencies {
+ // We need the main classes to compile our Java 19 pieces.
+ main19Implementation sourceSets.main.output
+ }
+
+ tasks.named('compileMain19Java').configure {
+ javaCompiler = javaToolchains.compilerFor {
+ languageVersion = JavaLanguageVersion.of(19)
+ }
+
+ // undo alternative JDK support:
+ options.forkOptions.javaHome = null
+
+ sourceCompatibility = 19
+ targetCompatibility = 19
+ options.compilerArgs += ["--release", 19 as String, "--enable-preview"]
+ }
+
+ tasks.named('jar').configure {
+ into('META-INF/versions/19') {
+ from sourceSets.main19.output
+ }
+
+ manifest.attributes(
+ 'Multi-Release': 'true'
+ )
+ }
+ }
+}
diff --git a/gradle/testing/defaults-tests.gradle b/gradle/testing/defaults-tests.gradle
index a7cf879ddd1..187e5d272c4 100644
--- a/gradle/testing/defaults-tests.gradle
+++ b/gradle/testing/defaults-tests.gradle
@@ -124,6 +124,10 @@ allprojects {
// (if the runner JVM does not support them, it will fail tests):
jvmArgs '--add-modules', 'jdk.unsupported,jdk.management'
+ if (rootProject.runtimeJavaVersion == JavaVersion.VERSION_19) {
+ jvmArgs '--enable-preview'
+ }
+
systemProperty 'java.util.logging.config.file', file("${resources}/logging.properties")
systemProperty 'java.awt.headless', 'true'
systemProperty 'jdk.map.althashing.threshold', '0'
diff --git a/gradle/validation/ecj-lint.gradle b/gradle/validation/ecj-lint.gradle
index 33b79ca149e..7587a02f465 100644
--- a/gradle/validation/ecj-lint.gradle
+++ b/gradle/validation/ecj-lint.gradle
@@ -34,7 +34,9 @@ allprojects {
// Create a [sourceSetName]EcjLint task for each source set
// with a non-empty java.srcDirs. These tasks are then
// attached to project's "ecjLint" task.
- def lintTasks = sourceSets.collect { sourceSet ->
+
+ // TODO: Better way to disable on our MR-JAR
+ def lintTasks = sourceSets.findAll { it.name != 'main19' }.collect { sourceSet ->
def srcDirs = sourceSet.java.sourceDirectories
.filter { dir -> dir.exists() }
diff --git a/gradle/validation/error-prone.gradle b/gradle/validation/error-prone.gradle
index 67c0ea20ca0..46cabb491e8 100644
--- a/gradle/validation/error-prone.gradle
+++ b/gradle/validation/error-prone.gradle
@@ -58,6 +58,12 @@ allprojects { prj ->
}
tasks.withType(JavaCompile) { task ->
+ // TODO: Better way to disable on our MR-JAR
+ if (task.name == 'compileMain19Java') {
+ options.errorprone.enabled = false
+ return
+ }
+
task.dependsOn ":checkJdkInternalsExportedToGradle"
options.errorprone.disableWarningsInGeneratedCode = true
diff --git a/gradle/validation/forbidden-apis.gradle b/gradle/validation/forbidden-apis.gradle
index 8720b9ee5d4..8fbe82a6efd 100644
--- a/gradle/validation/forbidden-apis.gradle
+++ b/gradle/validation/forbidden-apis.gradle
@@ -115,6 +115,11 @@ allprojects { prj ->
inputs.dir(file(resources))
}
+ // TODO: Remove this when forbiddenapis supports Java 19 class files
+ tasks.matching { it.name == "forbiddenApisMain19" }.all {
+ enabled = false
+ }
+
// We rely on resolved configurations to compute the relevant set of rule
// files for forbiddenApis. Since we don't want to resolve these configurations until
// the task is executed, we can't really use them as task inputs properly. This is a
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 615ed6c59ee..1747b8bb0af 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -19,6 +19,15 @@ New Features
SortField#getOptimizeSortWithIndexedData were introduced to provide
an option to disable sort optimization for various sort fields. (Mayya Sharipova)
+* GITHUB#912: Support for the Java 19 foreign memory API was added. Applications started
+ with command line parameter "java --enable-preview" will automatically use the new
+ foreign memory API of Java 19 to access indexes on disk with MMapDirectory. This is
+ an opt-in feature and requires an explicit Java command line flag! When enabled, Lucene logs
+ a notice using java.util.logging. Please test thoroughly and report bugs/slowness to Lucene's
+ mailing list. When the new API is used, MMapDirectory will mmap Lucene indexes in chunks of
+ 16 GiB (instead of 1 GiB) and indexes closed while queries are running can no longer crash
+ the JVM. (Uwe Schindler)
+
Improvements
---------------------
* LUCENE-10592: Build HNSW Graph on indexing. (Mayya Sharipova, Adrien Grand, Julie Tibshirani)
diff --git a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
index 25ed27efcbe..4cef4b50b49 100644
--- a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
@@ -16,29 +16,15 @@
*/
package org.apache.lucene.store;
-import static java.lang.invoke.MethodHandles.*;
-import static java.lang.invoke.MethodType.methodType;
-
import java.io.IOException;
-import java.lang.invoke.MethodHandle;
-import java.lang.reflect.Field;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.MappedByteBuffer;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
import java.nio.channels.ClosedChannelException; // javadoc @link
-import java.nio.channels.FileChannel;
-import java.nio.channels.FileChannel.MapMode;
import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.security.AccessController;
-import java.security.PrivilegedAction;
import java.util.Locale;
-import java.util.Objects;
import java.util.concurrent.Future;
import java.util.logging.Logger;
-import org.apache.lucene.store.ByteBufferGuard.BufferCleaner;
import org.apache.lucene.util.Constants;
-import org.apache.lucene.util.SuppressForbidden;
/**
* File-based {@link Directory} implementation that uses mmap for reading, and {@link
@@ -70,6 +56,9 @@ import org.apache.lucene.util.SuppressForbidden;
* the workaround will be automatically enabled (with no guarantees; if you discover any problems,
* you can disable it).
*
+ * <p>On <b>Java 19</b> with {@code --enable-preview} command line setting, this class will use the
+ * modern {@code MemorySegment} API which allows safe unmapping.
+ *
* <p><b>NOTE:</b> Accessing this class either directly or indirectly from a thread while it's
* interrupted can close the underlying channel immediately if at the same time the thread is
* blocked on IO. The channel will remain closed and subsequent access to {@link MMapDirectory} will
@@ -89,11 +78,16 @@ public class MMapDirectory extends FSDirectory {
private boolean preload;
/**
- * Default max chunk size.
+ * Default max chunk size:
*
- * @see #MMapDirectory(Path, LockFactory, int)
+ * <ul>
+ * <li>16 GiBytes for 64 bit <b>Java 19</b> JVMs running with {@code --enable-preview} as
+ * command line parameter
+ * <li>1 GiBytes for other 64 bit JVMs
+ * <li>256 MiBytes for 32 bit JVMs
+ * </ul>
*/
- public static final int DEFAULT_MAX_CHUNK_SIZE = Constants.JRE_IS_64BIT ? (1 << 30) : (1 << 28);
+ public static final long DEFAULT_MAX_CHUNK_SIZE;
final int chunkSizePower;
@@ -120,19 +114,41 @@ public class MMapDirectory extends FSDirectory {
this(path, FSLockFactory.getDefault());
}
+ /**
+ * Create a new MMapDirectory for the named location and {@link FSLockFactory#getDefault()}. The
+ * directory is created at the named location if it does not yet exist.
+ *
+ * @deprecated use {@link #MMapDirectory(Path, long)} instead.
+ */
+ @Deprecated
+ public MMapDirectory(Path path, int maxChunkSize) throws IOException {
+ this(path, (long) maxChunkSize);
+ }
+
/**
* Create a new MMapDirectory for the named location and {@link FSLockFactory#getDefault()}. The
* directory is created at the named location if it does not yet exist.
*
* @param path the path of the directory
- * @param maxChunkSize maximum chunk size (default is 1 GiBytes for 64 bit JVMs and 256 MiBytes
- * for 32 bit JVMs) used for memory mapping.
+ * @param maxChunkSize maximum chunk size (for default see {@link #DEFAULT_MAX_CHUNK_SIZE}) used
+ * for memory mapping.
* @throws IOException if there is a low-level I/O error
*/
- public MMapDirectory(Path path, int maxChunkSize) throws IOException {
+ public MMapDirectory(Path path, long maxChunkSize) throws IOException {
this(path, FSLockFactory.getDefault(), maxChunkSize);
}
+ /**
+ * Create a new MMapDirectory for the named location, using the given lock factory and maximum
+ * chunk size. The directory is created at the named location if it does not yet exist.
+ *
+ * @deprecated use {@link #MMapDirectory(Path, LockFactory, long)} instead.
+ */
+ @Deprecated
+ public MMapDirectory(Path path, LockFactory lockFactory, int maxChunkSize) throws IOException {
+ this(path, lockFactory, (long) maxChunkSize);
+ }
+
/**
* Create a new MMapDirectory for the named location, specifying the maximum chunk size used for
* memory mapping. The directory is created at the named location if it does not yet exist.
@@ -140,25 +156,28 @@ public class MMapDirectory extends FSDirectory {
* <p>Especially on 32 bit platform, the address space can be very fragmented, so large index
* files cannot be mapped. Using a lower chunk size makes the directory implementation a little
* bit slower (as the correct chunk may be resolved on lots of seeks) but the chance is higher
- * that mmap does not fail. On 64 bit Java platforms, this parameter should always be {@code 1 <<
- * 30}, as the address space is big enough.
+ * that mmap does not fail. On 64 bit Java platforms, this parameter should always be large (like
+ * 1 GiBytes, or even larger with Java 19), as the address space is big enough. If it is larger,
+ * fragmentation of address space increases, but number of file handles and mappings is lower for
+ * huge installations with many open indexes.
*
* <p><b>Please note:</b> The chunk size is always rounded down to a power of 2.
*
* @param path the path of the directory
* @param lockFactory the lock factory to use, or null for the default ({@link
* NativeFSLockFactory});
- * @param maxChunkSize maximum chunk size (default is 1 GiBytes for 64 bit JVMs and 256 MiBytes
- * for 32 bit JVMs) used for memory mapping.
+ * @param maxChunkSize maximum chunk size (for default see {@link #DEFAULT_MAX_CHUNK_SIZE}) used
+ * for memory mapping.
* @throws IOException if there is a low-level I/O error
*/
- public MMapDirectory(Path path, LockFactory lockFactory, int maxChunkSize) throws IOException {
+ public MMapDirectory(Path path, LockFactory lockFactory, long maxChunkSize) throws IOException {
super(path, lockFactory);
- if (maxChunkSize <= 0) {
+ if (maxChunkSize <= 0L) {
throw new IllegalArgumentException("Maximum chunk size for mmap must be >0");
}
- this.chunkSizePower = 31 - Integer.numberOfLeadingZeros(maxChunkSize);
- assert this.chunkSizePower >= 0 && this.chunkSizePower <= 30;
+ this.chunkSizePower = Long.SIZE - 1 - Long.numberOfLeadingZeros(maxChunkSize);
+ assert (1L << chunkSizePower) <= maxChunkSize;
+ assert (1L << chunkSizePower) > (maxChunkSize / 2);
}
/**
@@ -167,6 +186,10 @@ public class MMapDirectory extends FSDirectory {
* non-Oracle/OpenJDK JVMs. It forcefully unmaps the buffer on close by using an undocumented
* internal cleanup functionality.
*
+ * <p>On Java 19 with {@code --enable-preview} command line setting, this class will use the
+ * modern {@code MemorySegment} API which allows safe unmapping. <em>The following warnings no
+ * longer apply in that case!</em>
+ *
* <p><b>NOTE:</b> Enabling this is completely unsupported by Java and may lead to JVM crashes if
* <code>IndexInput</code> is closed while another thread is still accessing it (SIGSEGV).
*
@@ -205,8 +228,6 @@ public class MMapDirectory extends FSDirectory {
/**
* Set to {@code true} to ask mapped pages to be loaded into physical memory on init. The behavior
* is best-effort and operating system dependent.
- *
- * @see MappedByteBuffer#load
*/
public void setPreload(boolean preload) {
this.preload = preload;
@@ -224,10 +245,10 @@ public class MMapDirectory extends FSDirectory {
/**
* Returns the current mmap chunk size.
*
- * @see #MMapDirectory(Path, LockFactory, int)
+ * @see #MMapDirectory(Path, LockFactory, long)
*/
- public final int getMaxChunkSize() {
- return 1 << chunkSizePower;
+ public final long getMaxChunkSize() {
+ return 1L << chunkSizePower;
}
/** Creates an IndexInput for the file with the given name. */
@@ -236,94 +257,11 @@ public class MMapDirectory extends FSDirectory {
ensureOpen();
ensureCanRead(name);
Path path = directory.resolve(name);
- try (FileChannel c = FileChannel.open(path, StandardOpenOption.READ)) {
- final String resourceDescription = "MMapIndexInput(path=\"" + path.toString() + "\")";
- final boolean useUnmap = getUseUnmap();
- return ByteBufferIndexInput.newInstance(
- resourceDescription,
- map(resourceDescription, c, 0, c.size()),
- c.size(),
- chunkSizePower,
- new ByteBufferGuard(resourceDescription, useUnmap ? CLEANER : null));
- }
+ return PROVIDER.openInput(path, context, chunkSizePower, preload, useUnmapHack);
}
- /** Maps a file into a set of buffers */
- final ByteBuffer[] map(String resourceDescription, FileChannel fc, long offset, long length)
- throws IOException {
- if ((length >>> chunkSizePower) >= Integer.MAX_VALUE)
- throw new IllegalArgumentException(
- "RandomAccessFile too big for chunk size: " + resourceDescription);
-
- final long chunkSize = 1L << chunkSizePower;
-
- // we always allocate one more buffer, the last one may be a 0 byte one
- final int nrBuffers = (int) (length >>> chunkSizePower) + 1;
-
- ByteBuffer[] buffers = new ByteBuffer[nrBuffers];
-
- long bufferStart = 0L;
- for (int bufNr = 0; bufNr < nrBuffers; bufNr++) {
- int bufSize =
- (int) ((length > (bufferStart + chunkSize)) ? chunkSize : (length - bufferStart));
- MappedByteBuffer buffer;
- try {
- buffer = fc.map(MapMode.READ_ONLY, offset + bufferStart, bufSize);
- buffer.order(ByteOrder.LITTLE_ENDIAN);
- } catch (IOException ioe) {
- throw convertMapFailedIOException(ioe, resourceDescription, bufSize);
- }
- if (preload) {
- buffer.load();
- }
- buffers[bufNr] = buffer;
- bufferStart += bufSize;
- }
-
- return buffers;
- }
-
- private IOException convertMapFailedIOException(
- IOException ioe, String resourceDescription, int bufSize) {
- final String originalMessage;
- final Throwable originalCause;
- if (ioe.getCause() instanceof OutOfMemoryError) {
- // nested OOM confuses users, because it's "incorrect", just print a plain message:
- originalMessage = "Map failed";
- originalCause = null;
- } else {
- originalMessage = ioe.getMessage();
- originalCause = ioe.getCause();
- }
- final String moreInfo;
- if (!Constants.JRE_IS_64BIT) {
- moreInfo =
- "MMapDirectory should only be used on 64bit platforms, because the address space on 32bit operating systems is too small. ";
- } else if (Constants.WINDOWS) {
- moreInfo =
- "Windows is unfortunately very limited on virtual address space. If your index size is several hundred Gigabytes, consider changing to Linux. ";
- } else if (Constants.LINUX) {
- moreInfo =
- "Please review 'ulimit -v', 'ulimit -m' (both should return 'unlimited'), and 'sysctl vm.max_map_count'. ";
- } else {
- moreInfo = "Please review 'ulimit -v', 'ulimit -m' (both should return 'unlimited'). ";
- }
- final IOException newIoe =
- new IOException(
- String.format(
- Locale.ENGLISH,
- "%s: %s [this may be caused by lack of enough unfragmented virtual address space "
- + "or too restrictive virtual memory limits enforced by the operating system, "
- + "preventing us to map a chunk of %d bytes. %sMore information: "
- + "http://blog.thetaphi.de/2012/07/use-lucenes-mmapdirectory-on-64bit.html]",
- originalMessage,
- resourceDescription,
- bufSize,
- moreInfo),
- originalCause);
- newIoe.setStackTrace(ioe.getStackTrace());
- return newIoe;
- }
+ // visible for tests:
+ static final MMapIndexInputProvider PROVIDER;
/** <code>true</code>, if this platform supports unmapping mmapped files. */
public static final boolean UNMAP_SUPPORTED;
@@ -334,79 +272,101 @@ public class MMapDirectory extends FSDirectory {
*/
public static final String UNMAP_NOT_SUPPORTED_REASON;
- /** Reference to a BufferCleaner that does unmapping; {@code null} if not supported. */
- private static final BufferCleaner CLEANER;
+ static interface MMapIndexInputProvider {
+ IndexInput openInput(
+ Path path, IOContext context, int chunkSizePower, boolean preload, boolean useUnmapHack)
+ throws IOException;
- static {
- final Object hack =
- AccessController.doPrivileged((PrivilegedAction<Object>) MMapDirectory::unmapHackImpl);
- if (hack instanceof BufferCleaner) {
- CLEANER = (BufferCleaner) hack;
- UNMAP_SUPPORTED = true;
- UNMAP_NOT_SUPPORTED_REASON = null;
- } else {
- CLEANER = null;
- UNMAP_SUPPORTED = false;
- UNMAP_NOT_SUPPORTED_REASON = hack.toString();
- Logger.getLogger(MMapDirectory.class.getName()).warning(UNMAP_NOT_SUPPORTED_REASON);
+ long getDefaultMaxChunkSize();
+
+ boolean isUnmapSupported();
+
+ String getUnmapNotSupportedReason();
+
+ default IOException convertMapFailedIOException(
+ IOException ioe, String resourceDescription, long bufSize) {
+ final String originalMessage;
+ final Throwable originalCause;
+ if (ioe.getCause() instanceof OutOfMemoryError) {
+ // nested OOM confuses users, because it's "incorrect", just print a plain message:
+ originalMessage = "Map failed";
+ originalCause = null;
+ } else {
+ originalMessage = ioe.getMessage();
+ originalCause = ioe.getCause();
+ }
+ final String moreInfo;
+ if (!Constants.JRE_IS_64BIT) {
+ moreInfo =
+ "MMapDirectory should only be used on 64bit platforms, because the address space on 32bit operating systems is too small. ";
+ } else if (Constants.WINDOWS) {
+ moreInfo =
+ "Windows is unfortunately very limited on virtual address space. If your index size is several hundred Gigabytes, consider changing to Linux. ";
+ } else if (Constants.LINUX) {
+ moreInfo =
+ "Please review 'ulimit -v', 'ulimit -m' (both should return 'unlimited'), and 'sysctl vm.max_map_count'. ";
+ } else {
+ moreInfo = "Please review 'ulimit -v', 'ulimit -m' (both should return 'unlimited'). ";
+ }
+ final IOException newIoe =
+ new IOException(
+ String.format(
+ Locale.ENGLISH,
+ "%s: %s [this may be caused by lack of enough unfragmented virtual address space "
+ + "or too restrictive virtual memory limits enforced by the operating system, "
+ + "preventing us to map a chunk of %d bytes. %sMore information: "
+ + "https://blog.thetaphi.de/2012/07/use-lucenes-mmapdirectory-on-64bit.html]",
+ originalMessage,
+ resourceDescription,
+ bufSize,
+ moreInfo),
+ originalCause);
+ newIoe.setStackTrace(ioe.getStackTrace());
+ return newIoe;
}
}
- @SuppressForbidden(reason = "Needs access to sun.misc.Unsafe to enable hack")
- private static Object unmapHackImpl() {
- final Lookup lookup = lookup();
+ private static MMapIndexInputProvider lookupProvider() {
+ final var lookup = MethodHandles.lookup();
try {
- // *** sun.misc.Unsafe unmapping (Java 9+) ***
- final Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
- // first check if Unsafe has the right method, otherwise we can give up
- // without doing any security critical stuff:
- final MethodHandle unmapper =
- lookup.findVirtual(
- unsafeClass, "invokeCleaner", methodType(void.class, ByteBuffer.class));
- // fetch the unsafe instance and bind it to the virtual MH:
- final Field f = unsafeClass.getDeclaredField("theUnsafe");
- f.setAccessible(true);
- final Object theUnsafe = f.get(null);
- return newBufferCleaner(unmapper.bindTo(theUnsafe));
- } catch (SecurityException se) {
- return "Unmapping is not supported, because not all required permissions are given to the Lucene JAR file: "
- + se
- + " [Please grant at least the following permissions: RuntimePermission(\"accessClassInPackage.sun.misc\") "
- + " and ReflectPermission(\"suppressAccessChecks\")]";
- } catch (ReflectiveOperationException | RuntimeException e) {
- final Module module = MMapDirectory.class.getModule();
- final ModuleLayer layer = module.getLayer();
- // classpath / unnamed module has no layer, so we need to check:
- if (layer != null
- && layer.findModule("jdk.unsupported").map(module::canRead).orElse(false) == false) {
- return "Unmapping is not supported, because Lucene cannot read 'jdk.unsupported' module "
- + "[please add 'jdk.unsupported' to modular application either by command line or its module descriptor]";
+ final var cls = lookup.findClass("org.apache.lucene.store.MemorySegmentIndexInputProvider");
+ // we use method handles, so we do not need to deal with setAccessible as we have private
+ // access through the lookup:
+ final var constr = lookup.findConstructor(cls, MethodType.methodType(void.class));
+ try {
+ return (MMapIndexInputProvider) constr.invoke();
+ } catch (RuntimeException | Error e) {
+ throw e;
+ } catch (Throwable th) {
+ throw new AssertionError(th);
+ }
+ } catch (
+ @SuppressWarnings("unused")
+ ClassNotFoundException e) {
+ // we're before Java 19
+ return new MappedByteBufferIndexInputProvider();
+ } catch (
+ @SuppressWarnings("unused")
+ UnsupportedClassVersionError e) {
+ var log = Logger.getLogger(lookup.lookupClass().getName());
+ if (Runtime.version().feature() == 19) {
+ log.warning(
+ "You are running with Java 19. To make full use of MMapDirectory, please pass '--enable-preview' to the Java command line.");
+ } else {
+ log.warning(
+ "You are running with Java 20 or later. To make full use of MMapDirectory, please update Apache Lucene.");
}
- return "Unmapping is not supported on this platform, because internal Java APIs are not compatible with this Lucene version: "
- + e;
+ return new MappedByteBufferIndexInputProvider();
+ } catch (NoSuchMethodException | IllegalAccessException e) {
+ throw new LinkageError(
+ "MemorySegmentIndexInputProvider is missing correctly typed constructor", e);
}
}
- private static BufferCleaner newBufferCleaner(final MethodHandle unmapper) {
- assert Objects.equals(methodType(void.class, ByteBuffer.class), unmapper.type());
- return (String resourceDescription, ByteBuffer buffer) -> {
- if (!buffer.isDirect()) {
- throw new IllegalArgumentException("unmapping only works with direct buffers");
- }
- final Throwable error =
- AccessController.doPrivileged(
- (PrivilegedAction<Throwable>)
- () -> {
- try {
- unmapper.invokeExact(buffer);
- return null;
- } catch (Throwable t) {
- return t;
- }
- });
- if (error != null) {
- throw new IOException("Unable to unmap the mapped buffer: " + resourceDescription, error);
- }
- };
+ static {
+ PROVIDER = lookupProvider();
+ DEFAULT_MAX_CHUNK_SIZE = PROVIDER.getDefaultMaxChunkSize();
+ UNMAP_SUPPORTED = PROVIDER.isUnmapSupported();
+ UNMAP_NOT_SUPPORTED_REASON = PROVIDER.getUnmapNotSupportedReason();
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/store/MappedByteBufferIndexInputProvider.java b/lucene/core/src/java/org/apache/lucene/store/MappedByteBufferIndexInputProvider.java
new file mode 100644
index 00000000000..798c924c02c
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/store/MappedByteBufferIndexInputProvider.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.store;
+
+import static java.lang.invoke.MethodHandles.lookup;
+import static java.lang.invoke.MethodType.methodType;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles.Lookup;
+import java.lang.reflect.Field;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.Objects;
+import java.util.logging.Logger;
+import org.apache.lucene.store.ByteBufferGuard.BufferCleaner;
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.SuppressForbidden;
+
+final class MappedByteBufferIndexInputProvider implements MMapDirectory.MMapIndexInputProvider {
+
+ private final BufferCleaner cleaner;
+
+ private final boolean unmapSupported;
+ private final String unmapNotSupportedReason;
+
+ public MappedByteBufferIndexInputProvider() {
+ final Object hack = doPrivileged(MappedByteBufferIndexInputProvider::unmapHackImpl);
+ if (hack instanceof BufferCleaner) {
+ cleaner = (BufferCleaner) hack;
+ unmapSupported = true;
+ unmapNotSupportedReason = null;
+ } else {
+ cleaner = null;
+ unmapSupported = false;
+ unmapNotSupportedReason = hack.toString();
+ Logger.getLogger(getClass().getName()).warning(unmapNotSupportedReason);
+ }
+ }
+
+ @Override
+ public IndexInput openInput(
+ Path path, IOContext context, int chunkSizePower, boolean preload, boolean useUnmapHack)
+ throws IOException {
+ if (chunkSizePower > 30) {
+ throw new IllegalArgumentException(
+ "ByteBufferIndexInput cannot use a chunk size of >1 GiBytes.");
+ }
+
+ final String resourceDescription = "ByteBufferIndexInput(path=\"" + path.toString() + "\")";
+
+ try (var fc = FileChannel.open(path, StandardOpenOption.READ)) {
+ final long fileSize = fc.size();
+ return ByteBufferIndexInput.newInstance(
+ resourceDescription,
+ map(resourceDescription, fc, chunkSizePower, preload, fileSize),
+ fileSize,
+ chunkSizePower,
+ new ByteBufferGuard(resourceDescription, useUnmapHack ? cleaner : null));
+ }
+ }
+
+ @Override
+ public long getDefaultMaxChunkSize() {
+ return Constants.JRE_IS_64BIT ? (1L << 30) : (1L << 28);
+ }
+
+ @Override
+ public boolean isUnmapSupported() {
+ return unmapSupported;
+ }
+
+ @Override
+ public String getUnmapNotSupportedReason() {
+ return unmapNotSupportedReason;
+ }
+
+ /** Maps a file into a set of buffers */
+ final ByteBuffer[] map(
+ String resourceDescription, FileChannel fc, int chunkSizePower, boolean preload, long length)
+ throws IOException {
+ if ((length >>> chunkSizePower) >= Integer.MAX_VALUE)
+ throw new IllegalArgumentException(
+ "RandomAccessFile too big for chunk size: " + resourceDescription);
+
+ final long chunkSize = 1L << chunkSizePower;
+
+ // we always allocate one more buffer, the last one may be a 0 byte one
+ final int nrBuffers = (int) (length >>> chunkSizePower) + 1;
+
+ final ByteBuffer[] buffers = new ByteBuffer[nrBuffers];
+
+ long startOffset = 0L;
+ for (int bufNr = 0; bufNr < nrBuffers; bufNr++) {
+ final int bufSize =
+ (int) ((length > (startOffset + chunkSize)) ? chunkSize : (length - startOffset));
+ final MappedByteBuffer buffer;
+ try {
+ buffer = fc.map(MapMode.READ_ONLY, startOffset, bufSize);
+ buffer.order(ByteOrder.LITTLE_ENDIAN);
+ } catch (IOException ioe) {
+ throw convertMapFailedIOException(ioe, resourceDescription, bufSize);
+ }
+ if (preload) {
+ buffer.load();
+ }
+ buffers[bufNr] = buffer;
+ startOffset += bufSize;
+ }
+
+ return buffers;
+ }
+
+ // Extracted to a method to be able to apply the SuppressForbidden annotation
+ @SuppressWarnings("removal")
+ @SuppressForbidden(reason = "security manager")
+ private static <T> T doPrivileged(PrivilegedAction<T> action) {
+ return AccessController.doPrivileged(action);
+ }
+
+ @SuppressForbidden(reason = "Needs access to sun.misc.Unsafe to enable hack")
+ private static Object unmapHackImpl() {
+ final Lookup lookup = lookup();
+ try {
+ // *** sun.misc.Unsafe unmapping (Java 9+) ***
+ final Class<?> unsafeClass = lookup.findClass("sun.misc.Unsafe");
+ // first check if Unsafe has the right method, otherwise we can give up
+ // without doing any security critical stuff:
+ final MethodHandle unmapper =
+ lookup.findVirtual(
+ unsafeClass, "invokeCleaner", methodType(void.class, ByteBuffer.class));
+ // fetch the unsafe instance and bind it to the virtual MH:
+ final Field f = unsafeClass.getDeclaredField("theUnsafe");
+ f.setAccessible(true);
+ final Object theUnsafe = f.get(null);
+ return newBufferCleaner(unmapper.bindTo(theUnsafe));
+ } catch (SecurityException se) {
+ return "Unmapping is not supported, because not all required permissions are given to the Lucene JAR file: "
+ + se
+ + " [Please grant at least the following permissions: RuntimePermission(\"accessClassInPackage.sun.misc\") "
+ + " and ReflectPermission(\"suppressAccessChecks\")]";
+ } catch (ReflectiveOperationException | RuntimeException e) {
+ final Module module = MappedByteBufferIndexInputProvider.class.getModule();
+ final ModuleLayer layer = module.getLayer();
+ // classpath / unnamed module has no layer, so we need to check:
+ if (layer != null
+ && layer.findModule("jdk.unsupported").map(module::canRead).orElse(false) == false) {
+ return "Unmapping is not supported, because Lucene cannot read 'jdk.unsupported' module "
+ + "[please add 'jdk.unsupported' to modular application either by command line or its module descriptor]";
+ }
+ return "Unmapping is not supported on this platform, because internal Java APIs are not compatible with this Lucene version: "
+ + e;
+ }
+ }
+
+ private static BufferCleaner newBufferCleaner(final MethodHandle unmapper) {
+ assert Objects.equals(methodType(void.class, ByteBuffer.class), unmapper.type());
+ return (String resourceDescription, ByteBuffer buffer) -> {
+ if (!buffer.isDirect()) {
+ throw new IllegalArgumentException("unmapping only works with direct buffers");
+ }
+ final Throwable error =
+ doPrivileged(
+ () -> {
+ try {
+ unmapper.invokeExact(buffer);
+ return null;
+ } catch (Throwable t) {
+ return t;
+ }
+ });
+ if (error != null) {
+ throw new IOException("Unable to unmap the mapped buffer: " + resourceDescription, error);
+ }
+ };
+ }
+}
diff --git a/lucene/core/src/java19/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java19/org/apache/lucene/store/MemorySegmentIndexInput.java
new file mode 100644
index 00000000000..7754645c708
--- /dev/null
+++ b/lucene/core/src/java19/org/apache/lucene/store/MemorySegmentIndexInput.java
@@ -0,0 +1,597 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.store;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.MemorySession;
+import java.lang.foreign.ValueLayout;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.util.Objects;
+
+/**
+ * Base IndexInput implementation that uses an array of MemorySegments to represent a file.
+ *
+ * <p>For efficiency, this class requires that the segment size is a power of two (<code>
+ * chunkSizePower</code>).
+ */
+@SuppressWarnings("preview")
+abstract class MemorySegmentIndexInput extends IndexInput implements RandomAccessInput {
+ static final ValueLayout.OfByte LAYOUT_BYTE = ValueLayout.JAVA_BYTE;
+ static final ValueLayout.OfShort LAYOUT_LE_SHORT =
+ ValueLayout.JAVA_SHORT.withOrder(ByteOrder.LITTLE_ENDIAN).withBitAlignment(8);
+ static final ValueLayout.OfInt LAYOUT_LE_INT =
+ ValueLayout.JAVA_INT.withOrder(ByteOrder.LITTLE_ENDIAN).withBitAlignment(8);
+ static final ValueLayout.OfLong LAYOUT_LE_LONG =
+ ValueLayout.JAVA_LONG.withOrder(ByteOrder.LITTLE_ENDIAN).withBitAlignment(8);
+ static final ValueLayout.OfFloat LAYOUT_LE_FLOAT =
+ ValueLayout.JAVA_FLOAT.withOrder(ByteOrder.LITTLE_ENDIAN).withBitAlignment(8);
+
+ final long length;
+ final long chunkSizeMask;
+ final int chunkSizePower;
+ final MemorySession session;
+ final MemorySegment[] segments;
+
+ int curSegmentIndex = -1;
+ MemorySegment
+ curSegment; // redundant for speed: segments[curSegmentIndex], also marker if closed!
+ long curPosition; // relative to curSegment, not globally
+
+ /**
+ * Creates the implementation matching the number of segments: a {@link SingleSegmentImpl} for
+ * exactly one segment, otherwise a {@link MultiSegmentImpl} starting at offset 0. With
+ * assertions enabled, every segment must belong to the given session.
+ */
+ public static MemorySegmentIndexInput newInstance(
+ String resourceDescription,
+ MemorySession session,
+ MemorySegment[] segments,
+ long length,
+ int chunkSizePower) {
+ assert Arrays.stream(segments).map(MemorySegment::session).allMatch(session::equals);
+ if (segments.length != 1) {
+ return new MultiSegmentImpl(
+ resourceDescription, session, segments, 0, length, chunkSizePower);
+ }
+ final MemorySegment onlySegment = segments[0];
+ return new SingleSegmentImpl(
+ resourceDescription, session, onlySegment, length, chunkSizePower);
+ }
+
+ /**
+ * Stores the given state and makes {@code segments[0]} the current segment. {@code
+ * chunkSizeMask} is derived as {@code (1L << chunkSizePower) - 1}. {@code curSegmentIndex} is
+ * not assigned here and keeps its field initializer value of -1; the nested implementations
+ * set it in their own constructors (directly or through {@code seek}).
+ */
+ private MemorySegmentIndexInput(
+ String resourceDescription,
+ MemorySession session,
+ MemorySegment[] segments,
+ long length,
+ int chunkSizePower) {
+ super(resourceDescription);
+ this.session = session;
+ this.segments = segments;
+ this.length = length;
+ this.chunkSizePower = chunkSizePower;
+ this.chunkSizeMask = (1L << chunkSizePower) - 1L;
+ this.curSegment = segments[0];
+ }
+
+ /** Throws the exception built by {@link #alreadyClosed} when {@code curSegment} is null. */
+ void ensureOpen() {
+ if (curSegment != null) {
+ return;
+ }
+ throw alreadyClosed(null);
+ }
+
+ /**
+ * Translates an out-of-bounds positional access into the exception reported to the caller.
+ *
+ * @param action verb used as message prefix (callers in this class pass "seek" or "read")
+ * @param pos the offending position as received by the failing method; it is included in the
+ * message (note that MultiSegmentImpl passes positions with its slice offset already added)
+ * @return an IllegalArgumentException for the caller to throw if {@code pos} is negative
+ * @throws EOFException if {@code pos} is not negative
+ */
+ RuntimeException handlePositionalIOOBE(String action, long pos) throws IOException {
+ if (pos < 0L) {
+ return new IllegalArgumentException(action + " negative position (pos=" + pos + "): " + this);
+ } else {
+ throw new EOFException(action + " past EOF (pos=" + pos + "): " + this);
+ }
+ }
+
+ /**
+ * Creates the exception reported when this input is used after it was closed. The message
+ * contains this input's string representation.
+ *
+ * <p>NOTE(review): the triggering NullPointerException/IllegalStateException is not attached as
+ * cause, and callers route every NPE/ISE here, so an NPE with a different origin would also be
+ * reported as "Already closed" - confirm this is intended.
+ */
+ // the unused parameter is just to silence javac about unused variables
+ AlreadyClosedException alreadyClosed(RuntimeException unused) {
+ return new AlreadyClosedException("Already closed: " + this);
+ }
+
+ /**
+ * Reads one byte at the current position and advances the position by one.
+ *
+ * <p>There is no explicit bounds check: an IndexOutOfBoundsException from the current segment
+ * is used as the signal to move on to the next non-empty segment.
+ *
+ * @throws EOFException if no further segment is available
+ * @throws AlreadyClosedException if the first read attempt raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public final byte readByte() throws IOException {
+ try {
+ final byte v = curSegment.get(LAYOUT_BYTE, curPosition);
+ curPosition++;
+ return v;
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ // end of the current segment reached: advance, skipping segments of size 0
+ do {
+ curSegmentIndex++;
+ if (curSegmentIndex >= segments.length) {
+ throw new EOFException("read past EOF: " + this);
+ }
+ curSegment = segments[curSegmentIndex];
+ curPosition = 0L;
+ } while (curSegment.byteSize() == 0L);
+ final byte v = curSegment.get(LAYOUT_BYTE, curPosition);
+ curPosition++;
+ return v;
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Copies {@code len} bytes from the current position into {@code b}, starting at {@code
+ * offset}, and advances the position by {@code len}. If the bulk copy fails with an
+ * IndexOutOfBoundsException, the read is redone by {@link #readBytesBoundary}.
+ *
+ * @throws AlreadyClosedException if the copy raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public final void readBytes(byte[] b, int offset, int len) throws IOException {
+ try {
+ MemorySegment.copy(curSegment, LAYOUT_BYTE, curPosition, b, offset, len);
+ // only reached if the whole copy succeeded
+ curPosition += len;
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ readBytesBoundary(b, offset, len);
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Slow path of {@link #readBytes}: copies what is left of the current segment, then continues
+ * with the following segments until {@code len} bytes were copied.
+ *
+ * @throws EOFException if the segments are exhausted before {@code len} bytes were read
+ * @throws AlreadyClosedException if a NullPointerException or IllegalStateException is raised
+ */
+ private void readBytesBoundary(byte[] b, int offset, int len) throws IOException {
+ try {
+ // number of bytes still available in the current segment
+ long curAvail = curSegment.byteSize() - curPosition;
+ while (len > curAvail) {
+ MemorySegment.copy(curSegment, LAYOUT_BYTE, curPosition, b, offset, (int) curAvail);
+ // compound assignments implicitly narrow the long result back to int; curAvail is smaller
+ // than len inside this loop
+ len -= curAvail;
+ offset += curAvail;
+ curSegmentIndex++;
+ if (curSegmentIndex >= segments.length) {
+ throw new EOFException("read past EOF: " + this);
+ }
+ curSegment = segments[curSegmentIndex];
+ curPosition = 0L;
+ curAvail = curSegment.byteSize();
+ }
+ // the remainder fits into the current segment
+ MemorySegment.copy(curSegment, LAYOUT_BYTE, curPosition, b, offset, len);
+ curPosition += len;
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Bulk-reads {@code length} little-endian ints into {@code dst}, starting at {@code offset},
+ * and advances the position by {@code Integer.BYTES * length}. If the bulk copy fails with an
+ * IndexOutOfBoundsException, the read is delegated to the superclass implementation (not
+ * visible here; presumably it reads value by value and so copes with segment boundaries - TODO
+ * confirm).
+ *
+ * @throws AlreadyClosedException if the copy raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public void readInts(int[] dst, int offset, int length) throws IOException {
+ try {
+ MemorySegment.copy(curSegment, LAYOUT_LE_INT, curPosition, dst, offset, length);
+ // widen to long before multiplying to avoid int overflow
+ curPosition += Integer.BYTES * (long) length;
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException iobe) {
+ super.readInts(dst, offset, length);
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Bulk-reads {@code length} little-endian longs into {@code dst}, starting at {@code offset},
+ * and advances the position by {@code Long.BYTES * length}. If the bulk copy fails with an
+ * IndexOutOfBoundsException, the read is delegated to the superclass implementation (not
+ * visible here; presumably it reads value by value and so copes with segment boundaries - TODO
+ * confirm).
+ *
+ * @throws AlreadyClosedException if the copy raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public void readLongs(long[] dst, int offset, int length) throws IOException {
+ try {
+ MemorySegment.copy(curSegment, LAYOUT_LE_LONG, curPosition, dst, offset, length);
+ // widen to long before multiplying to avoid int overflow
+ curPosition += Long.BYTES * (long) length;
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException iobe) {
+ super.readLongs(dst, offset, length);
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Bulk-reads {@code length} little-endian floats into {@code dst}, starting at {@code offset},
+ * and advances the position by {@code Float.BYTES * length}. If the bulk copy fails with an
+ * IndexOutOfBoundsException, the read is delegated to the superclass implementation (not
+ * visible here; presumably it reads value by value and so copes with segment boundaries - TODO
+ * confirm).
+ *
+ * @throws AlreadyClosedException if the copy raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public void readFloats(float[] dst, int offset, int length) throws IOException {
+ try {
+ MemorySegment.copy(curSegment, LAYOUT_LE_FLOAT, curPosition, dst, offset, length);
+ // widen to long before multiplying to avoid int overflow
+ curPosition += Float.BYTES * (long) length;
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException iobe) {
+ super.readFloats(dst, offset, length);
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Reads a little-endian short at the current position and advances by {@code Short.BYTES}. If
+ * the value does not fit into the rest of the current segment (IndexOutOfBoundsException), the
+ * superclass implementation is used instead (not visible here - presumably byte-wise; TODO
+ * confirm).
+ *
+ * @throws AlreadyClosedException if the read raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public final short readShort() throws IOException {
+ try {
+ final short v = curSegment.get(LAYOUT_LE_SHORT, curPosition);
+ curPosition += Short.BYTES;
+ return v;
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ return super.readShort();
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Reads a little-endian int at the current position and advances by {@code Integer.BYTES}. If
+ * the value does not fit into the rest of the current segment (IndexOutOfBoundsException), the
+ * superclass implementation is used instead (not visible here - presumably byte-wise; TODO
+ * confirm).
+ *
+ * @throws AlreadyClosedException if the read raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public final int readInt() throws IOException {
+ try {
+ final int v = curSegment.get(LAYOUT_LE_INT, curPosition);
+ curPosition += Integer.BYTES;
+ return v;
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ return super.readInt();
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Reads a little-endian long at the current position and advances by {@code Long.BYTES}. If
+ * the value does not fit into the rest of the current segment (IndexOutOfBoundsException), the
+ * superclass implementation is used instead (not visible here - presumably byte-wise; TODO
+ * confirm).
+ *
+ * @throws AlreadyClosedException if the read raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public final long readLong() throws IOException {
+ try {
+ final long v = curSegment.get(LAYOUT_LE_LONG, curPosition);
+ curPosition += Long.BYTES;
+ return v;
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ return super.readLong();
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Returns the absolute position: the start of the current segment ({@code curSegmentIndex}
+ * shifted left by {@code chunkSizePower}) plus the position inside that segment.
+ */
+ @Override
+ public long getFilePointer() {
+ ensureOpen();
+ final long segmentStart = ((long) curSegmentIndex) << chunkSizePower;
+ return segmentStart + curPosition;
+ }
+
+ /**
+ * Moves the read position to {@code pos}. The segment index is taken from the upper bits of
+ * {@code pos}, the position inside the segment from the bits selected by {@code chunkSizeMask}.
+ *
+ * @throws IllegalArgumentException if {@code pos} is negative and the lookup fails
+ * @throws EOFException if {@code pos} is non-negative and out of range
+ */
+ @Override
+ public void seek(long pos) throws IOException {
+ ensureOpen();
+ // we use >> here to preserve negative, so we will catch AIOOBE,
+ // in case pos + offset overflows.
+ // NOTE(review): the (int) cast truncates; a very large pos could map to a valid index -
+ // confirm callers cannot pass such values.
+ final int si = (int) (pos >> chunkSizePower);
+ try {
+ if (si != curSegmentIndex) {
+ final MemorySegment seg = segments[si];
+ // write values, on exception all is unchanged
+ this.curSegmentIndex = si;
+ this.curSegment = seg;
+ }
+ // bound is byteSize() + 1, so a position equal to the segment size is accepted.
+ // NOTE(review): if this check fails after a segment switch above, curSegmentIndex and
+ // curSegment have already been updated.
+ this.curPosition = Objects.checkIndex(pos & chunkSizeMask, curSegment.byteSize() + 1);
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ throw handlePositionalIOOBE("seek", pos);
+ }
+ }
+
+ /**
+ * Random-access read of one byte at absolute position {@code pos}. The fields holding the
+ * sequential read position are not modified.
+ *
+ * @throws IllegalArgumentException if the access is out of bounds and {@code pos} is negative
+ * @throws EOFException if the access is out of bounds and {@code pos} is not negative
+ * @throws AlreadyClosedException if the read raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public byte readByte(long pos) throws IOException {
+ try {
+ // segment index from the upper bits, in-segment offset from the masked lower bits
+ final int si = (int) (pos >> chunkSizePower);
+ return segments[si].get(LAYOUT_BYTE, pos & chunkSizeMask);
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException ioobe) {
+ throw handlePositionalIOOBE("read", pos);
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Makes segment {@code si} the current segment and sets the in-segment position to {@code pos
+ * & chunkSizeMask}. The in-segment position itself is not range-checked here; only the array
+ * lookup of {@code segments[si]} can fail.
+ *
+ * @throws IllegalArgumentException if the lookup fails and {@code pos} is negative
+ * @throws EOFException if the lookup fails and {@code pos} is not negative
+ */
+ // used only by random access methods to handle reads across boundaries
+ private void setPos(long pos, int si) throws IOException {
+ try {
+ final MemorySegment seg = segments[si];
+ // write values, on exception above all is unchanged
+ this.curPosition = pos & chunkSizeMask;
+ this.curSegmentIndex = si;
+ this.curSegment = seg;
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException ioobe) {
+ throw handlePositionalIOOBE("read", pos);
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Random-access read of a little-endian short at absolute position {@code pos}.
+ *
+ * <p>If the direct read fails with an IndexOutOfBoundsException, this input is repositioned
+ * via {@code setPos} and the sequential {@link #readShort()} is used; in that case the
+ * sequential read position of this input is changed as a side effect.
+ *
+ * @throws AlreadyClosedException if the direct read raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public short readShort(long pos) throws IOException {
+ final int si = (int) (pos >> chunkSizePower);
+ try {
+ return segments[si].get(LAYOUT_LE_SHORT, pos & chunkSizeMask);
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException ioobe) {
+ // either it's a boundary, or read past EOF, fall back:
+ setPos(pos, si);
+ return readShort();
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Random-access read of a little-endian int at absolute position {@code pos}.
+ *
+ * <p>If the direct read fails with an IndexOutOfBoundsException, this input is repositioned
+ * via {@code setPos} and the sequential {@link #readInt()} is used; in that case the
+ * sequential read position of this input is changed as a side effect.
+ *
+ * @throws AlreadyClosedException if the direct read raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public int readInt(long pos) throws IOException {
+ final int si = (int) (pos >> chunkSizePower);
+ try {
+ return segments[si].get(LAYOUT_LE_INT, pos & chunkSizeMask);
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException ioobe) {
+ // either it's a boundary, or read past EOF, fall back:
+ setPos(pos, si);
+ return readInt();
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Random-access read of a little-endian long at absolute position {@code pos}.
+ *
+ * <p>If the direct read fails with an IndexOutOfBoundsException, this input is repositioned
+ * via {@code setPos} and the sequential {@link #readLong()} is used; in that case the
+ * sequential read position of this input is changed as a side effect.
+ *
+ * @throws AlreadyClosedException if the direct read raises a NullPointerException or
+ * IllegalStateException
+ */
+ @Override
+ public long readLong(long pos) throws IOException {
+ final int si = (int) (pos >> chunkSizePower);
+ try {
+ return segments[si].get(LAYOUT_LE_LONG, pos & chunkSizeMask);
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException ioobe) {
+ // either it's a boundary, or read past EOF, fall back:
+ setPos(pos, si);
+ return readLong();
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /** Returns the length given at construction time; no open-check is performed. */
+ @Override
+ public final long length() {
+ return this.length;
+ }
+
+ /**
+ * Returns a copy covering the full length of this input, positioned at this input's current
+ * file pointer. The copy is produced by {@link #buildSlice} with a null description.
+ */
+ @Override
+ public final MemorySegmentIndexInput clone() {
+ final MemorySegmentIndexInput copy = buildSlice((String) null, 0L, this.length);
+ final long position = getFilePointer();
+ try {
+ copy.seek(position);
+ } catch (IOException ioe) {
+ // seeking to a position this input already holds is not expected to fail
+ throw new AssertionError(ioe);
+ }
+ return copy;
+ }
+
+ /**
+ * Creates a slice of this index input, with the given description, offset, and length. The slice
+ * is seeked to the beginning.
+ *
+ * @throws IllegalArgumentException if {@code offset} or {@code length} is negative, or if the
+ * requested range extends past the length of this input
+ */
+ @Override
+ public final MemorySegmentIndexInput slice(String sliceDescription, long offset, long length) {
+ // compare against (this.length - offset) instead of computing offset + length: the sum can
+ // overflow to a negative value and would then slip through the bounds check
+ if ((length | offset) < 0 || length > this.length - offset) {
+ throw new IllegalArgumentException(
+ "slice() "
+ + sliceDescription
+ + " out of bounds: offset="
+ + offset
+ + ",length="
+ + length
+ + ",fileLength="
+ + this.length
+ + ": "
+ + this);
+ }
+
+ return buildSlice(sliceDescription, offset, length);
+ }
+
+ /**
+ * Builds the actual sliced IndexInput (may apply extra offset in subclasses). The result never
+ * carries a MemorySession, so closing it does not release the mapping.
+ */
+ MemorySegmentIndexInput buildSlice(String sliceDescription, long offset, long length) {
+ ensureOpen();
+
+ final long endOffset = offset + length;
+ final int firstIndex = (int) (offset >>> chunkSizePower);
+ final int lastIndex = (int) (endOffset >>> chunkSizePower);
+
+ // the copy includes the segment containing endOffset; after the truncation below that last
+ // segment may be a 0 byte one:
+ final MemorySegment[] sliced = Arrays.copyOfRange(segments, firstIndex, lastIndex + 1);
+
+ // limit the last segment so that the view ends at endOffset:
+ final int tail = sliced.length - 1;
+ sliced[tail] = sliced[tail].asSlice(0L, endOffset & chunkSizeMask);
+
+ final long offsetInFirst = offset & chunkSizeMask;
+ final String description = getFullSliceDescription(sliceDescription);
+
+ if (sliced.length != 1) {
+ // session is null: slices and clones cannot close the mapping
+ return new MultiSegmentImpl(
+ description, null, sliced, offsetInFirst, length, chunkSizePower);
+ }
+ // session is null: slices and clones cannot close the mapping
+ final MemorySegment single = sliced[0].asSlice(offsetInFirst, length);
+ return new SingleSegmentImpl(description, null, single, length, chunkSizePower);
+ }
+
+ /**
+ * Closes this input. Calling it again after a successful close is a no-op.
+ *
+ * <p>Only an instance created with a MemorySession releases the mapping; instances without one
+ * (slices and clones) merely drop their segment references.
+ *
+ * <p>If {@code session.close()} throws (for example an IllegalStateException because another
+ * thread is still accessing a segment), the exception propagates and this input is left open,
+ * so that {@code close()} can be called again to release the mapping.
+ */
+ @Override
+ public final void close() throws IOException {
+ if (curSegment == null) {
+ return;
+ }
+
+ // the master IndexInput has a MemorySession and is able
+ // to release all resources (unmap segments) - a
+ // side effect is that other threads still using clones
+ // will throw IllegalStateException.
+ // This happens BEFORE the instance is marked as closed: if closing the session fails, a
+ // later retry must get here again instead of returning early and leaking the mapping.
+ if (session != null) {
+ session.close();
+ }
+
+ // make sure all accesses to this IndexInput instance throw NPE:
+ curSegment = null;
+ Arrays.fill(segments, null);
+ }
+
+ /**
+ * Optimization of MemorySegmentIndexInput for when there is only one segment. Positions are
+ * used directly as offsets into that segment, without any shift/mask arithmetic.
+ */
+ static final class SingleSegmentImpl extends MemorySegmentIndexInput {
+
+ /** Wraps the single segment into a one-element array and fixes the segment index at 0. */
+ SingleSegmentImpl(
+ String resourceDescription,
+ MemorySession session,
+ MemorySegment segment,
+ long length,
+ int chunkSizePower) {
+ super(resourceDescription, session, new MemorySegment[] {segment}, length, chunkSizePower);
+ this.curSegmentIndex = 0;
+ }
+
+ /**
+ * Sets the position to {@code pos}; any value from 0 up to and including {@code length} is
+ * accepted.
+ */
+ @Override
+ public void seek(long pos) throws IOException {
+ ensureOpen();
+ try {
+ // bound is length + 1, so seeking to the very end is allowed
+ curPosition = Objects.checkIndex(pos, length + 1);
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ throw handlePositionalIOOBE("seek", pos);
+ }
+ }
+
+ /** Returns the position inside the single segment. */
+ @Override
+ public long getFilePointer() {
+ ensureOpen();
+ return curPosition;
+ }
+
+ /** Random-access read of one byte at {@code pos}; out-of-bounds access is not retried. */
+ @Override
+ public byte readByte(long pos) throws IOException {
+ try {
+ return curSegment.get(LAYOUT_BYTE, pos);
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ throw handlePositionalIOOBE("read", pos);
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Random-access read of a little-endian short at {@code pos}; out-of-bounds access is not
+ * retried.
+ */
+ @Override
+ public short readShort(long pos) throws IOException {
+ try {
+ return curSegment.get(LAYOUT_LE_SHORT, pos);
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ throw handlePositionalIOOBE("read", pos);
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Random-access read of a little-endian int at {@code pos}; out-of-bounds access is not
+ * retried.
+ */
+ @Override
+ public int readInt(long pos) throws IOException {
+ try {
+ return curSegment.get(LAYOUT_LE_INT, pos);
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ throw handlePositionalIOOBE("read", pos);
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+
+ /**
+ * Random-access read of a little-endian long at {@code pos}; out-of-bounds access is not
+ * retried.
+ */
+ @Override
+ public long readLong(long pos) throws IOException {
+ try {
+ return curSegment.get(LAYOUT_LE_LONG, pos);
+ } catch (
+ @SuppressWarnings("unused")
+ IndexOutOfBoundsException e) {
+ throw handlePositionalIOOBE("read", pos);
+ } catch (NullPointerException | IllegalStateException e) {
+ throw alreadyClosed(e);
+ }
+ }
+ }
+
+ /**
+ * This class adds offset support to MemorySegmentIndexInput, which is needed for slices. Every
+ * position passed in by a caller is shifted by {@code offset} before it reaches the base class,
+ * and the reported file pointer is shifted back.
+ */
+ static final class MultiSegmentImpl extends MemorySegmentIndexInput {
+ // added to every caller-supplied position; subtracted again in getFilePointer()
+ private final long offset;
+
+ /** Stores the offset and positions the new instance at its logical position 0. */
+ MultiSegmentImpl(
+ String resourceDescription,
+ MemorySession session,
+ MemorySegment[] segments,
+ long offset,
+ long length,
+ int chunkSizePower) {
+ super(resourceDescription, session, segments, length, chunkSizePower);
+ this.offset = offset;
+ try {
+ // goes through the overridden seek below, i.e. seeks to the stored offset
+ seek(0L);
+ } catch (IOException ioe) {
+ throw new AssertionError(ioe);
+ }
+ assert curSegment != null && curSegmentIndex >= 0;
+ }
+
+ /**
+ * Seeks to {@code pos} relative to the start of this view. A negative {@code pos} is only
+ * rejected by an assertion here.
+ */
+ @Override
+ public void seek(long pos) throws IOException {
+ assert pos >= 0L;
+ super.seek(pos + offset);
+ }
+
+ /** Returns the file pointer relative to the start of this view. */
+ @Override
+ public long getFilePointer() {
+ return super.getFilePointer() - offset;
+ }
+
+ /** Random-access byte read at {@code pos} relative to the start of this view. */
+ @Override
+ public byte readByte(long pos) throws IOException {
+ return super.readByte(pos + offset);
+ }
+
+ /** Random-access short read at {@code pos} relative to the start of this view. */
+ @Override
+ public short readShort(long pos) throws IOException {
+ return super.readShort(pos + offset);
+ }
+
+ /** Random-access int read at {@code pos} relative to the start of this view. */
+ @Override
+ public int readInt(long pos) throws IOException {
+ return super.readInt(pos + offset);
+ }
+
+ /** Random-access long read at {@code pos} relative to the start of this view. */
+ @Override
+ public long readLong(long pos) throws IOException {
+ return super.readLong(pos + offset);
+ }
+
+ /** Builds a slice whose start is shifted by this view's own offset. */
+ @Override
+ MemorySegmentIndexInput buildSlice(String sliceDescription, long ofs, long length) {
+ return super.buildSlice(sliceDescription, this.offset + ofs, length);
+ }
+ }
+}
diff --git a/lucene/core/src/java19/org/apache/lucene/store/MemorySegmentIndexInputProvider.java b/lucene/core/src/java19/org/apache/lucene/store/MemorySegmentIndexInputProvider.java
new file mode 100644
index 00000000000..f4e9cc844f5
--- /dev/null
+++ b/lucene/core/src/java19/org/apache/lucene/store/MemorySegmentIndexInputProvider.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.store;
+
+import java.io.IOException;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.MemorySession;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.logging.Logger;
+import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.Unwrappable;
+
+ /**
+ * {@link MMapDirectory.MMapIndexInputProvider} that maps files as MemorySegments bound to a
+ * shared MemorySession and exposes them through {@link MemorySegmentIndexInput}.
+ */
+ @SuppressWarnings("preview")
+ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexInputProvider {
+
+ /** Logs at info level that this implementation is in use. */
+ public MemorySegmentIndexInputProvider() {
+ Logger.getLogger(getClass().getName()).info("Using MemorySegmentIndexInput with Java 19+");
+ }
+
+ /**
+ * Opens {@code path} read-only and maps its whole content in chunks of at most {@code 1 <<
+ * chunkSizePower} bytes. The file channel is closed before returning; the shared session is
+ * closed here only if opening or mapping fails, otherwise it is handed to the returned input.
+ *
+ * <p>The {@code context} and {@code useUnmapHack} parameters are not read by this
+ * implementation.
+ */
+ @Override
+ public IndexInput openInput(
+ Path path, IOContext context, int chunkSizePower, boolean preload, boolean useUnmapHack)
+ throws IOException {
+ // built before unwrapping, so the description shows the path as passed in
+ final String resourceDescription = "MemorySegmentIndexInput(path=\"" + path.toString() + "\")";
+
+ // Work around for JDK-8259028: we need to unwrap our test-only file system layers
+ path = Unwrappable.unwrapAll(path);
+
+ boolean success = false;
+ final MemorySession session = MemorySession.openShared();
+ try (var fc = FileChannel.open(path, StandardOpenOption.READ)) {
+ final long fileSize = fc.size();
+ final IndexInput in =
+ MemorySegmentIndexInput.newInstance(
+ resourceDescription,
+ session,
+ map(session, resourceDescription, fc, chunkSizePower, preload, fileSize),
+ fileSize,
+ chunkSizePower);
+ success = true;
+ return in;
+ } finally {
+ // on any failure above release the session (and whatever was already mapped into it)
+ if (success == false) {
+ session.close();
+ }
+ }
+ }
+
+ /** Returns {@code 1L << 34} (16 GiB) on 64-bit JREs and {@code 1L << 28} (256 MiB) otherwise. */
+ @Override
+ public long getDefaultMaxChunkSize() {
+ return Constants.JRE_IS_64BIT ? (1L << 34) : (1L << 28);
+ }
+
+ /** Always true for this implementation. */
+ @Override
+ public boolean isUnmapSupported() {
+ return true;
+ }
+
+ /** Always null, as unmapping is reported as supported. */
+ @Override
+ public String getUnmapNotSupportedReason() {
+ return null;
+ }
+
+ /**
+ * Maps {@code length} bytes of {@code fc} into consecutive read-only segments of {@code 1 <<
+ * chunkSizePower} bytes each (the last one holds the remainder and may be empty), all bound to
+ * {@code session}.
+ *
+ * @throws IllegalArgumentException if the number of chunks would not fit into an int
+ * @throws IOException if mapping fails, as converted by convertMapFailedIOException
+ */
+ private final MemorySegment[] map(
+ MemorySession session,
+ String resourceDescription,
+ FileChannel fc,
+ int chunkSizePower,
+ boolean preload,
+ long length)
+ throws IOException {
+ if ((length >>> chunkSizePower) >= Integer.MAX_VALUE)
+ throw new IllegalArgumentException("File too big for chunk size: " + resourceDescription);
+
+ final long chunkSize = 1L << chunkSizePower;
+
+ // we always allocate one more segment, the last one may be a 0 byte one
+ final int nrSegments = (int) (length >>> chunkSizePower) + 1;
+
+ final MemorySegment[] segments = new MemorySegment[nrSegments];
+
+ long startOffset = 0L;
+ for (int segNr = 0; segNr < nrSegments; segNr++) {
+ // full chunk while more than one chunk remains, otherwise the (possibly empty) remainder
+ final long segSize =
+ (length > (startOffset + chunkSize)) ? chunkSize : (length - startOffset);
+ final MemorySegment segment;
+ try {
+ segment = fc.map(MapMode.READ_ONLY, startOffset, segSize, session);
+ } catch (IOException ioe) {
+ throw convertMapFailedIOException(ioe, resourceDescription, segSize);
+ }
+ if (preload) {
+ segment.load();
+ }
+ segments[segNr] = segment;
+ startOffset += segSize;
+ }
+ return segments;
+ }
+ }
diff --git a/lucene/core/src/test/org/apache/lucene/store/TestMmapDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestMmapDirectory.java
index 0149e9f5eb2..810f8578e80 100644
--- a/lucene/core/src/test/org/apache/lucene/store/TestMmapDirectory.java
+++ b/lucene/core/src/test/org/apache/lucene/store/TestMmapDirectory.java
@@ -18,11 +18,11 @@ package org.apache.lucene.store;
import java.io.IOException;
import java.nio.file.Path;
+import java.util.Objects;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.tests.store.BaseDirectoryTestCase;
import org.junit.BeforeClass;
-import org.junit.Ignore;
/** Tests MMapDirectory */
// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows
@@ -41,10 +41,29 @@ public class TestMmapDirectory extends BaseDirectoryTestCase {
assertTrue(MMapDirectory.UNMAP_NOT_SUPPORTED_REASON, MMapDirectory.UNMAP_SUPPORTED);
}
- @Ignore(
- "This test is for JVM testing purposes. There are no guarantees that it may not fail with SIGSEGV!")
+ private static boolean isMemorySegmentImpl() {
+ return Objects.equals(
+ "MemorySegmentIndexInputProvider", MMapDirectory.PROVIDER.getClass().getSimpleName());
+ }
+
+ /** Verifies that the provider matching the running Java feature version was selected. */
+ public void testCorrectImplementation() {
+ final int feature = Runtime.version().feature();
+ if (feature < 19) {
+ assertSame(MappedByteBufferIndexInputProvider.class, MMapDirectory.PROVIDER.getClass());
+ return;
+ }
+ if (feature == 19) {
+ assertTrue(
+ "on Java 19 we should use MemorySegmentIndexInputProvider to create mmap IndexInputs",
+ isMemorySegmentImpl());
+ }
+ // TODO: We don't know how this is handled in later Java versions, so make no assumptions!
+ }
+
public void testAceWithThreads() throws Exception {
- for (int iter = 0; iter < 10; iter++) {
+ assumeTrue("Test requires MemorySegmentIndexInput", isMemorySegmentImpl());
+
+ final int iters = RANDOM_MULTIPLIER * (TEST_NIGHTLY ? 50 : 10);
+ for (int iter = 0; iter < iters; iter++) {
Directory dir = getDirectory(createTempDir("testAceWithThreads"));
IndexOutput out = dir.createOutput("test", IOContext.DEFAULT);
Random random = random();
@@ -73,7 +92,16 @@ public class TestMmapDirectory extends BaseDirectoryTestCase {
});
t1.start();
shotgun.countDown();
- in.close();
+ try {
+ in.close();
+ } catch (
+ @SuppressWarnings("unused")
+ IllegalStateException ise) {
+ // this may also happen and is a valid exception, informing our user that, e.g., a query is
+ // running!
+ // "java.lang.IllegalStateException: Cannot close while another thread is accessing the
+ // segment"
+ }
t1.join();
dir.close();
}
diff --git a/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java b/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java
index b1275a27911..7ef6c1d82e4 100644
--- a/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java
+++ b/lucene/core/src/test/org/apache/lucene/store/TestMultiMMap.java
@@ -137,10 +137,7 @@ public class TestMultiMMap extends BaseChunkedDirectoryTestCase {
// check impl (we must check size < chunksize: currently, if size==chunkSize, we get 2
// buffers, the second one empty:
- assertTrue(
- (size < chunkSize)
- ? (ii instanceof ByteBufferIndexInput.SingleBufferImpl)
- : (ii instanceof ByteBufferIndexInput.MultiBufferImpl));
+ assertCorrectImpl(size < chunkSize, ii);
// clone tests:
assertSame(ii.getClass(), ii.clone().getClass());
@@ -148,10 +145,7 @@ public class TestMultiMMap extends BaseChunkedDirectoryTestCase {
// slice test (offset 0)
int sliceSize = random().nextInt(size);
IndexInput slice = ii.slice("slice", 0, sliceSize);
- assertTrue(
- (sliceSize < chunkSize)
- ? (slice instanceof ByteBufferIndexInput.SingleBufferImpl)
- : (slice instanceof ByteBufferIndexInput.MultiBufferImpl));
+ assertCorrectImpl(sliceSize < chunkSize, slice);
// slice test (offset > 0 )
int offset = random().nextInt(size - 1) + 1;
@@ -159,14 +153,21 @@ public class TestMultiMMap extends BaseChunkedDirectoryTestCase {
slice = ii.slice("slice", offset, sliceSize);
// System.out.println(offset + "/" + sliceSize + " chunkSize=" + chunkSize + " " +
// slice.getClass());
- if (offset % chunkSize + sliceSize < chunkSize) {
- assertTrue(slice instanceof ByteBufferIndexInput.SingleBufferImpl);
- } else {
- assertTrue(slice instanceof ByteBufferIndexInput.MultiBufferImpl);
- }
+ assertCorrectImpl(offset % chunkSize + sliceSize < chunkSize, slice);
ii.close();
mmapDir.close();
}
}
+
+ private static void assertCorrectImpl(boolean isSingle, IndexInput ii) {
+ var clazz = ii.getClass();
+ if (isSingle) {
+ assertTrue(
+ "Require a single impl, got " + clazz, clazz.getSimpleName().matches("Single\\w+Impl"));
+ } else {
+ assertTrue(
+ "Require a multi impl, got " + clazz, clazz.getSimpleName().matches("Multi\\w+Impl"));
+ }
+ }
}
diff --git a/lucene/distribution.tests/src/test/org/apache/lucene/distribution/TestModularLayer.java b/lucene/distribution.tests/src/test/org/apache/lucene/distribution/TestModularLayer.java
index 461a63703b1..88ce76f69a7 100644
--- a/lucene/distribution.tests/src/test/org/apache/lucene/distribution/TestModularLayer.java
+++ b/lucene/distribution.tests/src/test/org/apache/lucene/distribution/TestModularLayer.java
@@ -183,6 +183,42 @@ public class TestModularLayer extends AbstractLuceneDistributionTest {
});
}
+ /**
+ * Checks that Lucene Core is a MR-JAR and has JDK 19 classes. The core module is resolved into
+ * its own module layer on top of the boot layer; the versioned class file must be visible as a
+ * resource, and on a Java 19 runtime the class must also be loadable.
+ */
+ @Test
+ public void testMultiReleaseJar() {
+ ModuleLayer bootLayer = ModuleLayer.boot();
+ Assertions.assertThatNoException()
+ .isThrownBy(
+ () -> {
+ String coreModuleId = "org.apache.lucene.core";
+
+ // resolve only the core module as root
+ Configuration configuration =
+ bootLayer
+ .configuration()
+ .resolve(
+ luceneCoreAndThirdPartyModulesFinder,
+ ModuleFinder.of(),
+ List.of(coreModuleId));
+
+ ModuleLayer layer =
+ bootLayer.defineModulesWithOneLoader(
+ configuration, ClassLoader.getSystemClassLoader());
+
+ ClassLoader loader = layer.findLoader(coreModuleId);
+
+ // the Java 19 variant has to be packaged under META-INF/versions/19
+ Assertions.assertThat(
+ loader.getResource(
+ "META-INF/versions/19/org/apache/lucene/store/MemorySegmentIndexInput.class"))
+ .isNotNull();
+
+ // loading the class is only attempted on a Java 19 runtime
+ if (Runtime.version().feature() == 19) {
+ Assertions.assertThat(
+ loader.loadClass("org.apache.lucene.store.MemorySegmentIndexInput"))
+ .isNotNull();
+ }
+ });
+ }
+
+
/** Make sure we don't publish automatic modules. */
@Test
public void testAllCoreModulesAreNamedModules() {