You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by "gortiz (via GitHub)" <gi...@apache.org> on 2023/06/01 07:19:33 UTC

[GitHub] [pinot] gortiz commented on a diff in pull request #10528: PinotBufferFactory and a buffer implementation that uses Unsafe

gortiz commented on code in PR #10528:
URL: https://github.com/apache/pinot/pull/10528#discussion_r1212700823


##########
pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/memory/unsafe/MmapMemory.java:
##########
@@ -0,0 +1,348 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.memory.unsafe;
+
+import com.google.common.collect.Lists;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.io.UncheckedIOException;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.nio.channels.FileChannel;
+import java.util.List;
+import java.util.function.BiConsumer;
+import net.openhft.chronicle.core.Jvm;
+import net.openhft.chronicle.core.OS;
+import net.openhft.posix.MSyncFlag;
+import net.openhft.posix.PosixAPI;
+import org.apache.pinot.segment.spi.utils.JavaVersion;
+
+
+public class MmapMemory implements Memory {
+
+  private static final MapFun MAP_FUN;
+
+  /**
+   * The native address exposed by this memory, taken from {@link MapSection#getAddress()}.
+   *
+   * NOTE(review): the {@link Map0Fun} path maps from the page boundary that precedes the requested offset and then
+   * adds {@code offset % pageSize} to the address it returns, so this value is not necessarily page aligned.
+   */
+  private final long _address;
+  /**
+   * The offset requested to map. It is a position in the file, not a memory address.
+   */
+  private final long _offset;
+  /**
+   * How many bytes have been requested to be mapped.
+   * The actual mapped size may be larger, as the mapping may start at the page boundary that precedes the
+   * requested offset.
+   * NOTE(review): {@link MapSection} does not expose the actual mapped size; presumably it is captured by its
+   * unmap function - confirm.
+   */
+  private final long _size;
+  private final MapSection _section;
+
+  // Resolves, during class initialization, the function used to map files on the running JVM.
+  static {
+    try {
+      Jvm.init();
+      MAP_FUN = MapFun.find();
+    } catch (ClassNotFoundException | NoSuchMethodException e) {
+      // No mapping function could be found, so the class cannot be used: abort its initialization.
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Maps a region of the given file into memory.
+   *
+   * @param file the file to map
+   * @param readOnly whether the file is mapped in read only mode
+   * @param offset the offset in the file where the requested region starts
+   * @param size how many bytes to map
+   * @throws UncheckedIOException if the mapping function fails with an {@link IOException}
+   */
+  public MmapMemory(File file, boolean readOnly, long offset, long size) {
+    _size = size;
+    _offset = offset;
+
+    try {
+      _section = MAP_FUN.map(file, readOnly, offset, size);
+    } catch (IOException e) {
+      // UncheckedIOException is still a RuntimeException, so callers that caught the previous exception keep
+      // working, but the I/O nature of the failure and the mapping arguments are no longer lost.
+      throw new UncheckedIOException(
+          "Cannot map file " + file + " from offset " + offset + " with size " + size, e);
+    }
+    _address = _section.getAddress();
+  }
+
+  /**
+   * Returns the address of the section that was obtained when the file was mapped.
+   */
+  @Override
+  public long getAddress() {
+    return _address;
+  }
+
+  /**
+   * Returns the number of bytes that were requested to be mapped, exactly as received by the constructor.
+   */
+  @Override
+  public long getSize() {
+    return _size;
+  }
+
+  @Override
+  public void flush() {
+    MSyncFlag mode = MSyncFlag.MS_SYNC;
+    PosixAPI.posix().msync(_offset, _size, mode);
+  }
+
+  /**
+   * Releases the mapping by running the unmap function of the mapped section.
+   *
+   * @throws RuntimeException if the unmap function fails with a reflection related exception
+   */
+  @Override
+  public void close()
+      throws IOException {
+    UnmapFun unmapFun = _section.getUnmapFun();
+    try {
+      unmapFun.unmap();
+    } catch (InvocationTargetException | IllegalAccessException e) {
+      throw new RuntimeException("Error while calling unmap", e);
+    }
+  }
+
+  /**
+   * The outcome of a mapping: the address where it can be accessed and the function that releases it.
+   */
+  private static class MapSection {
+    /**
+     * The section used when nothing is mapped: its address is 0 and its unmap function does nothing.
+     */
+    public static final MapSection EMPTY = new MapSection(0, () -> { });
+
+    private final UnmapFun _unmapFun;
+    private final long _address;
+
+    public MapSection(long address, UnmapFun unmapFun) {
+      this._address = address;
+      this._unmapFun = unmapFun;
+    }
+
+    /**
+     * @return the function that has to be called in order to release this section
+     */
+    public UnmapFun getUnmapFun() {
+      return this._unmapFun;
+    }
+
+    /**
+     * @return the address received when this section was created
+     */
+    public long getAddress() {
+      return this._address;
+    }
+  }
+
+  /**
+   * A function that maps a region of a file into memory. The way to do so depends on the running JVM, which is why
+   * the actual implementation is looked for with {@link #find()}.
+   */
+  interface MapFun {
+
+    /**
+     * @param file The file to be mapped. If its length is lower than offset + size and the mode is not read only,
+     *            the file will be resized to that size.
+     * @param readOnly Whether the file is opened and mapped in read only mode.
+     * @param offset The offset in the file. Any positive value is valid, even if it is larger than the file size.
+     * @param size How many bytes to map.
+     * @return the mapped section
+     * @throws IOException in several situations. For example, if the offset + size is larger than file length and the
+     * mode is read only or if the process doesn't have permission to read or modify the file.
+     */
+    MapSection map(File file, boolean readOnly, long offset, long size) throws IOException;
+
+    /**
+     * Tries the known candidates in order and returns the first one that can be resolved on the running JVM.
+     *
+     * @return the first mapping function that could be resolved
+     * @throws NoSuchMethodException if no candidate could be resolved. The reason why each candidate was
+     * discarded is attached as a suppressed exception.
+     */
+    static MapFun find()
+        throws ClassNotFoundException, NoSuchMethodException {
+      List<Finder<? extends MapFun>> candidates = Lists.newArrayList(
+          new Map0Fun.ChronicleCore(),
+          new Map0Fun.Java11(),
+          new Map0Fun.Java17(),
+          new Java20()
+      );
+
+      // Created upfront so each discarded candidate can attach its failure instead of being silently dropped.
+      NoSuchMethodException notFound =
+          new NoSuchMethodException("Cannot find how to create memory map files in Java " + JavaVersion.VERSION);
+      for (Finder<? extends MapFun> candidate : candidates) {
+        try {
+          return candidate.tryFind();
+        } catch (NoSuchMethodException | ClassNotFoundException | AssertionError e) {
+          // A candidate that cannot be resolved is expected on most JVMs: remember why and try the next one.
+          notFound.addSuppressed(e);
+        }
+      }
+      throw notFound;
+    }
+
+    /**
+     * Finds a {@link MapFun} that reflectively calls {@code mapInternal} on {@code sun.nio.ch.FileChannelImpl} and
+     * uses the {@code sun.nio.ch.FileChannelImpl$Unmapper} it returns to obtain the address and to unmap.
+     */
+    class Java20 implements Finder<MapFun> {
+      @Override
+      public MapFun tryFind()
+          throws NoSuchMethodException, ClassNotFoundException {
+        Class<?> fileChannelImpl = MmapMemory.class.getClassLoader().loadClass("sun.nio.ch.FileChannelImpl");
+
+        Method mapMethod = fileChannelImpl.getDeclaredMethod("mapInternal", FileChannel.MapMode.class, long.class,
+            long.class, int.class, boolean.class);
+        mapMethod.setAccessible(true);
+
+        Class<?> unmapperClass = MmapMemory.class.getClassLoader().loadClass("sun.nio.ch.FileChannelImpl$Unmapper");
+        Method unmapMethod = unmapperClass.getDeclaredMethod("unmap");
+        unmapMethod.setAccessible(true);
+        Method addressMethod = unmapperClass.getDeclaredMethod("address");
+        addressMethod.setAccessible(true);
+
+        return (file, readOnly, offset, size) -> {
+          FileChannel.MapMode mapMode = readOnly ? FileChannel.MapMode.READ_ONLY : FileChannel.MapMode.READ_WRITE;
+          // see https://github.com/openjdk/jdk/blob/cc9f7ad9ce33dc44d335fb7fb5483795c62ba936/src/java.base/share/
+          // classes/sun/nio/ch/FileChannelImpl.java#L1223
+          // NOTE(review): presumably 0 and 1 are the read only and read/write protection constants declared in
+          // FileChannelImpl - confirm against the linked source.
+          int prot = readOnly ? 0 : 1;
+
+          String mode = readOnly ? "r" : "rw";
+          try (RandomAccessFile raf = new RandomAccessFile(file, mode); FileChannel fc = raf.getChannel()) {
+            Object unmapper = mapMethod.invoke(fc, mapMode, offset, size, prot, false);
+            long address;
+            UnmapFun unmapFun;
+            if (unmapper == null) {
+              // unmapper may be null if the size is 0 or if the file descriptor is closed while mapInternal was called
+              address = 0;
+              unmapFun = () -> {
+              };
+            } else {
+              address = (long) addressMethod.invoke(unmapper);
+              unmapFun = () -> unmapMethod.invoke(unmapper);
+            }
+
+            return new MapSection(address, unmapFun);
+          } catch (InvocationTargetException e) {
+            // Exceptions thrown by the reflectively called method arrive wrapped. I/O failures are rethrown as
+            // such, given that is what the contract of map announces.
+            Throwable cause = e.getCause();
+            if (cause instanceof IOException) {
+              throw (IOException) cause;
+            }
+            throw new RuntimeException("Cannot map file " + file + " from address " + offset + " with size " + size,
+                e);
+          } catch (IllegalAccessException e) {
+            throw new RuntimeException(e);
+          }
+        };
+      }
+    }
+  }
+
+  /**
+   * A {@link MapFun} that actually delegates into a map0 native method included in pre 19 Java releases.
+   *
+   * Unlike normal map methods, map0 actually has some low level requirements. For example, the offset must be page
+   * aligned.
+   */
+  interface Map0Fun extends MapFun {
+
+    /**
+     * The low level map operation. {@link #map(File, boolean, long, long)} calls it with an offset that has been
+     * rounded down to a page boundary and a size that has been increased accordingly.
+     *
+     * @param fc The channel of the file to map.
+     * @param readOnly Whether the mapping is read only.
+     * @param offset It has to be a positive value that is page aligned.
+     * @param size How many bytes to map.
+     * @return the mapped section
+     */
+    MapSection map0(FileChannel fc, boolean readOnly, long offset, long size)
+        throws InvocationTargetException, IllegalAccessException, IOException;
+
+    /**
+     * Maps the requested region by delegating into {@link #map0(FileChannel, boolean, long, long)}, which requires
+     * a page aligned offset. The region actually mapped starts at the page boundary that precedes {@code offset},
+     * but the address of the returned section is the one that corresponds to {@code offset}.
+     *
+     * @param file The file to be mapped. In read/write mode it is extended when it is shorter than offset + size.
+     * @param readOnly Whether the file is opened and mapped in read only mode.
+     * @param offset The offset in the file where the requested region starts.
+     * @param size How many bytes to map. When it is 0, nothing is mapped and {@code MapSection.EMPTY} is returned.
+     * @return the mapped section
+     * @throws IOException if the file cannot be opened, extended or mapped. For example, if offset + size is larger
+     * than the file length and the mode is read only.
+     */
+    @Override
+    default MapSection map(File file, boolean readOnly, long offset, long size) throws IOException {
+      String mode = readOnly ? "r" : "rw";
+      try (RandomAccessFile raf = new RandomAccessFile(file, mode); FileChannel fc = raf.getChannel()) {
+        if (size == 0) {
+          return MapSection.EMPTY;
+        }
+
+        long allocationGranule = Unsafer.UNSAFE.pageSize();
+        int pagePosition = (int) (offset % allocationGranule);
+
+        if (!fc.isOpen()) {
+          throw new IOException("closed " + file.getPath());
+        }
+
+        long fileSize = fc.size();
+        if (fileSize < offset + size) {
+          if (readOnly) {
+            // The file cannot be extended in read only mode. Fail with a clear message instead of with whatever
+            // error the write attempt on a read only file would produce.
+            throw new IOException("Cannot map " + size + " bytes from offset " + offset + " of file "
+                + file.getPath() + " in read only mode: its length is " + fileSize);
+          }
+          // Extend the file up to offset + size by writing a single byte at its new last position
+          raf.seek(offset + size - 1);
+          raf.write(0);
+        }
+
+        // map0 needs a page aligned offset: start at the preceding page boundary and map the extra bytes as well
+        long mapPosition = offset - pagePosition;
+        long mapSize = size + pagePosition;
+
+        MapSection map0Section = map0(fc, readOnly, mapPosition, mapSize);
+        // Skip the extra bytes, so the address of the section is the one that corresponds to the requested offset
+        return new MapSection(map0Section.getAddress() + pagePosition, map0Section.getUnmapFun());
+      } catch (InvocationTargetException e) {
+        // Exceptions thrown by a reflectively called method arrive wrapped. I/O failures are rethrown as such,
+        // given that is what the contract of map announces.
+        Throwable cause = e.getCause();
+        if (cause instanceof IOException) {
+          throw (IOException) cause;
+        }
+        throw new RuntimeException("Cannot map file " + file + " from address " + offset + " with size " + size, e);
+      } catch (IllegalAccessException e) {
+        throw new RuntimeException("Cannot map file " + file + " from address " + offset + " with size " + size, e);
+      }
+    }
+
+    static BiConsumer<Long, Long> tryFindUnmapper()
+        throws NoSuchMethodException, ClassNotFoundException {
+      Class<?> fileChannelImpl = MmapMemory.class.getClassLoader().loadClass("sun.nio.ch.FileChannelImpl");
+      Method unmapMethod = fileChannelImpl.getDeclaredMethod("unmap0", long.class, long.class);
+      unmapMethod.setAccessible(true);
+      return (address, size) -> {
+        try {
+          unmapMethod.invoke(null, address, size);
+        } catch (IllegalAccessException | InvocationTargetException e) {
+          throw new RuntimeException(e);
+        }
+      };
+    }
+
+    class ChronicleCore implements Finder<Map0Fun> {

Review Comment:
   Chronicle has several libraries. For this use case we care about 3 of them:
   - Chronicle Posix is a JNA wrapper that lets us call the POSIX API.
   - Chronicle Core is a utility library used by their other libraries. It depends on Chronicle Posix. It offers some OS and JVM utilities that may be useful. For example, it does the same reflection trick I had to do to find the mmap methods.
   - Chronicle Bytes is a buffer library built on top of Chronicle Core. In the Chronicle ecosystem it plays the role of our PinotDataBuffer.
   
   My first attempt was to use Chronicle Bytes as the backend of my PinotByteBuffer library and it resulted in several strange semantic bugs. Chronicle Core is much simpler and provides access to low-level stuff. That is why I'm proposing to use it. Chronicle Posix is just a wrapper on top of the POSIX API, so it is a very simple library in which it is difficult to make mistakes.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org