You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2021/10/28 14:24:09 UTC

[lucene-solr] branch branch_8x updated: LUCENE-9673: fix IntBlockPool's slice allocator to actually grow properly with larger and larger slice-chained int[]; excise wasted RAM due to unused (overallocation) of int[] to track in-memory postings

This is an automated email from the ASF dual-hosted git repository.

mikemccand pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 61d8818  LUCENE-9673: fix IntBlockPool's slice allocator to actually grow properly with larger and larger slice-chained int[]; excise wasted RAM due to unused (overallocation) of int[] to track in-memory postings
61d8818 is described below

commit 61d8818a622db92e2f0bb6ecfe3f59189de03a7f
Author: Mike McCandless <mi...@apache.org>
AuthorDate: Thu Oct 28 10:23:45 2021 -0400

    LUCENE-9673: fix IntBlockPool's slice allocator to actually grow properly with larger and larger slice-chained int[]; excise wasted RAM due to unused (overallocation) of int[] to track in-memory postings
---
 lucene/CHANGES.txt                                 |  5 +++-
 .../org/apache/lucene/index/TermsHashPerField.java | 11 +++++--
 .../java/org/apache/lucene/util/IntBlockPool.java  | 35 ++++++++++------------
 3 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a1f0410..b0dfd24 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -23,7 +23,10 @@ Improvements
 
 Optimizations
 ---------------------
-(No changes)
+
+* LUCENE-9673: Substantially improve RAM efficiency of how MemoryIndex stores
+  postings in memory, and reduced a bit of RAM overhead in
+  IndexWriter's internal postings book-keeping (mashudong)
 
 Bug Fixes
 ---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
index d3e0487..d0d000d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
@@ -131,11 +131,16 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
     }
   }
 
+  /**
+   * Called when we first encounter a new term. We must allocate slies to store the postings (vInt
+   * compressed doc/freq/prox), and also the int pointers to where (in our ByteBlockPool storage)
+   * the postings for this term begin.
+   */
   private void initStreamSlices(int termID, int docID) throws IOException {
     // Init stream slices
-    // TODO: figure out why this is 2*streamCount here. streamCount should be enough?
-    if ((2*streamCount) + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
-      // can we fit all the streams in the current buffer?
+    if (streamCount + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
+      // not enough space remaining in this buffer -- jump to next buffer and lose this remaining
+      // piece
       intPool.nextBuffer();
     }
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java
index 4402c4f..8bfac85 100644
--- a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java
+++ b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java
@@ -171,7 +171,7 @@ public final class IntBlockPool {
       
     final int upto = intUpto;
     intUpto += size;
-    buffer[intUpto-1] = 1;
+    buffer[intUpto - 1] = 16;
     return upto;
   }
   
@@ -185,29 +185,25 @@ public final class IntBlockPool {
   
   
   // no need to make this public unless we support different sizes
-  // TODO make the levels and the sizes configurable
+
   /**
    * An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY}
    * to quickly navigate to the next slice level.
    */
-  private final static int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
-  
-  /**
-   * An array holding the level sizes for int slices.
-   */
-  private final static int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024};
-  
-  /**
-   * The first level size for new slices
-   */
-  private final static int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
+  private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
+
+  /** An array holding the level sizes for int slices. */
+  private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
+
+  /** The first level size for new slices */
+  private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
 
   /**
    * Allocates a new slice from the given offset
    */
   private int allocSlice(final int[] slice, final int sliceOffset) {
-    final int level = slice[sliceOffset];
-    final int newLevel = NEXT_LEVEL_ARRAY[level-1];
+    final int level = slice[sliceOffset] & 15;
+    final int newLevel = NEXT_LEVEL_ARRAY[level];
     final int newSize = LEVEL_SIZE_ARRAY[newLevel];
     // Maybe allocate another block
     if (intUpto > INT_BLOCK_SIZE-newSize) {
@@ -222,7 +218,7 @@ public final class IntBlockPool {
     slice[sliceOffset] = offset;
         
     // Write new level:
-    buffer[intUpto-1] = newLevel;
+    buffer[intUpto - 1] = 16 | newLevel;
 
     return newUpto;
   }
@@ -315,9 +311,8 @@ public final class IntBlockPool {
       bufferUpto = startOffset / INT_BLOCK_SIZE;
       bufferOffset = bufferUpto * INT_BLOCK_SIZE;
       this.end = endOffset;
-      upto = startOffset;
-      level = 1;
-      
+      level = 0;
+
       buffer = pool.buffers[bufferUpto];
       upto = startOffset & INT_BLOCK_MASK;
 
@@ -356,7 +351,7 @@ public final class IntBlockPool {
     private void nextSlice() {
       // Skip to our next slice
       final int nextIndex = buffer[limit];
-      level = NEXT_LEVEL_ARRAY[level-1];
+      level = NEXT_LEVEL_ARRAY[level];
       final int newSize = LEVEL_SIZE_ARRAY[level];
 
       bufferUpto = nextIndex / INT_BLOCK_SIZE;