You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2021/10/28 13:38:00 UTC
[lucene] branch main updated: LUCENE-9673: fix IntBlockPool's slice
allocator to actually grow properly with larger and larger slice-chained
int[];
excise wasted RAM due to unused (overallocation) of int[] to track in-memory
postings
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 512cad0 LUCENE-9673: fix IntBlockPool's slice allocator to actually grow properly with larger and larger slice-chained int[]; excise wasted RAM due to unused (overallocation) of int[] to track in-memory postings
512cad0 is described below
commit 512cad0e019d165f07062657d98987f0a38fbde9
Author: Mike McCandless <mi...@apache.org>
AuthorDate: Thu Oct 28 09:37:36 2021 -0400
LUCENE-9673: fix IntBlockPool's slice allocator to actually grow properly with larger and larger slice-chained int[]; excise wasted RAM due to unused (overallocation) of int[] to track in-memory postings
---
lucene/CHANGES.txt | 5 ++++-
.../java/org/apache/lucene/index/TermsHashPerField.java | 11 ++++++++---
.../src/java/org/apache/lucene/util/IntBlockPool.java | 17 ++++++++---------
3 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 44800ea..44f0cc1 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -465,7 +465,10 @@ Improvements
Optimizations
---------------------
-(No changes)
+
+* LUCENE-9673: Substantially improve RAM efficiency of how MemoryIndex stores
+ postings in memory, and reduced a bit of RAM overhead in
+ IndexWriter's internal postings book-keeping (mashudong)
Bug Fixes
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
index 1067bc7..b812003 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
@@ -140,11 +140,16 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
}
}
+ /**
+ * Called when we first encounter a new term. We must allocate slies to store the postings (vInt
+ * compressed doc/freq/prox), and also the int pointers to where (in our ByteBlockPool storage)
+ * the postings for this term begin.
+ */
private void initStreamSlices(int termID, int docID) throws IOException {
// Init stream slices
- // TODO: figure out why this is 2*streamCount here. streamCount should be enough?
- if ((2 * streamCount) + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
- // can we fit all the streams in the current buffer?
+ if (streamCount + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
+ // not enough space remaining in this buffer -- jump to next buffer and lose this remaining
+ // piece
intPool.nextBuffer();
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java
index b4575ad..598f55a 100644
--- a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java
+++ b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java
@@ -172,7 +172,7 @@ public final class IntBlockPool {
final int upto = intUpto;
intUpto += size;
- buffer[intUpto - 1] = 1;
+ buffer[intUpto - 1] = 16;
return upto;
}
@@ -185,7 +185,7 @@ public final class IntBlockPool {
}
// no need to make this public unless we support different sizes
- // TODO make the levels and the sizes configurable
+
/**
* An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY} to quickly navigate
* to the next slice level.
@@ -193,15 +193,15 @@ public final class IntBlockPool {
private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
/** An array holding the level sizes for int slices. */
- private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024};
+ private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
/** The first level size for new slices */
private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
/** Allocates a new slice from the given offset */
private int allocSlice(final int[] slice, final int sliceOffset) {
- final int level = slice[sliceOffset];
- final int newLevel = NEXT_LEVEL_ARRAY[level - 1];
+ final int level = slice[sliceOffset] & 15;
+ final int newLevel = NEXT_LEVEL_ARRAY[level];
final int newSize = LEVEL_SIZE_ARRAY[newLevel];
// Maybe allocate another block
if (intUpto > INT_BLOCK_SIZE - newSize) {
@@ -216,7 +216,7 @@ public final class IntBlockPool {
slice[sliceOffset] = offset;
// Write new level:
- buffer[intUpto - 1] = newLevel;
+ buffer[intUpto - 1] = 16 | newLevel;
return newUpto;
}
@@ -300,8 +300,7 @@ public final class IntBlockPool {
bufferUpto = startOffset / INT_BLOCK_SIZE;
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
this.end = endOffset;
- upto = startOffset;
- level = 1;
+ level = 0;
buffer = pool.buffers[bufferUpto];
upto = startOffset & INT_BLOCK_MASK;
@@ -339,7 +338,7 @@ public final class IntBlockPool {
private void nextSlice() {
// Skip to our next slice
final int nextIndex = buffer[limit];
- level = NEXT_LEVEL_ARRAY[level - 1];
+ level = NEXT_LEVEL_ARRAY[level];
final int newSize = LEVEL_SIZE_ARRAY[level];
bufferUpto = nextIndex / INT_BLOCK_SIZE;