You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2022/05/16 21:52:36 UTC
[lucene-solr] branch branch_8_11 updated: LUCENE-10564: Make sure SparseFixedBitSet#or updates memory usage (#882)
This is an automated email from the ASF dual-hosted git repository.
julietibs pushed a commit to branch branch_8_11
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8_11 by this push:
new b567162fe1a LUCENE-10564: Make sure SparseFixedBitSet#or updates memory usage (#882)
b567162fe1a is described below
commit b567162fe1af1dad3b3e19c767971c339eecb19d
Author: Julie Tibshirani <ju...@apache.org>
AuthorDate: Mon May 16 14:23:40 2022 -0700
LUCENE-10564: Make sure SparseFixedBitSet#or updates memory usage (#882)
Before, it didn't update the estimated memory usage, so calls to ramBytesUsed
could be totally off.
---
lucene/CHANGES.txt | 2 +-
.../org/apache/lucene/util/SparseFixedBitSet.java | 7 ++++-
.../apache/lucene/util/TestSparseFixedBitSet.java | 34 ++++++++++++++++++++++
3 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index de19f459028..b0de225c796 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -7,7 +7,7 @@ http://s.apache.org/luceneversions
Bug Fixes
---------------------
-(No changes)
+* LUCENE-10564: Make sure SparseFixedBitSet#or updates ramBytesUsed. (Julie Tibshirani)
Optimizations
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
index 4fcbbef3e98..63d05bc6e66 100644
--- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
@@ -371,7 +371,10 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
// fast path: if we currently have nothing in the block, just copy the data
// this especially happens all the time if you call OR on an empty set
indices[i4096] = index;
- this.bits[i4096] = ArrayUtil.copyOfSubArray(bits, 0, nonZeroLongCount);
+ long[] newBits = ArrayUtil.copyOfSubArray(bits, 0, nonZeroLongCount);
+ this.bits[i4096] = newBits;
+ // we may slightly overestimate size here, but keep it cheap
+ this.ramBytesUsed += SINGLE_ELEMENT_ARRAY_BYTES_USED + ((long) newBits.length - 1 << 3);
this.nonZeroLongCount += nonZeroLongCount;
return;
}
@@ -383,6 +386,8 @@ public class SparseFixedBitSet extends BitSet implements Bits, Accountable {
newBits = currentBits;
} else {
newBits = new long[oversize(requiredCapacity)];
+ // we may slightly overestimate size here, but keep it cheap
+ this.ramBytesUsed += (long) (newBits.length - currentBits.length) << 3;
}
// we iterate backwards in order to not override data we might need on the next iteration if the
// array is reused
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java b/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java
index 581646e9dc6..dc879c73028 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java
@@ -69,4 +69,38 @@ public class TestSparseFixedBitSet extends BaseBitSetTestCase<SparseFixedBitSet>
}
assertEquals(numDocs, set.approximateCardinality());
}
+
+ public void testRamBytesUsed() throws IOException {
+ int size = 1000 + random().nextInt(10000);
+ BitSet original = new SparseFixedBitSet(size);
+ for (int i = 0; i < 3; i++) {
+ original.set(random().nextInt(size));
+ }
+ assertTrue(original.ramBytesUsed() > 0);
+
+ // Take union with a random sparse iterator, then check memory usage
+ BitSet copy = copyOf(original, size);
+ BitSet otherBitSet = new SparseFixedBitSet(size);
+ int interval = 10 + random().nextInt(100);
+ for (int i = 0; i < size; i += interval) {
+ otherBitSet.set(i);
+ }
+ copy.or(new BitSetIterator(otherBitSet, size));
+ assertTrue(copy.ramBytesUsed() > original.ramBytesUsed());
+
+ // Take union with a dense iterator, then check memory usage
+ copy = copyOf(original, size);
+ copy.or(DocIdSetIterator.all(size));
+ assertTrue(copy.ramBytesUsed() > original.ramBytesUsed());
+ assertTrue(copy.ramBytesUsed() > size / Byte.SIZE);
+
+ // Check that both "copy" strategies result in bit sets with
+ // (roughly) same memory usage as original
+ BitSet setCopy = copyOf(original, size);
+ assertEquals(setCopy.ramBytesUsed(), original.ramBytesUsed());
+
+ BitSet orCopy = new SparseFixedBitSet(size);
+ orCopy.or(new BitSetIterator(original, size));
+ assertTrue(Math.abs(orCopy.ramBytesUsed() - original.ramBytesUsed()) < 64L);
+ }
}