You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2022/05/12 20:52:58 UTC

[lucene] branch branch_9x updated: LUCENE-10564: Make sure SparseFixedBitSet#or updates memory usage (#882)

This is an automated email from the ASF dual-hosted git repository.

julietibs pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new c3e351700a5 LUCENE-10564: Make sure SparseFixedBitSet#or updates memory usage (#882)
c3e351700a5 is described below

commit c3e351700a580d659475a027c2e0a55618a15472
Author: Julie Tibshirani <ju...@apache.org>
AuthorDate: Thu May 12 13:29:07 2022 -0700

    LUCENE-10564: Make sure SparseFixedBitSet#or updates memory usage (#882)
    
    Before, it didn't update the estimated memory usage, so calls to ramBytesUsed
    could be totally off.
---
 lucene/CHANGES.txt                                 |  2 ++
 .../org/apache/lucene/util/SparseFixedBitSet.java  |  8 ++++-
 .../apache/lucene/util/TestSparseFixedBitSet.java  | 34 ++++++++++++++++++++++
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 6fb283814b1..8a9e45e2ecf 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -137,6 +137,8 @@ Bug Fixes
   custom dictionary support. Please also use new URL-based constructors for classpath/module
   system ressources.  (Uwe Schindler, Tomoko Uchida, Mike Sokolov)
 
+* LUCENE-10564: Make sure SparseFixedBitSet#or updates ramBytesUsed. (Julie Tibshirani)
+
 Build
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
index 02849c1585a..49d61614e86 100644
--- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
@@ -408,7 +408,11 @@ public class SparseFixedBitSet extends BitSet {
       // fast path: if we currently have nothing in the block, just copy the data
       // this especially happens all the time if you call OR on an empty set
       indices[i4096] = index;
-      this.bits[i4096] = ArrayUtil.copyOfSubArray(bits, 0, nonZeroLongCount);
+
+      long[] newBits = ArrayUtil.copyOfSubArray(bits, 0, nonZeroLongCount);
+      this.bits[i4096] = newBits;
+      // we may slightly overestimate size here, but keep it cheap
+      this.ramBytesUsed += SINGLE_ELEMENT_ARRAY_BYTES_USED + ((long) newBits.length - 1 << 3);
       this.nonZeroLongCount += nonZeroLongCount;
       return;
     }
@@ -420,6 +424,8 @@ public class SparseFixedBitSet extends BitSet {
       newBits = currentBits;
     } else {
       newBits = new long[oversize(requiredCapacity)];
+      // we may slightly overestimate size here, but keep it cheap
+      this.ramBytesUsed += (long) (newBits.length - currentBits.length) << 3;
     }
     // we iterate backwards in order to not override data we might need on the next iteration if the
     // array is reused
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java b/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java
index 594fbf57245..c17e0c9e3d9 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestSparseFixedBitSet.java
@@ -71,4 +71,38 @@ public class TestSparseFixedBitSet extends BaseBitSetTestCase<SparseFixedBitSet>
     }
     assertEquals(numDocs, set.approximateCardinality());
   }
+
+  public void testRamBytesUsed() throws IOException {
+    int size = 1000 + random().nextInt(10000);
+    BitSet original = new SparseFixedBitSet(size);
+    for (int i = 0; i < 3; i++) {
+      original.set(random().nextInt(size));
+    }
+    assertTrue(original.ramBytesUsed() > 0);
+
+    // Take union with a random sparse iterator, then check memory usage
+    BitSet copy = copyOf(original, size);
+    BitSet otherBitSet = new SparseFixedBitSet(size);
+    int interval = 10 + random().nextInt(100);
+    for (int i = 0; i < size; i += interval) {
+      otherBitSet.set(i);
+    }
+    copy.or(new BitSetIterator(otherBitSet, size));
+    assertTrue(copy.ramBytesUsed() > original.ramBytesUsed());
+
+    // Take union with a dense iterator, then check memory usage
+    copy = copyOf(original, size);
+    copy.or(DocIdSetIterator.all(size));
+    assertTrue(copy.ramBytesUsed() > original.ramBytesUsed());
+    assertTrue(copy.ramBytesUsed() > size / Byte.SIZE);
+
+    // Check that both "copy" strategies result in bit sets with
+    // (roughly) same memory usage as original
+    BitSet setCopy = copyOf(original, size);
+    assertEquals(setCopy.ramBytesUsed(), original.ramBytesUsed());
+
+    BitSet orCopy = new SparseFixedBitSet(size);
+    orCopy.or(new BitSetIterator(original, size));
+    assertEquals(orCopy.ramBytesUsed(), original.ramBytesUsed(), 64L);
+  }
 }