You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2023/05/26 16:13:23 UTC

[lucene] branch main updated: add BitSet.clear() (#12268)

This is an automated email from the ASF dual-hosted git repository.

uschindler pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 431dc7b4151 add BitSet.clear() (#12268)
431dc7b4151 is described below

commit 431dc7b4151ebe08df36b4a6730f63b014b05cd1
Author: Jonathan Ellis <jb...@datastax.com>
AuthorDate: Fri May 26 09:13:16 2023 -0700

    add BitSet.clear() (#12268)
---
 lucene/CHANGES.txt                                  |  4 ++++
 .../lucene/analysis/hunspell/TrigramAutomaton.java  |  2 +-
 .../backward_codecs/lucene80/IndexedDISI.java       |  4 ++--
 .../apache/lucene/codecs/lucene90/IndexedDISI.java  |  4 ++--
 .../src/java/org/apache/lucene/util/BitSet.java     | 10 ++++++++++
 .../java/org/apache/lucene/util/FixedBitSet.java    |  5 +++++
 .../org/apache/lucene/util/SparseFixedBitSet.java   | 12 ++++++++++++
 .../apache/lucene/util/automaton/Operations.java    |  2 +-
 .../apache/lucene/util/hnsw/HnswGraphSearcher.java  |  2 +-
 .../facet/range/OverlappingLongRangeCounter.java    |  4 ++--
 .../lucene/tests/util/BaseBitSetTestCase.java       | 21 +++++++++++++++++++++
 11 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index af8233bf444..40075822e84 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -116,6 +116,10 @@ Other
 
 API Changes
 ---------------------
+
+* GITHUB#12268: Add BitSet.clear() without parameters for clearing the entire set
+  (Jonathan Ellis)
+
 (No changes)
 
 New Features
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/TrigramAutomaton.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/TrigramAutomaton.java
index dfe994ccf82..f4404e4bcf0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/TrigramAutomaton.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/TrigramAutomaton.java
@@ -79,7 +79,7 @@ class TrigramAutomaton {
   }
 
   int ngramScore(CharsRef s2) {
-    countedSubstrings.clear(0, countedSubstrings.length());
+    countedSubstrings.clear();
 
     int score1 = 0, score2 = 0, score3 = 0; // scores for substrings of length 1, 2 and 3
 
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/IndexedDISI.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/IndexedDISI.java
index fc82ce58886..639bdbd7333 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/IndexedDISI.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/IndexedDISI.java
@@ -219,7 +219,7 @@ final class IndexedDISI extends DocIdSetIterator {
         // Flush block
         flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
         // Reset for next block
-        buffer.clear(0, buffer.length());
+        buffer.clear();
         totalCardinality += blockCardinality;
         blockCardinality = 0;
       }
@@ -233,7 +233,7 @@ final class IndexedDISI extends DocIdSetIterator {
               jumps, out.getFilePointer() - origo, totalCardinality, jumpBlockIndex, prevBlock + 1);
       totalCardinality += blockCardinality;
       flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
-      buffer.clear(0, buffer.length());
+      buffer.clear();
       prevBlock++;
     }
     final int lastBlock =
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
index 8da289e3ad3..205892d2fe9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
@@ -220,7 +220,7 @@ public final class IndexedDISI extends DocIdSetIterator {
         // Flush block
         flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
         // Reset for next block
-        buffer.clear(0, buffer.length());
+        buffer.clear();
         totalCardinality += blockCardinality;
         blockCardinality = 0;
       }
@@ -234,7 +234,7 @@ public final class IndexedDISI extends DocIdSetIterator {
               jumps, out.getFilePointer() - origo, totalCardinality, jumpBlockIndex, prevBlock + 1);
       totalCardinality += blockCardinality;
       flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
-      buffer.clear(0, buffer.length());
+      buffer.clear();
       prevBlock++;
     }
     final int lastBlock =
diff --git a/lucene/core/src/java/org/apache/lucene/util/BitSet.java b/lucene/core/src/java/org/apache/lucene/util/BitSet.java
index f8b8ba65a59..c5d84833b28 100644
--- a/lucene/core/src/java/org/apache/lucene/util/BitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BitSet.java
@@ -43,6 +43,16 @@ public abstract class BitSet implements Bits, Accountable {
     return set;
   }
 
+  /**
+   * Clear all the bits of the set.
+   *
+   * <p>Depending on the implementation, this may be much faster than {@code clear(0, length())}.
+   */
+  public void clear() {
+    // default for compatibility: falls back to the ranged clear(0, length())
+    clear(0, length());
+  }
+
   /** Set the bit at <code>i</code>. */
   public abstract void set(int i);
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
index 5566bd0c483..ebf626a777d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
@@ -147,6 +147,11 @@ public final class FixedBitSet extends BitSet {
     assert verifyGhostBitsClear();
   }
 
+  @Override
+  public void clear() {
+    Arrays.fill(bits, 0L); // zero every element of the bits array in a single call
+  }
+
   /**
    * Checks if the bits past numBits are clear. Some methods rely on this implicit assumption:
    * search for "Depends on the ghost bits being clear!"
diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
index 49d61614e86..b4ebe3cfc59 100644
--- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.util;
 
 import java.io.IOException;
+import java.util.Arrays;
 import org.apache.lucene.search.DocIdSetIterator;
 
 /**
@@ -73,6 +74,17 @@ public class SparseFixedBitSet extends BitSet {
             + RamUsageEstimator.shallowSizeOf(bits);
   }
 
+  @Override
+  public void clear() {
+    Arrays.fill(bits, null); // drop every reference held in bits
+    Arrays.fill(indices, 0L); // zero every entry of indices
+    nonZeroLongCount = 0; // reset the counter to match the emptied arrays
+    ramBytesUsed = // reset the RAM estimate: base + indices + shallow size of bits
+        BASE_RAM_BYTES_USED
+            + RamUsageEstimator.sizeOf(indices)
+            + RamUsageEstimator.shallowSizeOf(bits);
+  }
+
   @Override
   public int length() {
     return length;
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
index 8448dfc8795..cc53b657fa4 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
@@ -1094,7 +1094,7 @@ public final class Operations {
       FixedBitSet tmp = current;
       current = next;
       next = tmp;
-      next.clear(0, next.length());
+      next.clear();
     }
     return builder.toString();
   }
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index d6e63f483b2..5bc71816946 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -339,7 +339,7 @@ public class HnswGraphSearcher<T> {
     if (visited.length() < capacity) {
       visited = FixedBitSet.ensureCapacity((FixedBitSet) visited, capacity);
     }
-    visited.clear(0, visited.length());
+    visited.clear();
   }
 
   /**
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/OverlappingLongRangeCounter.java b/lucene/facet/src/java/org/apache/lucene/facet/range/OverlappingLongRangeCounter.java
index 3c9ae7ebd7c..b478e3994e1 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/range/OverlappingLongRangeCounter.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/range/OverlappingLongRangeCounter.java
@@ -84,7 +84,7 @@ class OverlappingLongRangeCounter extends LongRangeCounter {
     if (multiValuedDocElementaryIntervalHits == null) {
       multiValuedDocElementaryIntervalHits = new FixedBitSet(boundaries.length);
     } else {
-      multiValuedDocElementaryIntervalHits.clear(0, multiValuedDocElementaryIntervalHits.length());
+      multiValuedDocElementaryIntervalHits.clear();
     }
   }
 
@@ -103,7 +103,7 @@ class OverlappingLongRangeCounter extends LongRangeCounter {
     if (multiValuedDocRangeHits == null) {
       multiValuedDocRangeHits = new FixedBitSet(rangeCount());
     } else {
-      multiValuedDocRangeHits.clear(0, multiValuedDocRangeHits.length());
+      multiValuedDocRangeHits.clear();
     }
     elementaryIntervalUpto = 0;
     rollupMultiValued(root);
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java
index 1bb5e500a44..cbce97d87ac 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/BaseBitSetTestCase.java
@@ -170,6 +170,22 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
     }
   }
 
+  /** Test the {@link BitSet#clear()} method against a {@code JavaUtilBitSet} reference. */
+  public void testClearAll() throws IOException {
+    Random random = random();
+    final int numBits = 1 + random.nextInt(100000);
+    for (float percentSet : new float[] {0, 0.01f, 0.1f, 0.5f, 0.9f, 0.99f, 1f}) {
+      BitSet set1 = new JavaUtilBitSet(randomSet(numBits, percentSet), numBits);
+      T set2 = copyOf(set1, numBits); // instance of the type under test, built from set1
+      final int iters = atLeast(random, 10); // NOTE(review): later passes re-clear empty sets
+      for (int i = 0; i < iters; ++i) {
+        set1.clear();
+        set2.clear();
+        assertEquals(set1, set2, numBits); // both sets must agree after each clear
+      }
+    }
+  }
+
   private DocIdSet randomCopy(BitSet set, int numBits) throws IOException {
     switch (random().nextInt(5)) {
       case 0:
@@ -241,6 +257,11 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
       this.numBits = numBits;
     }
 
+    @Override
+    public void clear() {
+      bitSet.clear(); // delegate to the wrapped bitSet's own no-argument clear
+    }
+
     @Override
     public void clear(int index) {
       bitSet.clear(index);