You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2021/10/02 03:17:06 UTC

[lucene] branch main updated: LUCENE-10130: HnswGraph could make use of a SparseFixedBitSet.getAndSet

This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new d395435  LUCENE-10130: HnswGraph could make use of a SparseFixedBitSet.getAndSet
d395435 is described below

commit d395435fa87a1f5e102d76622ab13f6a9c2bfed0
Author: Robert Muir <rm...@apache.org>
AuthorDate: Fri Oct 1 23:16:20 2021 -0400

    LUCENE-10130: HnswGraph could make use of a SparseFixedBitSet.getAndSet
---
 .../src/java/org/apache/lucene/util/BitSet.java    |  3 ++
 .../java/org/apache/lucene/util/FixedBitSet.java   |  1 +
 .../org/apache/lucene/util/SparseFixedBitSet.java  | 40 +++++++++++++++++++---
 .../org/apache/lucene/util/hnsw/HnswGraph.java     |  6 ++--
 .../org/apache/lucene/util/BaseBitSetTestCase.java | 23 +++++++++++++
 5 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/BitSet.java b/lucene/core/src/java/org/apache/lucene/util/BitSet.java
index d92dc05..60e6ea0 100644
--- a/lucene/core/src/java/org/apache/lucene/util/BitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BitSet.java
@@ -46,6 +46,9 @@ public abstract class BitSet implements Bits, Accountable {
   /** Set the bit at <code>i</code>. */
   public abstract void set(int i);
 
+  /** Set the bit at <code>i</code>, returning <code>true</code> if it was previously set. */
+  public abstract boolean getAndSet(int i);
+
   /** Clear the bit at <code>i</code>. */
   public abstract void clear(int i);
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
index 913014d..1e91494 100644
--- a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
@@ -194,6 +194,7 @@ public final class FixedBitSet extends BitSet {
     bits[wordNum] |= bitmask;
   }
 
+  @Override
   public boolean getAndSet(int index) {
     assert index >= 0 && index < numBits : "index=" + index + ", numBits=" + numBits;
     int wordNum = index >> 6; // div 64
diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
index 8dfb95b..afe30bf 100644
--- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
@@ -116,19 +116,50 @@ public class SparseFixedBitSet extends BitSet {
     final int i4096 = i >>> 12;
     final long index = indices[i4096];
     final int i64 = i >>> 6;
+    final long i64bit = 1L << i64;
     // first check the index, if the i64-th bit is not set, then i is not set
     // note: this relies on the fact that shifts are mod 64 in java
-    if ((index & (1L << i64)) == 0) {
+    if ((index & i64bit) == 0) {
       return false;
     }
 
     // if it is set, then we count the number of bits that are set on the right
     // of i64, and that gives us the index of the long that stores the bits we
     // are interested in
-    final long bits = this.bits[i4096][Long.bitCount(index & ((1L << i64) - 1))];
+    final long bits = this.bits[i4096][Long.bitCount(index & (i64bit - 1))];
     return (bits & (1L << i)) != 0;
   }
 
+  @Override
+  public boolean getAndSet(int i) {
+    assert consistent(i);
+    final int i4096 = i >>> 12;
+    final long index = indices[i4096];
+    final int i64 = i >>> 6;
+    final long i64bit = 1L << i64;
+    if ((index & i64bit) != 0) {
+      // in that case the sub 64-bits block we are interested in already exists,
+      // we just need to set a bit in an existing long: the number of ones on
+      // the right of i64 gives us the index of the long we need to update
+      final int location = Long.bitCount(index & (i64bit - 1));
+      final long bit = 1L << i; // shifts are mod 64 in java
+      boolean v = (bits[i4096][location] & bit) != 0;
+      bits[i4096][location] |= bit;
+      return v;
+    } else if (index == 0) {
+      // if the index is 0, it means that we just found a block of 4096 bits
+      // that has no bit that is set yet. So let's initialize a new block:
+      insertBlock(i4096, i64, i);
+      return false;
+    } else {
+      // in that case we found a block of 4096 bits that has some values, but
+      // the sub-block of 64 bits that we are interested in has no value yet,
+      // so we need to insert a new long
+      insertLong(i4096, i64, i, index);
+      return false;
+    }
+  }
+
   private static int oversize(int s) {
     int newSize = s + (s >>> 1);
     if (newSize > 50) {
@@ -144,11 +175,12 @@ public class SparseFixedBitSet extends BitSet {
     final int i4096 = i >>> 12;
     final long index = indices[i4096];
     final int i64 = i >>> 6;
-    if ((index & (1L << i64)) != 0) {
+    final long i64bit = 1L << i64;
+    if ((index & i64bit) != 0) {
       // in that case the sub 64-bits block we are interested in already exists,
       // we just need to set a bit in an existing long: the number of ones on
       // the right of i64 gives us the index of the long we need to update
-      bits[i4096][Long.bitCount(index & ((1L << i64) - 1))] |= 1L << i; // shifts are mod 64 in java
+      bits[i4096][Long.bitCount(index & (i64bit - 1))] |= 1L << i; // shifts are mod 64 in java
     } else if (index == 0) {
       // if the index is 0, it means that we just found a block of 4096 bits
       // that has no bit that is set yet. So let's initialize a new block:
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
index d1f0420..43ed6ec 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
@@ -112,8 +112,7 @@ public final class HnswGraph extends KnnGraphValues {
     int boundedNumSeed = Math.min(numSeed, 2 * size);
     for (int i = 0; i < boundedNumSeed; i++) {
       int entryPoint = random.nextInt(size);
-      if (visited.get(entryPoint) == false) {
-        visited.set(entryPoint);
+      if (visited.getAndSet(entryPoint) == false) {
         // explore the topK starting points of some random numSeed probes
         float score = similarityFunction.compare(query, vectors.vectorValue(entryPoint));
         candidates.add(entryPoint, score);
@@ -141,10 +140,9 @@ public final class HnswGraph extends KnnGraphValues {
       int friendOrd;
       while ((friendOrd = graphValues.nextNeighbor()) != NO_MORE_DOCS) {
         assert friendOrd < size : "friendOrd=" + friendOrd + "; size=" + size;
-        if (visited.get(friendOrd)) {
+        if (visited.getAndSet(friendOrd)) {
           continue;
         }
-        visited.set(friendOrd);
 
         float score = similarityFunction.compare(query, vectors.vectorValue(friendOrd));
         if (results.size() < numSeed || bound.check(score) == false) {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java
index a9cc4b6..6f7bb7d 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java
@@ -112,6 +112,22 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
     assertEquals(set1, set2, numBits);
   }
 
+  /** Test the {@link BitSet#getAndSet} method. */
+  public void testGetAndSet() throws IOException {
+    Random random = random();
+    final int numBits = 1 + random.nextInt(100000);
+    BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits);
+    T set2 = copyOf(set1, numBits);
+    final int iters = 10000 + random.nextInt(10000);
+    for (int i = 0; i < iters; ++i) {
+      final int index = random.nextInt(numBits);
+      boolean v1 = set1.getAndSet(index);
+      boolean v2 = set2.getAndSet(index);
+      assertEquals(v1, v2);
+    }
+    assertEquals(set1, set2, numBits);
+  }
+
   /** Test the {@link BitSet#clear(int)} method. */
   public void testClear() throws IOException {
     Random random = random();
@@ -229,6 +245,13 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
     }
 
     @Override
+    public boolean getAndSet(int index) {
+      boolean v = get(index);
+      set(index);
+      return v;
+    }
+
+    @Override
     public int length() {
       return numBits;
     }