You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2021/10/02 03:17:06 UTC
[lucene] branch main updated: LUCENE-10130: HnswGraph could make
use of a SparseFixedBitSet.getAndSet
This is an automated email from the ASF dual-hosted git repository.
rmuir pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new d395435 LUCENE-10130: HnswGraph could make use of a SparseFixedBitSet.getAndSet
d395435 is described below
commit d395435fa87a1f5e102d76622ab13f6a9c2bfed0
Author: Robert Muir <rm...@apache.org>
AuthorDate: Fri Oct 1 23:16:20 2021 -0400
LUCENE-10130: HnswGraph could make use of a SparseFixedBitSet.getAndSet
---
.../src/java/org/apache/lucene/util/BitSet.java | 3 ++
.../java/org/apache/lucene/util/FixedBitSet.java | 1 +
.../org/apache/lucene/util/SparseFixedBitSet.java | 40 +++++++++++++++++++---
.../org/apache/lucene/util/hnsw/HnswGraph.java | 6 ++--
.../org/apache/lucene/util/BaseBitSetTestCase.java | 23 +++++++++++++
5 files changed, 65 insertions(+), 8 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/util/BitSet.java b/lucene/core/src/java/org/apache/lucene/util/BitSet.java
index d92dc05..60e6ea0 100644
--- a/lucene/core/src/java/org/apache/lucene/util/BitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BitSet.java
@@ -46,6 +46,9 @@ public abstract class BitSet implements Bits, Accountable {
/** Set the bit at <code>i</code>. */
public abstract void set(int i);
+ /** Set the bit at <code>i</code>, returning <code>true</code> if it was previously set. */
+ public abstract boolean getAndSet(int i);
+
/** Clear the bit at <code>i</code>. */
public abstract void clear(int i);
diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
index 913014d..1e91494 100644
--- a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
@@ -194,6 +194,7 @@ public final class FixedBitSet extends BitSet {
bits[wordNum] |= bitmask;
}
+ @Override
public boolean getAndSet(int index) {
assert index >= 0 && index < numBits : "index=" + index + ", numBits=" + numBits;
int wordNum = index >> 6; // div 64
diff --git a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
index 8dfb95b..afe30bf 100644
--- a/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/SparseFixedBitSet.java
@@ -116,19 +116,50 @@ public class SparseFixedBitSet extends BitSet {
final int i4096 = i >>> 12;
final long index = indices[i4096];
final int i64 = i >>> 6;
+ final long i64bit = 1L << i64;
// first check the index, if the i64-th bit is not set, then i is not set
// note: this relies on the fact that shifts are mod 64 in java
- if ((index & (1L << i64)) == 0) {
+ if ((index & i64bit) == 0) {
return false;
}
// if it is set, then we count the number of bits that are set on the right
// of i64, and that gives us the index of the long that stores the bits we
// are interested in
- final long bits = this.bits[i4096][Long.bitCount(index & ((1L << i64) - 1))];
+ final long bits = this.bits[i4096][Long.bitCount(index & (i64bit - 1))];
return (bits & (1L << i)) != 0;
}
+ @Override
+ public boolean getAndSet(int i) {
+ assert consistent(i);
+ final int i4096 = i >>> 12;
+ final long index = indices[i4096];
+ final int i64 = i >>> 6;
+ final long i64bit = 1L << i64;
+ if ((index & i64bit) != 0) {
+ // in that case the sub 64-bits block we are interested in already exists,
+ // we just need to set a bit in an existing long: the number of ones on
+ // the right of i64 gives us the index of the long we need to update
+ final int location = Long.bitCount(index & (i64bit - 1));
+ final long bit = 1L << i; // shifts are mod 64 in java
+ boolean v = (bits[i4096][location] & bit) != 0;
+ bits[i4096][location] |= bit;
+ return v;
+ } else if (index == 0) {
+ // if the index is 0, it means that we just found a block of 4096 bits
+ // that has no bit that is set yet. So let's initialize a new block:
+ insertBlock(i4096, i64, i);
+ return false;
+ } else {
+ // in that case we found a block of 4096 bits that has some values, but
+ // the sub-block of 64 bits that we are interested in has no value yet,
+ // so we need to insert a new long
+ insertLong(i4096, i64, i, index);
+ return false;
+ }
+ }
+
private static int oversize(int s) {
int newSize = s + (s >>> 1);
if (newSize > 50) {
@@ -144,11 +175,12 @@ public class SparseFixedBitSet extends BitSet {
final int i4096 = i >>> 12;
final long index = indices[i4096];
final int i64 = i >>> 6;
- if ((index & (1L << i64)) != 0) {
+ final long i64bit = 1L << i64;
+ if ((index & i64bit) != 0) {
// in that case the sub 64-bits block we are interested in already exists,
// we just need to set a bit in an existing long: the number of ones on
// the right of i64 gives us the index of the long we need to update
- bits[i4096][Long.bitCount(index & ((1L << i64) - 1))] |= 1L << i; // shifts are mod 64 in java
+ bits[i4096][Long.bitCount(index & (i64bit - 1))] |= 1L << i; // shifts are mod 64 in java
} else if (index == 0) {
// if the index is 0, it means that we just found a block of 4096 bits
// that has no bit that is set yet. So let's initialize a new block:
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
index d1f0420..43ed6ec 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
@@ -112,8 +112,7 @@ public final class HnswGraph extends KnnGraphValues {
int boundedNumSeed = Math.min(numSeed, 2 * size);
for (int i = 0; i < boundedNumSeed; i++) {
int entryPoint = random.nextInt(size);
- if (visited.get(entryPoint) == false) {
- visited.set(entryPoint);
+ if (visited.getAndSet(entryPoint) == false) {
// explore the topK starting points of some random numSeed probes
float score = similarityFunction.compare(query, vectors.vectorValue(entryPoint));
candidates.add(entryPoint, score);
@@ -141,10 +140,9 @@ public final class HnswGraph extends KnnGraphValues {
int friendOrd;
while ((friendOrd = graphValues.nextNeighbor()) != NO_MORE_DOCS) {
assert friendOrd < size : "friendOrd=" + friendOrd + "; size=" + size;
- if (visited.get(friendOrd)) {
+ if (visited.getAndSet(friendOrd)) {
continue;
}
- visited.set(friendOrd);
float score = similarityFunction.compare(query, vectors.vectorValue(friendOrd));
if (results.size() < numSeed || bound.check(score) == false) {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java
index a9cc4b6..6f7bb7d 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/BaseBitSetTestCase.java
@@ -112,6 +112,22 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
assertEquals(set1, set2, numBits);
}
+ /** Test the {@link BitSet#getAndSet} method. */
+ public void testGetAndSet() throws IOException {
+ Random random = random();
+ final int numBits = 1 + random.nextInt(100000);
+ BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits);
+ T set2 = copyOf(set1, numBits);
+ final int iters = 10000 + random.nextInt(10000);
+ for (int i = 0; i < iters; ++i) {
+ final int index = random.nextInt(numBits);
+ boolean v1 = set1.getAndSet(index);
+ boolean v2 = set2.getAndSet(index);
+ assertEquals(v1, v2);
+ }
+ assertEquals(set1, set2, numBits);
+ }
+
/** Test the {@link BitSet#clear(int)} method. */
public void testClear() throws IOException {
Random random = random();
@@ -229,6 +245,13 @@ public abstract class BaseBitSetTestCase<T extends BitSet> extends LuceneTestCas
}
@Override
+ public boolean getAndSet(int index) {
+ boolean v = get(index);
+ set(index);
+ return v;
+ }
+
+ @Override
public int length() {
return numBits;
}