You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/01/27 17:10:04 UTC
[lucene] branch main updated: Simplify HnswGraph#search. (#627)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 09ddac1 Simplify HnswGraph#search. (#627)
09ddac1 is described below
commit 09ddac1fe503a9bc568f4f20e7d440327e761578
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu Jan 27 18:08:06 2022 +0100
Simplify HnswGraph#search. (#627)
Currently the contract on `bound` is that it holds the score of the top of the
`results` priority queue. It means that a candidate is only considered if its
score is better than the bound *or* if less than `topK` results have been
accumulated so far. I think it would be simpler if `bound` would always hold
the minimum score that is required for a candidate to be considered? This would
also be more consistent with how our WAND support works, by trusting
`setMinCompetitiveScore` alone, instead of having to check whether the priority
queue is full as well.
---
.../java/org/apache/lucene/util/hnsw/HnswGraph.java | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
index 6678bb4..3a98b54 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
@@ -167,17 +167,17 @@ public final class HnswGraph extends KnnGraphValues {
}
}
- // Set the bound to the worst current result and below reject any newly-generated candidates
- // failing to exceed this bound
+ // A bound that holds the minimum similarity to the query vector that a candidate vector must
+ // have to be considered.
BoundsChecker bound = BoundsChecker.create(similarityFunction.reversed);
- bound.set(results.topScore());
+ if (results.size() >= topK) {
+ bound.set(results.topScore());
+ }
while (candidates.size() > 0) {
// get the best candidate (closest or best scoring)
float topCandidateScore = candidates.topScore();
- if (results.size() >= topK) {
- if (bound.check(topCandidateScore)) {
- break;
- }
+ if (bound.check(topCandidateScore)) {
+ break;
}
int topCandidateNode = candidates.pop();
graphValues.seek(level, topCandidateNode);
@@ -189,11 +189,12 @@ public final class HnswGraph extends KnnGraphValues {
}
float score = similarityFunction.compare(query, vectors.vectorValue(friendOrd));
- if (results.size() < topK || bound.check(score) == false) {
+ if (bound.check(score) == false) {
candidates.add(friendOrd, score);
if (acceptOrds == null || acceptOrds.get(friendOrd)) {
- results.insertWithOverflow(friendOrd, score);
- bound.set(results.topScore());
+ if (results.insertWithOverflow(friendOrd, score) && results.size() >= topK) {
+ bound.set(results.topScore());
+ }
}
}
}