You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/01/28 13:48:41 UTC

[lucene] branch branch_9x updated: Simplify HnswGraph#search. (#627)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 9340fb7  Simplify HnswGraph#search. (#627)
9340fb7 is described below

commit 9340fb7b757cc7878aaa17818ee8949f6e4a697b
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu Jan 27 18:08:06 2022 +0100

    Simplify HnswGraph#search. (#627)
    
    Currently the contract on `bound` is that it holds the score of the top of the
    `results` priority queue. It means that a candidate is only considered if its
    score is better than the bound *or* if less than `topK` results have been
    accumulated so far. I think it would be simpler if `bound` would always hold
    the minimum score that is required for a candidate to be considered? This would
    also be more consistent with how our WAND support works, by trusting
    `setMinCompetitiveScore` alone, instead of having to check whether the priority
    queue is full as well.
---
 .../java/org/apache/lucene/util/hnsw/HnswGraph.java | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
index 6678bb4..3a98b54 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
@@ -167,17 +167,17 @@ public final class HnswGraph extends KnnGraphValues {
       }
     }
 
-    // Set the bound to the worst current result and below reject any newly-generated candidates
-    // failing to exceed this bound
+    // A bound that holds the minimum similarity to the query vector that a candidate vector must
+    // have to be considered.
     BoundsChecker bound = BoundsChecker.create(similarityFunction.reversed);
-    bound.set(results.topScore());
+    if (results.size() >= topK) {
+      bound.set(results.topScore());
+    }
     while (candidates.size() > 0) {
       // get the best candidate (closest or best scoring)
       float topCandidateScore = candidates.topScore();
-      if (results.size() >= topK) {
-        if (bound.check(topCandidateScore)) {
-          break;
-        }
+      if (bound.check(topCandidateScore)) {
+        break;
       }
       int topCandidateNode = candidates.pop();
       graphValues.seek(level, topCandidateNode);
@@ -189,11 +189,12 @@ public final class HnswGraph extends KnnGraphValues {
         }
 
         float score = similarityFunction.compare(query, vectors.vectorValue(friendOrd));
-        if (results.size() < topK || bound.check(score) == false) {
+        if (bound.check(score) == false) {
           candidates.add(friendOrd, score);
           if (acceptOrds == null || acceptOrds.get(friendOrd)) {
-            results.insertWithOverflow(friendOrd, score);
-            bound.set(results.topScore());
+            if (results.insertWithOverflow(friendOrd, score) && results.size() >= topK) {
+              bound.set(results.topScore());
+            }
           }
         }
       }