You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/11/12 22:30:00 UTC

[lucene] branch branch_9_0 updated: LUCENE-10069: Document that kNN queries might not return all results (#434)

This is an automated email from the ASF dual-hosted git repository.

julietibs pushed a commit to branch branch_9_0
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9_0 by this push:
     new 231f407  LUCENE-10069: Document that kNN queries might not return all results (#434)
231f407 is described below

commit 231f407d21f15933811b81e4fae0e32a050a4ea3
Author: Julie Tibshirani <ju...@apache.org>
AuthorDate: Fri Nov 12 14:19:20 2021 -0800

    LUCENE-10069: Document that kNN queries might not return all results (#434)
    
    Performing a kNN search with a very large k may return fewer than k documents,
    because the HNSW graph is not guaranteed to be connected. This commit documents
    that behavior as part of a general warning that the results of a kNN search may
    be approximate.
---
 .../src/java/org/apache/lucene/codecs/KnnVectorsReader.java    | 10 +++++++---
 .../core/src/java/org/apache/lucene/search/KnnVectorQuery.java |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
index b692ace..d89e5c7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
@@ -45,9 +45,13 @@ public abstract class KnnVectorsReader implements Closeable, Accountable {
 
   /**
    * Return the k nearest neighbor documents as determined by comparison of their vector values for
-   * this field, to the given vector, by the field's search strategy. If the search strategy is
-   * reversed, lower values indicate nearer vectors, otherwise higher scores indicate nearer
-   * vectors. Unlike relevance scores, vector scores may be negative.
+   * this field, to the given vector, by the field's similarity function. The score of each document
+   * is derived from the vector similarity in a way that ensures scores are positive and that a
+   * larger score corresponds to a higher ranking.
+   *
+   * <p>The search is allowed to be approximate, meaning the results are not guaranteed to be the
+   * true k closest neighbors. For large values of k (for example when k is close to the total
+   * number of documents), the search may also retrieve fewer than k documents.
    *
    * @param field the vector field to search
    * @param target the vector-valued query
diff --git a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
index ba77432..4b2e7dd 100644
--- a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
@@ -28,7 +28,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.util.Bits;
 
-/** Uses {@link KnnVectorsReader#search} to perform nearest Neighbour search. */
+/** Uses {@link KnnVectorsReader#search} to perform nearest neighbour search. */
 public class KnnVectorQuery extends Query {
 
   private static final TopDocs NO_RESULTS =