You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/11/12 22:30:00 UTC
[lucene] branch branch_9_0 updated: LUCENE-10069: Document that kNN queries might not return all results (#434)
This is an automated email from the ASF dual-hosted git repository.
julietibs pushed a commit to branch branch_9_0
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9_0 by this push:
new 231f407 LUCENE-10069: Document that kNN queries might not return all results (#434)
231f407 is described below
commit 231f407d21f15933811b81e4fae0e32a050a4ea3
Author: Julie Tibshirani <ju...@apache.org>
AuthorDate: Fri Nov 12 14:19:20 2021 -0800
LUCENE-10069: Document that kNN queries might not return all results (#434)
Performing a kNN search with a very large k may return fewer than k documents,
because the HNSW graph is not guaranteed to be connected. This commit documents
that behavior as part of a general warning that the results of a kNN search may
be approximate.
---
.../src/java/org/apache/lucene/codecs/KnnVectorsReader.java | 10 +++++++---
.../core/src/java/org/apache/lucene/search/KnnVectorQuery.java | 2 +-
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
index b692ace..d89e5c7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
@@ -45,9 +45,13 @@ public abstract class KnnVectorsReader implements Closeable, Accountable {
/**
* Return the k nearest neighbor documents as determined by comparison of their vector values for
- * this field, to the given vector, by the field's search strategy. If the search strategy is
- * reversed, lower values indicate nearer vectors, otherwise higher scores indicate nearer
- * vectors. Unlike relevance scores, vector scores may be negative.
+ * this field, to the given vector, by the field's similarity function. The score of each document
+ * is derived from the vector similarity in a way that ensures scores are positive and that a
+ * larger score corresponds to a higher ranking.
+ *
+ * <p>The search is allowed to be approximate, meaning the results are not guaranteed to be the
+ * true k closest neighbors. For large values of k (for example when k is close to the total
+ * number of documents), the search may also retrieve fewer than k documents.
*
* @param field the vector field to search
* @param target the vector-valued query
diff --git a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
index ba77432..4b2e7dd 100644
--- a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
@@ -28,7 +28,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;
-/** Uses {@link KnnVectorsReader#search} to perform nearest Neighbour search. */
+/** Uses {@link KnnVectorsReader#search} to perform nearest neighbour search. */
public class KnnVectorQuery extends Query {
private static final TopDocs NO_RESULTS =