You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/11/12 22:26:03 UTC
[lucene] branch branch_9x updated: LUCENE-10069: Document that kNN
queries might not return all results (#434)
This is an automated email from the ASF dual-hosted git repository.
julietibs pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 607b10d LUCENE-10069: Document that kNN queries might not return all results (#434)
607b10d is described below
commit 607b10dc2af9fdeb0a76566adc0f6b76f45120cd
Author: Julie Tibshirani <ju...@apache.org>
AuthorDate: Fri Nov 12 14:19:20 2021 -0800
LUCENE-10069: Document that kNN queries might not return all results (#434)
Performing a kNN search with very large k may return fewer than k documents.
This happens because the HNSW graph is not guaranteed to be connected.
This commit documents the behavior as part of a general warning that the results
of a kNN search may be approximate.
---
.../src/java/org/apache/lucene/codecs/KnnVectorsReader.java | 10 +++++++---
.../core/src/java/org/apache/lucene/search/KnnVectorQuery.java | 2 +-
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
index b692ace..d89e5c7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
@@ -45,9 +45,13 @@ public abstract class KnnVectorsReader implements Closeable, Accountable {
/**
* Return the k nearest neighbor documents as determined by comparison of their vector values for
- * this field, to the given vector, by the field's search strategy. If the search strategy is
- * reversed, lower values indicate nearer vectors, otherwise higher scores indicate nearer
- * vectors. Unlike relevance scores, vector scores may be negative.
+ * this field, to the given vector, by the field's similarity function. The score of each document
+ * is derived from the vector similarity in a way that ensures scores are positive and that a
+ * larger score corresponds to a higher ranking.
+ *
+ * <p>The search is allowed to be approximate, meaning the results are not guaranteed to be the
+ * true k closest neighbors. For large values of k (for example when k is close to the total
+ * number of documents), the search may also retrieve fewer than k documents.
*
* @param field the vector field to search
* @param target the vector-valued query
diff --git a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
index ba77432..4b2e7dd 100644
--- a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
@@ -28,7 +28,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;
-/** Uses {@link KnnVectorsReader#search} to perform nearest Neighbour search. */
+/** Uses {@link KnnVectorsReader#search} to perform nearest neighbour search. */
public class KnnVectorQuery extends Query {
private static final TopDocs NO_RESULTS =