You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2022/01/04 20:59:37 UTC

[lucene] branch main updated: LUCENE-10351 Correct knn search failure with deleted docs (#580)

This is an automated email from the ASF dual-hosted git repository.

mayya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 78da703  LUCENE-10351 Correct knn search failure with deleted docs (#580)
78da703 is described below

commit 78da7030370102d024afc9945965ba3ae7195823
Author: Mayya Sharipova <ma...@elastic.co>
AuthorDate: Tue Jan 4 15:59:30 2022 -0500

    LUCENE-10351 Correct knn search failure with deleted docs (#580)
    
    Currently, when doing a knn search on a segment where all documents
    with a knn field were deleted, we get the following error:
    
    maxSize must be > 0 and < 2147483630; got: 0
    java.lang.IllegalArgumentException: maxSize must be > 0 and < 2147483630; got: 0
    	at __randomizedtesting.SeedInfo.seed([43F1F124D7076A4E:1B860BFCCB9B0BB5]:0)
    	at org.apache.lucene.util.LongHeap.<init>(LongHeap.java:57)
    	at org.apache.lucene.util.LongHeap$1.<init>(LongHeap.java:69)
    	at org.apache.lucene.util.LongHeap.create(LongHeap.java:69)
    	at org.apache.lucene.util.hnsw.NeighborQueue.<init>(NeighborQueue.java:41)
    	at org.apache.lucene.util.hnsw.HnswGraph.search(HnswGraph.java:105)
    
    This patch fixes this error and ensures that an empty TopDocs is returned
    when the knn field doesn't have any documents left.
---
 .../apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java  | 3 +++
 .../apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java   | 8 +++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
index c3e5e0a..b0ac8a9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
@@ -239,6 +239,9 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
     if (fieldEntry == null || fieldEntry.dimension == 0) {
       return null;
     }
+    if (fieldEntry.size() == 0) {
+      return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
+    }
 
     // bound k by total number of vectors to prevent oversizing data structures
     k = Math.min(k, fieldEntry.size());
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java
index 8d90fdc..84d83f0 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java
@@ -556,9 +556,15 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe
       w.deleteDocuments(new Term("id", "0"));
       w.forceMerge(1);
       try (DirectoryReader r = DirectoryReader.open(w)) {
-        VectorValues values = getOnlyLeafReader(r).getVectorValues("v");
+        LeafReader leafReader = getOnlyLeafReader(r);
+        VectorValues values = leafReader.getVectorValues("v");
         assertNotNull(values);
         assertEquals(0, values.size());
+
+        // assert that knn search doesn't fail on a field with all deleted docs
+        TopDocs results =
+            leafReader.searchNearestVectors("v", randomVector(3), 1, leafReader.getLiveDocs());
+        assertEquals(0, results.scoreDocs.length);
       }
     }
   }