You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/08/24 18:15:31 UTC

[lucene] branch main updated: LUCENE-10040: Relax TestKnnVectorQuery#testDeletes assertion (#251)

This is an automated email from the ASF dual-hosted git repository.

julietibs pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 782c3cc  LUCENE-10040: Relax TestKnnVectorQuery#testDeletes assertion (#251)
782c3cc is described below

commit 782c3cca3a55d75673da2650f69d73fdecc88d4d
Author: Julie Tibshirani <ju...@gmail.com>
AuthorDate: Tue Aug 24 11:15:27 2021 -0700

    LUCENE-10040: Relax TestKnnVectorQuery#testDeletes assertion (#251)
    
    TestKnnVectorQuery#testDeletes assumes that if there are n total documents, we
    can perform a kNN search with k=n and retrieve all documents. This isn't true
    with our implementation -- due to randomization we may select fewer than n entry
    points and never visit some vectors.
---
 .../org/apache/lucene/search/TestKnnVectorQuery.java     | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
index b5262a5..923265e 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
@@ -313,15 +313,11 @@ public class TestKnnVectorQuery extends LuceneTestCase {
         IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
       final int numDocs = atLeast(100);
       final int dim = 30;
-      int docIndex = 0;
       for (int i = 0; i < numDocs; ++i) {
         Document d = new Document();
+        d.add(new StringField("index", String.valueOf(i), Field.Store.YES));
         if (frequently()) {
-          d.add(new StringField("index", String.valueOf(docIndex), Field.Store.YES));
           d.add(new KnnVectorField("vector", randomVector(dim)));
-          docIndex++;
-        } else {
-          d.add(new StringField("other", "value" + (i % 5), Field.Store.NO));
         }
         w.addDocument(d);
       }
@@ -329,18 +325,18 @@ public class TestKnnVectorQuery extends LuceneTestCase {
 
       // Delete some documents at random, both those with and without vectors
       Set<Term> toDelete = new HashSet<>();
-      for (int i = 0; i < 20; i++) {
-        int index = random().nextInt(docIndex);
+      for (int i = 0; i < 25; i++) {
+        int index = random().nextInt(numDocs);
         toDelete.add(new Term("index", String.valueOf(index)));
       }
       w.deleteDocuments(toDelete.toArray(new Term[0]));
-      w.deleteDocuments(new Term("other", "value" + random().nextInt(5)));
       w.commit();
 
+      int hits = 50;
       try (IndexReader reader = DirectoryReader.open(dir)) {
         Set<String> allIds = new HashSet<>();
         IndexSearcher searcher = new IndexSearcher(reader);
-        KnnVectorQuery query = new KnnVectorQuery("vector", randomVector(dim), numDocs);
+        KnnVectorQuery query = new KnnVectorQuery("vector", randomVector(dim), hits);
         TopDocs topDocs = searcher.search(query, numDocs);
         for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
           Document doc = reader.document(scoreDoc.doc, Set.of("index"));
@@ -350,7 +346,7 @@ public class TestKnnVectorQuery extends LuceneTestCase {
               toDelete.contains(new Term("index", index)));
           allIds.add(index);
         }
-        assertEquals("search missed some documents", docIndex - toDelete.size(), allIds.size());
+        assertEquals("search missed some documents", hits, allIds.size());
       }
     }
   }