You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2022/11/10 23:51:48 UTC

[lucene] 01/02: GITHUB#11911: improve checkindex to be more thorough for vectors (#11916)

This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit d89b63d48369c09c36de5a30f90981833503505d
Author: Benjamin Trent <be...@gmail.com>
AuthorDate: Thu Nov 10 16:45:47 2022 -0500

    GITHUB#11911: improve checkindex to be more thorough for vectors (#11916)
    
    search every N docs to get close to 64 tests
---
 lucene/CHANGES.txt                                 |  4 +++
 .../java/org/apache/lucene/index/CheckIndex.java   | 33 ++++++++++++++++++----
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4640b5e4bfa..903a23e3667 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -92,6 +92,10 @@ Build
 
 ======================== Lucene 9.4.2 =======================
 
+Improvements
+---------------------
+* GITHUB#11916: improve checkindex to be more thorough for vectors. (Ben Trent)
+
 Bug Fixes
 ---------------------
 * GITHUB#11905: Fix integer overflow when seeking the vector index for connections in a single segment.
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 78b5e57e1a5..29c0b4aaea5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -55,11 +55,7 @@ import org.apache.lucene.document.DocumentStoredFieldVisitor;
 import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.FieldExistsQuery;
-import org.apache.lucene.search.LeafFieldComparator;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.*;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -2593,8 +2589,33 @@ public final class CheckIndex implements Closeable {
             status.totalKnnVectorFields++;
 
             int docCount = 0;
+            final Bits bits = reader.getLiveDocs();
+            int everyNdoc = Math.max(values.size() / 64, 1);
             while (values.nextDoc() != NO_MORE_DOCS) {
-              int valueLength = values.vectorValue().length;
+              float[] vectorValue = values.vectorValue();
+              // search every everyNdoc-th doc (targeting ~64 searches) to exercise the graph
+              if (values.docID() % everyNdoc == 0) {
+                TopDocs docs =
+                    reader
+                        .getVectorReader()
+                        .search(fieldInfo.name, vectorValue, 10, bits, Integer.MAX_VALUE);
+                if (docs.scoreDocs.length == 0) {
+                  throw new CheckIndexException(
+                      "Field \"" + fieldInfo.name + "\" failed to search k nearest neighbors");
+                }
+                if (bits != null) {
+                  for (ScoreDoc doc : docs.scoreDocs) {
+                    if (bits.get(doc.doc) == false) {
+                      throw new CheckIndexException(
+                          "Searching Field \""
+                              + fieldInfo.name
+                              + "\" matched deleted doc="
+                              + doc.doc);
+                    }
+                  }
+                }
+              }
+              int valueLength = vectorValue.length;
               if (valueLength != dimension) {
                 throw new CheckIndexException(
                     "Field \""