You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2022/11/10 23:51:48 UTC
[lucene] 01/02: GITHUB#11911: improve checkindex to be more thorough for vectors (#11916)
This is an automated email from the ASF dual-hosted git repository.
rmuir pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
commit d89b63d48369c09c36de5a30f90981833503505d
Author: Benjamin Trent <be...@gmail.com>
AuthorDate: Thu Nov 10 16:45:47 2022 -0500
GITHUB#11911: improve checkindex to be more thorough for vectors (#11916)
search every N docs to get close to 64 tests
---
lucene/CHANGES.txt | 4 +++
.../java/org/apache/lucene/index/CheckIndex.java | 33 ++++++++++++++++++----
2 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4640b5e4bfa..903a23e3667 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -92,6 +92,10 @@ Build
======================== Lucene 9.4.2 =======================
+Improvements
+---------------------
+* GITHUB#11916: Improve CheckIndex to be more thorough for vectors. (Ben Trent)
+
Bug Fixes
---------------------
* GITHUB#11905: Fix integer overflow when seeking the vector index for connections in a single segment.
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 78b5e57e1a5..29c0b4aaea5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -55,11 +55,7 @@ import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.FieldExistsQuery;
-import org.apache.lucene.search.LeafFieldComparator;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.*;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -2593,8 +2589,33 @@ public final class CheckIndex implements Closeable {
status.totalKnnVectorFields++;
int docCount = 0;
+ final Bits bits = reader.getLiveDocs();
+ int everyNdoc = Math.max(values.size() / 64, 1);
while (values.nextDoc() != NO_MORE_DOCS) {
- int valueLength = values.vectorValue().length;
+ float[] vectorValue = values.vectorValue();
+ // search with the vector of every everyNdoc-th doc ID (aiming for roughly 64 searches) to exercise the graph
+ if (values.docID() % everyNdoc == 0) {
+ TopDocs docs =
+ reader
+ .getVectorReader()
+ .search(fieldInfo.name, vectorValue, 10, bits, Integer.MAX_VALUE);
+ if (docs.scoreDocs.length == 0) {
+ throw new CheckIndexException(
+ "Field \"" + fieldInfo.name + "\" failed to search k nearest neighbors");
+ }
+ if (bits != null) {
+ for (ScoreDoc doc : docs.scoreDocs) {
+ if (bits.get(doc.doc) == false) {
+ throw new CheckIndexException(
+ "Searching Field \""
+ + fieldInfo.name
+ + "\" matched deleted doc="
+ + doc.doc);
+ }
+ }
+ }
+ }
+ int valueLength = vectorValue.length;
if (valueLength != dimension) {
throw new CheckIndexException(
"Field \""