You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2022/11/10 21:45:52 UTC
[lucene] branch main updated: GITHUB#11911: improve checkindex to be more thorough for vectors (#11916)
This is an automated email from the ASF dual-hosted git repository.
rmuir pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 3a506ec87a0 GITHUB#11911: improve checkindex to be more thorough for vectors (#11916)
3a506ec87a0 is described below
commit 3a506ec87a01556a530eee5eb54ada49fe3cde3f
Author: Benjamin Trent <be...@gmail.com>
AuthorDate: Thu Nov 10 16:45:47 2022 -0500
GITHUB#11911: improve checkindex to be more thorough for vectors (#11916)
search every N docs to get close to 64 tests
---
lucene/CHANGES.txt | 4 +++
.../java/org/apache/lucene/index/CheckIndex.java | 33 ++++++++++++++++++----
2 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5a286300867..aee9d66a641 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -187,6 +187,10 @@ Build
======================== Lucene 9.4.2 =======================
+Improvements
+---------------------
+* GITHUB#11916: Improve CheckIndex to be more thorough for vectors. (Ben Trent)
+
Bug Fixes
---------------------
* GITHUB#11905: Fix integer overflow when seeking the vector index for connections in a single segment.
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 78b5e57e1a5..29c0b4aaea5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -55,11 +55,7 @@ import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.FieldExistsQuery;
-import org.apache.lucene.search.LeafFieldComparator;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.*;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -2593,8 +2589,33 @@ public final class CheckIndex implements Closeable {
status.totalKnnVectorFields++;
int docCount = 0;
+ final Bits bits = reader.getLiveDocs();
+ int everyNdoc = Math.max(values.size() / 64, 1);
while (values.nextDoc() != NO_MORE_DOCS) {
- int valueLength = values.vectorValue().length;
+ float[] vectorValue = values.vectorValue();
+ // search with the vector of every doc whose ID is a multiple of everyNdoc (aiming for roughly 64 searches) to exercise the graph
+ if (values.docID() % everyNdoc == 0) {
+ TopDocs docs =
+ reader
+ .getVectorReader()
+ .search(fieldInfo.name, vectorValue, 10, bits, Integer.MAX_VALUE);
+ if (docs.scoreDocs.length == 0) {
+ throw new CheckIndexException(
+ "Field \"" + fieldInfo.name + "\" failed to search k nearest neighbors");
+ }
+ if (bits != null) {
+ for (ScoreDoc doc : docs.scoreDocs) {
+ if (bits.get(doc.doc) == false) {
+ throw new CheckIndexException(
+ "Searching Field \""
+ + fieldInfo.name
+ + "\" matched deleted doc="
+ + doc.doc);
+ }
+ }
+ }
+ }
+ int valueLength = vectorValue.length;
if (valueLength != dimension) {
throw new CheckIndexException(
"Field \""