You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2022/11/10 23:51:47 UTC

[lucene] branch branch_9x updated (78177d64843 -> 69def7c403a)

This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a change to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


    from 78177d64843 Fix bug with set of strings since upgrade of Gradle -> explicit cast from GString to String
     new d89b63d4836 GITHUB#11911: improve checkindex to be more thorough for vectors (#11916)
     new 69def7c403a Follow up to GITHUB#11916, remove deleted docs check (#11919)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 lucene/CHANGES.txt                                  |  4 ++++
 .../java/org/apache/lucene/index/CheckIndex.java    | 21 +++++++++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)


[lucene] 02/02: Follow up to GITHUB#11916, remove deleted docs check (#11919)

Posted by rm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 69def7c403ab2395cae6fb0131812c20736e3a67
Author: Benjamin Trent <be...@gmail.com>
AuthorDate: Thu Nov 10 18:40:24 2022 -0500

    Follow up to GITHUB#11916, remove deleted docs check (#11919)
---
 .../core/src/java/org/apache/lucene/index/CheckIndex.java  | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 29c0b4aaea5..0e8d7e2e22d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -2589,7 +2589,6 @@ public final class CheckIndex implements Closeable {
             status.totalKnnVectorFields++;
 
             int docCount = 0;
-            final Bits bits = reader.getLiveDocs();
             int everyNdoc = Math.max(values.size() / 64, 1);
             while (values.nextDoc() != NO_MORE_DOCS) {
               float[] vectorValue = values.vectorValue();
@@ -2598,22 +2597,11 @@ public final class CheckIndex implements Closeable {
                 TopDocs docs =
                     reader
                         .getVectorReader()
-                        .search(fieldInfo.name, vectorValue, 10, bits, Integer.MAX_VALUE);
+                        .search(fieldInfo.name, vectorValue, 10, null, Integer.MAX_VALUE);
                 if (docs.scoreDocs.length == 0) {
                   throw new CheckIndexException(
                       "Field \"" + fieldInfo.name + "\" failed to search k nearest neighbors");
                 }
-                if (bits != null) {
-                  for (ScoreDoc doc : docs.scoreDocs) {
-                    if (bits.get(doc.doc) == false) {
-                      throw new CheckIndexException(
-                          "Searching Field \""
-                              + fieldInfo.name
-                              + "\" matched deleted doc="
-                              + doc.doc);
-                    }
-                  }
-                }
               }
               int valueLength = vectorValue.length;
               if (valueLength != dimension) {


[lucene] 01/02: GITHUB#11911: improve checkindex to be more thorough for vectors (#11916)

Posted by rm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit d89b63d48369c09c36de5a30f90981833503505d
Author: Benjamin Trent <be...@gmail.com>
AuthorDate: Thu Nov 10 16:45:47 2022 -0500

    GITHUB#11911: improve checkindex to be more thorough for vectors (#11916)
    
    Search every Nth doc so that roughly 64 k-NN searches are run per vector field.
---
 lucene/CHANGES.txt                                 |  4 +++
 .../java/org/apache/lucene/index/CheckIndex.java   | 33 ++++++++++++++++++----
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4640b5e4bfa..903a23e3667 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -92,6 +92,10 @@ Build
 
 ======================== Lucene 9.4.2 =======================
 
+Improvements
+---------------------
+* GITHUB#11916: improve checkindex to be more thorough for vectors. (Ben Trent)
+
 Bug Fixes
 ---------------------
 * GITHUB#11905: Fix integer overflow when seeking the vector index for connections in a single segment.
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 78b5e57e1a5..29c0b4aaea5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -55,11 +55,7 @@ import org.apache.lucene.document.DocumentStoredFieldVisitor;
 import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.FieldExistsQuery;
-import org.apache.lucene.search.LeafFieldComparator;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.*;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -2593,8 +2589,33 @@ public final class CheckIndex implements Closeable {
             status.totalKnnVectorFields++;
 
             int docCount = 0;
+            final Bits bits = reader.getLiveDocs();
+            int everyNdoc = Math.max(values.size() / 64, 1);
             while (values.nextDoc() != NO_MORE_DOCS) {
-              int valueLength = values.vectorValue().length;
+              float[] vectorValue = values.vectorValue();
+              // search on every everyNdoc-th doc ID (aiming for ~64 searches) to exercise the graph
+              if (values.docID() % everyNdoc == 0) {
+                TopDocs docs =
+                    reader
+                        .getVectorReader()
+                        .search(fieldInfo.name, vectorValue, 10, bits, Integer.MAX_VALUE);
+                if (docs.scoreDocs.length == 0) {
+                  throw new CheckIndexException(
+                      "Field \"" + fieldInfo.name + "\" failed to search k nearest neighbors");
+                }
+                if (bits != null) {
+                  for (ScoreDoc doc : docs.scoreDocs) {
+                    if (bits.get(doc.doc) == false) {
+                      throw new CheckIndexException(
+                          "Searching Field \""
+                              + fieldInfo.name
+                              + "\" matched deleted doc="
+                              + doc.doc);
+                    }
+                  }
+                }
+              }
+              int valueLength = vectorValue.length;
               if (valueLength != dimension) {
                 throw new CheckIndexException(
                     "Field \""