You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/10/27 18:08:51 UTC
[lucene] branch main updated: LUCENE-9614: Fix KnnVectorQuery
failure when numDocs is 0 (#413)
This is an automated email from the ASF dual-hosted git repository.
julietibs pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new abd5ec4 LUCENE-9614: Fix KnnVectorQuery failure when numDocs is 0 (#413)
abd5ec4 is described below
commit abd5ec4ff0b56b1abfc2883e47e75871e60d3cad
Author: Julie Tibshirani <ju...@apache.org>
AuthorDate: Wed Oct 27 11:08:47 2021 -0700
LUCENE-9614: Fix KnnVectorQuery failure when numDocs is 0 (#413)
When the reader has no live docs, `KnnVectorQuery` can error out. This happens
because `IndexReader#numDocs` is 0, and we end up passing an illegal value of
`k = 0` to the search method.
This commit removes the problematic optimization in `KnnVectorQuery` and
replaces it with a lower-level bound based on the total number of vectors in the segment.
---
.../codecs/lucene90/Lucene90HnswVectorsReader.java | 4 +-
.../org/apache/lucene/search/KnnVectorQuery.java | 2 +-
.../apache/lucene/search/TestKnnVectorQuery.java | 82 ++++++++++++++++++++++
3 files changed, 86 insertions(+), 2 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
index 56dcf89..c3e5e0a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
@@ -240,8 +240,10 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
return null;
}
- OffHeapVectorValues vectorValues = getOffHeapVectorValues(fieldEntry);
+ // bound k by total number of vectors to prevent oversizing data structures
+ k = Math.min(k, fieldEntry.size());
+ OffHeapVectorValues vectorValues = getOffHeapVectorValues(fieldEntry);
// use a seed that is fixed for the index so we get reproducible results for the same query
final SplittableRandom random = new SplittableRandom(checksumSeed);
NeighborQueue results =
diff --git a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
index 08e3750..ba77432 100644
--- a/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java
@@ -60,7 +60,7 @@ public class KnnVectorQuery extends Query {
public Query rewrite(IndexReader reader) throws IOException {
TopDocs[] perLeafResults = new TopDocs[reader.leaves().size()];
for (LeafReaderContext ctx : reader.leaves()) {
- perLeafResults[ctx.ord] = searchLeaf(ctx, Math.min(k, reader.numDocs()));
+ perLeafResults[ctx.ord] = searchLeaf(ctx, k);
}
// Merge sort the results
TopDocs topK = TopDocs.merge(k, perLeafResults);
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
index 7d4d2d1..6e64d30 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
@@ -30,13 +30,17 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.KnnVectorField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FilterDirectoryReader;
+import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.VectorUtil;
@@ -518,6 +522,34 @@ public class TestKnnVectorQuery extends LuceneTestCase {
}
}
+ /**
+ * Check that the query behaves reasonably when using a custom filter reader where there are no
+ * live docs.
+ */
+ public void testNoLiveDocsReader() throws IOException {
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ try (Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, iwc)) {
+ final int numDocs = 10;
+ final int dim = 30;
+ for (int i = 0; i < numDocs; ++i) {
+ Document d = new Document();
+ d.add(new StringField("index", String.valueOf(i), Field.Store.NO));
+ d.add(new KnnVectorField("vector", randomVector(dim)));
+ w.addDocument(d);
+ }
+ w.commit();
+
+ try (DirectoryReader reader = DirectoryReader.open(dir)) {
+ DirectoryReader wrappedReader = new NoLiveDocsDirectoryReader(reader);
+ IndexSearcher searcher = new IndexSearcher(wrappedReader);
+ KnnVectorQuery query = new KnnVectorQuery("vector", randomVector(dim), numDocs);
+ TopDocs topDocs = searcher.search(query, numDocs);
+ assertEquals(0, topDocs.scoreDocs.length);
+ }
+ }
+ }
+
private Directory getIndexStore(String field, float[]... contents) throws IOException {
Directory indexStore = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
@@ -536,4 +568,54 @@ public class TestKnnVectorQuery extends LuceneTestCase {
ScoreDoc[] result = searcher.search(q, 1000).scoreDocs;
assertEquals(expectedMatches, result.length);
}
+
+ private static class NoLiveDocsDirectoryReader extends FilterDirectoryReader {
+
+ private NoLiveDocsDirectoryReader(DirectoryReader in) throws IOException {
+ super(
+ in,
+ new SubReaderWrapper() {
+ @Override
+ public LeafReader wrap(LeafReader reader) {
+ return new NoLiveDocsLeafReader(reader);
+ }
+ });
+ }
+
+ @Override
+ protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
+ return new NoLiveDocsDirectoryReader(in);
+ }
+
+ @Override
+ public CacheHelper getReaderCacheHelper() {
+ return in.getReaderCacheHelper();
+ }
+ }
+
+ private static class NoLiveDocsLeafReader extends FilterLeafReader {
+ private NoLiveDocsLeafReader(LeafReader in) {
+ super(in);
+ }
+
+ @Override
+ public int numDocs() {
+ return 0;
+ }
+
+ @Override
+ public Bits getLiveDocs() {
+ return new Bits.MatchNoBits(in.maxDoc());
+ }
+
+ @Override
+ public CacheHelper getReaderCacheHelper() {
+ return in.getReaderCacheHelper();
+ }
+
+ @Override
+ public CacheHelper getCoreCacheHelper() {
+ return in.getCoreCacheHelper();
+ }
+ }
}